Add String::lossy constructors to C++ API
These constructors map to Rust's lossy conversions: the UTF-8 overloads to
`String::from_utf8_lossy` and the UTF-16 overloads to
`String::from_utf16_lossy`. They are useful in situations where producing a
slightly garbled string on invalid Unicode data is preferable to
crashing the process, e.g. when passing error messages from C++ to Rust.
diff --git a/book/src/binding/string.md b/book/src/binding/string.md
index d564e00..ad5e9ca 100644
--- a/book/src/binding/string.md
+++ b/book/src/binding/string.md
@@ -18,15 +18,24 @@
String(String &&) noexcept;
~String() noexcept;
- // Throws std::invalid_argument if not utf-8.
+ // Throws std::invalid_argument if not UTF-8.
String(const std::string &);
String(const char *);
String(const char *, size_t);
- // Throws std::invalid_argument if not utf-16.
+ // Replaces invalid UTF-8 data with the replacement character (U+FFFD).
+ static String lossy(const std::string &) noexcept;
+ static String lossy(const char *) noexcept;
+ static String lossy(const char *, std::size_t) noexcept;
+
+ // Throws std::invalid_argument if not UTF-16.
String(const char16_t *);
String(const char16_t *, size_t);
+ // Replaces invalid UTF-16 data with the replacement character (U+FFFD).
+ static String lossy(const char16_t *) noexcept;
+ static String lossy(const char16_t *, std::size_t) noexcept;
+
String &operator=(const String &) noexcept;
String &operator=(String &&) noexcept;
diff --git a/include/cxx.h b/include/cxx.h
index 3683951..aff2f5d 100644
--- a/include/cxx.h
+++ b/include/cxx.h
@@ -24,6 +24,7 @@
inline namespace cxxbridge1 {
struct unsafe_bitcopy_t;
+struct lossy_t;
namespace {
template <typename T>
@@ -46,6 +47,13 @@
String(const char16_t *);
String(const char16_t *, std::size_t);
+ // Replace invalid Unicode data with the replacement character (U+FFFD).
+ static String lossy(const std::string &) noexcept;
+ static String lossy(const char *) noexcept;
+ static String lossy(const char *, std::size_t) noexcept;
+ static String lossy(const char16_t *) noexcept;
+ static String lossy(const char16_t *, std::size_t) noexcept;
+
String &operator=(const String &) &noexcept;
String &operator=(String &&) &noexcept;
@@ -85,6 +93,8 @@
String(unsafe_bitcopy_t, const String &) noexcept;
private:
+ String(lossy_t, const char *, std::size_t) noexcept;
+ String(lossy_t, const char16_t *, std::size_t) noexcept;
friend void swap(String &lhs, String &rhs) noexcept { lhs.swap(rhs); }
// Size and alignment statically verified by rust_string.rs.
diff --git a/src/cxx.cc b/src/cxx.cc
index 4bdbd58..983e38f 100644
--- a/src/cxx.cc
+++ b/src/cxx.cc
@@ -40,8 +40,12 @@
const rust::String &other) noexcept;
bool cxxbridge1$string$from_utf8(rust::String *self, const char *ptr,
std::size_t len) noexcept;
+void cxxbridge1$string$from_utf8_lossy(rust::String *self, const char *ptr,
+ std::size_t len) noexcept;
bool cxxbridge1$string$from_utf16(rust::String *self, const char16_t *ptr,
std::size_t len) noexcept;
+void cxxbridge1$string$from_utf16_lossy(rust::String *self, const char16_t *ptr,
+ std::size_t len) noexcept;
void cxxbridge1$string$drop(rust::String *self) noexcept;
const char *cxxbridge1$string$ptr(const rust::String *self) noexcept;
std::size_t cxxbridge1$string$len(const rust::String *self) noexcept;
@@ -69,6 +73,8 @@
namespace rust {
inline namespace cxxbridge1 {
+struct lossy_t {}; // Empty tag type; passing lossy_t{} selects the private lossy String constructors.
+
template <typename Exception>
void panic [[noreturn]] (const char *msg) {
#if defined(RUST_CXX_NO_EXCEPTIONS)
@@ -132,6 +138,43 @@
len);
}
+String::String(lossy_t, const char *s, std::size_t len) noexcept { // Tag-dispatched constructor: initializes *this through the lossy UTF-8 FFI entry point.
+ cxxbridge1$string$from_utf8_lossy(
+ this, s == nullptr && len == 0 ? reinterpret_cast<const char *>(1) : s, // Empty null input is swapped for a non-null placeholder; the Rust side feeds this pointer to slice::from_raw_parts, which requires non-null even for len 0.
+ len);
+}
+
+String::String(lossy_t, const char16_t *s, std::size_t len) noexcept { // Tag-dispatched constructor: initializes *this through the lossy UTF-16 FFI entry point.
+ cxxbridge1$string$from_utf16_lossy(
+ this,
+ s == nullptr && len == 0 ? reinterpret_cast<const char16_t *>(2) : s, // Empty null input is swapped for placeholder address 2: non-null and even, presumably to satisfy u16 alignment on the Rust side (confirm).
+ len);
+}
+
+String String::lossy(const std::string &s) noexcept { // Lossy conversion of a std::string's bytes.
+ return String::lossy(s.data(), s.length()); // Uses data()/length() rather than strlen, so embedded NUL bytes are passed through.
+}
+
+String String::lossy(const char *s) noexcept { // Lossy conversion of a NUL-terminated C string.
+ assert(s != nullptr); // Precondition: s must be non-null because std::strlen is called on it below.
+ return String::lossy(s, std::strlen(s));
+}
+
+String String::lossy(const char *s, std::size_t len) noexcept { // Lossy conversion of len bytes starting at s.
+ assert(s != nullptr || len == 0); // A null pointer is accepted only together with len == 0.
+ return String(lossy_t{}, s, len); // lossy_t{} selects the private lossy constructor.
+}
+
+String String::lossy(const char16_t *s) noexcept { // Lossy conversion of a zero-terminated UTF-16 string.
+ assert(s != nullptr); // Precondition: s must be non-null because its length is measured below.
+ return String::lossy(s, std::char_traits<char16_t>::length(s)); // Length is counted in char16_t code units, not bytes.
+}
+
+String String::lossy(const char16_t *s, std::size_t len) noexcept { // Lossy conversion of len UTF-16 code units starting at s.
+ assert(s != nullptr || len == 0); // A null pointer is accepted only together with len == 0.
+ return String(lossy_t{}, s, len); // lossy_t{} selects the private lossy constructor.
+}
+
String &String::operator=(const String &other) &noexcept {
if (this != &other) {
cxxbridge1$string$drop(this);
diff --git a/src/symbols/rust_string.rs b/src/symbols/rust_string.rs
index 115ffe3..8b7c8c4 100644
--- a/src/symbols/rust_string.rs
+++ b/src/symbols/rust_string.rs
@@ -39,6 +39,18 @@
}
}
+#[export_name = "cxxbridge1$string$from_utf8_lossy"] // Exported under the symbol that src/cxx.cc declares and calls.
+unsafe extern "C" fn string_from_utf8_lossy(
+ this: &mut MaybeUninit<String>, // Destination slot; it is overwritten below without reading or dropping any previous value.
+ ptr: *const u8,
+ len: usize,
+) {
+ let slice = unsafe { slice::from_raw_parts(ptr, len) }; // SAFETY: relies on the caller passing a non-null pointer valid for reads of `len` bytes.
+ let owned = String::from_utf8_lossy(slice).into_owned(); // Invalid sequences become U+FFFD; into_owned() turns the returned Cow into a String.
+ let this = this.as_mut_ptr();
+ unsafe { ptr::write(this, owned) } // SAFETY: `this` comes from a live `&mut MaybeUninit<String>`, so it is valid and aligned for the write.
+}
+
#[export_name = "cxxbridge1$string$from_utf16"]
unsafe extern "C" fn string_from_utf16(
this: &mut MaybeUninit<String>,
@@ -56,6 +68,18 @@
}
}
+#[export_name = "cxxbridge1$string$from_utf16_lossy"] // Exported under the symbol that src/cxx.cc declares and calls.
+unsafe extern "C" fn string_from_utf16_lossy(
+ this: &mut MaybeUninit<String>, // Destination slot; it is overwritten below without reading or dropping any previous value.
+ ptr: *const u16,
+ len: usize, // Element count in u16 code units (it is the slice length below), not a byte count.
+) {
+ let slice = unsafe { slice::from_raw_parts(ptr, len) }; // SAFETY: relies on the caller passing a non-null, u16-aligned pointer valid for reads of `len` elements.
+ let owned = String::from_utf16_lossy(slice); // Invalid UTF-16 data is replaced with U+FFFD.
+ let this = this.as_mut_ptr();
+ unsafe { ptr::write(this, owned) } // SAFETY: `this` comes from a live `&mut MaybeUninit<String>`, so it is valid and aligned for the write.
+}
+
#[export_name = "cxxbridge1$string$drop"]
unsafe extern "C" fn string_drop(this: &mut ManuallyDrop<String>) {
unsafe { ManuallyDrop::drop(this) }
diff --git a/tests/ffi/lib.rs b/tests/ffi/lib.rs
index 8981178..2af771c 100644
--- a/tests/ffi/lib.rs
+++ b/tests/ffi/lib.rs
@@ -103,6 +103,7 @@
fn c_return_slice_char(shared: &Shared) -> &[c_char];
fn c_return_mutsliceu8(slice: &mut [u8]) -> &mut [u8];
fn c_return_rust_string() -> String;
+ fn c_return_rust_string_lossy() -> String;
fn c_return_unique_ptr_string() -> UniquePtr<CxxString>;
fn c_return_unique_ptr_vector_u8() -> UniquePtr<CxxVector<u8>>;
fn c_return_unique_ptr_vector_f64() -> UniquePtr<CxxVector<f64>>;
diff --git a/tests/ffi/tests.cc b/tests/ffi/tests.cc
index f85d2ec..99c5c62 100644
--- a/tests/ffi/tests.cc
+++ b/tests/ffi/tests.cc
@@ -110,6 +110,10 @@
rust::String c_return_rust_string() { return "2020"; }
+rust::String c_return_rust_string_lossy() { // Test fixture: returns a rust::String built from deliberately invalid UTF-8.
+ return rust::String::lossy("Hello \xf0\x90\x80World"); // \xf0\x90\x80 is a truncated 4-byte sequence; tests/test.rs expects one U+FFFD in its place.
+}
+
std::unique_ptr<std::string> c_return_unique_ptr_string() {
return std::unique_ptr<std::string>(new std::string("2020"));
}
@@ -860,6 +864,12 @@
rust::String utf16_rstring = utf16_literal;
ASSERT(utf8_rstring == utf16_rstring);
+ const char *bad_utf8_literal = "test\x80";
+ const char16_t *bad_utf16_literal = u"test\xDD1E";
+ rust::String bad_utf8_rstring = rust::String::lossy(bad_utf8_literal);
+ rust::String bad_utf16_rstring = rust::String::lossy(bad_utf16_literal);
+ ASSERT(bad_utf8_rstring == bad_utf16_rstring);
+
rust::Vec<int> vec1{1, 2};
rust::Vec<int> vec2{3, 4};
swap(vec1, vec2);
diff --git a/tests/ffi/tests.h b/tests/ffi/tests.h
index 52d853a..ea5c6c3 100644
--- a/tests/ffi/tests.h
+++ b/tests/ffi/tests.h
@@ -100,6 +100,7 @@
rust::Slice<const char> c_return_slice_char(const Shared &shared);
rust::Slice<uint8_t> c_return_mutsliceu8(rust::Slice<uint8_t> slice);
rust::String c_return_rust_string();
+rust::String c_return_rust_string_lossy();
std::unique_ptr<std::string> c_return_unique_ptr_string();
std::unique_ptr<std::vector<uint8_t>> c_return_unique_ptr_vector_u8();
std::unique_ptr<std::vector<double>> c_return_unique_ptr_vector_f64();
diff --git a/tests/test.rs b/tests/test.rs
index d536a35..735f7e5 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -51,6 +51,7 @@
cast::c_char_to_unsigned(ffi::c_return_slice_char(&shared)),
);
assert_eq!("2020", ffi::c_return_rust_string());
+ assert_eq!("Hello �World", ffi::c_return_rust_string_lossy());
assert_eq!("2020", ffi::c_return_unique_ptr_string().to_str().unwrap());
assert_eq!(4, ffi::c_return_unique_ptr_vector_u8().len());
assert_eq!(