Skip to content

Commit 7e9d425

Browse files
committed
glib: Add GStr::from_ptr_lossy() and GString::from_ptr_lossy()
These validate that the string is actually UTF-8 and otherwise create a copy with replacement characters.
1 parent 279b352 commit 7e9d425

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

glib/src/gstring.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,27 @@ impl GStr {
5959
Self::from_bytes_with_nul_unchecked(cstr.to_bytes_with_nul())
6060
}
6161
// rustdoc-stripper-ignore-next
62+
/// Wraps a raw C string with a safe GLib string wrapper. The provided C string **must** be
63+
/// nul-terminated. All constraints from [`std::ffi::CStr::from_ptr`] also apply here.
64+
///
65+
/// If the string is valid UTF-8 then it is directly returned otherwise a copy is created with
66+
/// every invalid character replaced by the Unicode replacement character (U+FFFD).
67+
#[inline]
68+
pub unsafe fn from_ptr_lossy<'a>(ptr: *const c_char) -> Cow<'a, Self> {
69+
let mut end_ptr = ptr::null();
70+
if ffi::g_utf8_validate(ptr as *const _, -1, &mut end_ptr) != ffi::GFALSE {
71+
Cow::Borrowed(Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(
72+
ptr as *const u8,
73+
end_ptr.offset_from(ptr) as usize + 1,
74+
)))
75+
} else {
76+
Cow::Owned(GString::from_glib_full(ffi::g_utf8_make_valid(
77+
ptr as *const _,
78+
-1,
79+
)))
80+
}
81+
}
82+
// rustdoc-stripper-ignore-next
6283
/// Converts this GLib string to a byte slice containing the trailing 0 byte.
6384
///
6485
/// This function is the equivalent of [`GStr::to_bytes`] except that it will retain the
@@ -408,6 +429,17 @@ impl GString {
408429
Inner::Foreign { ptr, .. } => ptr.as_ptr(),
409430
}
410431
}
432+
433+
// rustdoc-stripper-ignore-next
434+
/// Wraps a raw C string with a safe GLib string wrapper. The provided C string **must** be
435+
/// nul-terminated. All constraints from [`std::ffi::CStr::from_ptr`] also apply here.
436+
///
437+
/// If the string is valid UTF-8 then it is directly returned otherwise a copy is created with
438+
/// every invalid character replaced by the Unicode replacement character (U+FFFD).
439+
#[inline]
440+
pub unsafe fn from_ptr_lossy<'a>(ptr: *const c_char) -> Cow<'a, GStr> {
441+
GStr::from_ptr_lossy(ptr)
442+
}
411443
}
412444

413445
impl IntoGlibPtr<*mut c_char> for GString {
@@ -1236,4 +1268,25 @@ mod tests {
12361268
let gstring: GString = "foo".into();
12371269
assert!(h.contains_key(&gstring));
12381270
}
1271+
1272+
#[test]
fn test_gstring_from_ptr_lossy() {
    // Case 1: valid UTF-8. The result must expose the same text and point at the
    // very same buffer (no copy was made).
    let valid = CString::new("foo").unwrap();
    let valid_ptr = valid.as_ptr();

    unsafe {
        let lossy = GString::from_ptr_lossy(valid_ptr);
        assert_eq!(lossy.as_str(), "foo");
        assert_eq!(valid_ptr, lossy.as_ptr());
    }

    // Case 2: `\xF0\x90\x80` is a truncated multi-byte sequence, so the input is not
    // valid UTF-8. The result must contain replacement characters and live in a
    // different allocation than the input.
    let invalid = b"foo\xF0\x90\x80bar\0";
    let invalid_ptr = invalid.as_ptr();

    unsafe {
        let lossy = GString::from_ptr_lossy(invalid_ptr as *const _);
        assert_eq!(lossy.as_str(), "foo���bar");
        assert_ne!(invalid_ptr, lossy.as_ptr() as *const _);
    }
}
12391292
}

0 commit comments

Comments
 (0)