diff --git a/src/basic.rs b/src/basic.rs
index 2c1d042e..efc6e82a 100644
--- a/src/basic.rs
+++ b/src/basic.rs
@@ -39,6 +39,51 @@ pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
         Ok(from_utf8_unchecked(input))
     }
 }
+/// Validation of owned byte sequences as UTF-8.
+pub mod string {
+    pub use super::*;
+
+    /// Simple UTF-8 error that hands back the rejected bytes.
+    ///
+    /// No information is provided where the error occurred or how long the invalid
+    /// byte sequence is.
+    #[derive(Debug, PartialEq, Eq)]
+    pub struct FromUtf8Error {
+        /// The bytes that were passed to [`from_utf8()`], returned unchanged.
+        pub bytes: Vec<u8>,
+        error: Utf8Error,
+    }
+
+    impl FromUtf8Error {
+        /// Returns the [`Utf8Error`] produced by the failed validation.
+        #[inline]
+        #[must_use]
+        pub fn utf8_error(&self) -> &Utf8Error {
+            &self.error
+        }
+    }
+
+    /// Analogue to [`String::from_utf8()`].
+    ///
+    /// Checks if the passed byte vector is valid UTF-8 and, if it is, takes ownership
+    /// of it and returns it as a [`String`] wrapped in `Ok()`.
+    ///
+    /// # Errors
+    /// Will return an `Err` holding a [`FromUtf8Error`], which contains the original bytes
+    /// passed in along with the zero-sized [`Utf8Error`], if the input contains invalid UTF-8.
+    #[inline]
+    pub fn from_utf8(input: Vec<u8>) -> Result<String, FromUtf8Error> {
+        // SAFETY: `String::from_utf8_unchecked` is only reached once the validator
+        // has returned `Ok(())` for exactly these bytes. The validator call is kept
+        // inside the block as in the original; its own contract is not visible here.
+        unsafe {
+            match validate_utf8_basic(&input) {
+                Ok(()) => Ok(String::from_utf8_unchecked(input)),
+                Err(error) => Err(FromUtf8Error { bytes: input, error }),
+            }
+        }
+    }
+}
 
 /// Analogue to [`std::str::from_utf8_mut()`].
 ///
diff --git a/src/compat.rs b/src/compat.rs
index 2e955b9f..538fa88e 100644
--- a/src/compat.rs
+++ b/src/compat.rs
@@ -83,6 +83,52 @@ pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
     }
 }
 
+/// Validation of owned byte sequences as UTF-8.
+pub mod string {
+    pub use super::*;
+
+    /// UTF-8 error that hands back the rejected bytes.
+    ///
+    /// The wrapped [`Utf8Error`] contains information on the location of the encountered
+    /// validation error and the length of the invalid UTF-8 sequence.
+    #[derive(Debug, PartialEq, Eq)]
+    pub struct FromUtf8Error {
+        /// The bytes that were passed to [`from_utf8()`], returned unchanged.
+        pub bytes: Vec<u8>,
+        error: Utf8Error,
+    }
+
+    impl FromUtf8Error {
+        /// Returns the [`Utf8Error`] produced by the failed validation.
+        #[inline]
+        #[must_use]
+        pub fn utf8_error(&self) -> &Utf8Error {
+            &self.error
+        }
+    }
+
+    /// Analogue to [`String::from_utf8()`].
+    ///
+    /// Checks if the passed byte vector is valid UTF-8 and, if it is, takes ownership
+    /// of it and returns it as a [`String`] wrapped in `Ok()`.
+    ///
+    /// # Errors
+    /// Will return an `Err` holding a [`FromUtf8Error`], which contains the original bytes
+    /// passed in along with a [`Utf8Error`] carrying detailed error information, if the
+    /// input contains invalid UTF-8.
+    #[inline]
+    pub fn from_utf8(input: Vec<u8>) -> Result<String, FromUtf8Error> {
+        // SAFETY: `String::from_utf8_unchecked` is only reached once the validator
+        // has returned `Ok(())` for exactly these bytes. The validator call is kept
+        // inside the block as in the original; its own contract is not visible here.
+        unsafe {
+            match validate_utf8_compat(&input) {
+                Ok(()) => Ok(String::from_utf8_unchecked(input)),
+                Err(error) => Err(FromUtf8Error { bytes: input, error }),
+            }
+        }
+    }
+}
+
 /// Analogue to [`std::str::from_utf8_mut()`].
 ///
 /// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable