From 84df4c0cfc724a99fb28e8b6a4a44606a33e8817 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Sat, 16 Sep 2023 14:55:50 +0300 Subject: [PATCH 1/7] Added String support --- src/basic.rs | 34 ++++++++++++++++++++++++++++++++++ src/compat.rs | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/basic.rs b/src/basic.rs index 2c1d042e..b5199c65 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -39,6 +39,40 @@ pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { Ok(from_utf8_unchecked(input)) } } +/// For validating owned byte sequences of UTF-8. +pub mod string { + pub use super::*; + + + /// Simple UTF-8 error containing the invalid utf8 bytes. + /// + /// No information is provided where the error occurred or how long the invalid byte + /// byte sequence is. + #[derive(Debug, PartialEq, Eq)] + pub struct FromUtf8Error { + bytes: Vec, + error: Utf8Error, + } + + /// Analogue to [`String::from_utf8()`]. + /// + /// Checks if the passed byte sequence is valid UTF-8 and, if it is, takes ownership of the + /// passed byte vector and returns it as a [`String`] wrapped in `Ok()`. + /// + /// # Errors + /// Will return an Err([`FromUtf8Error`]) + /// containing the original bytes passed in along with the zero-sized [`Utf8Error`] + /// if the input contains invalid UTF-8. + #[inline] + pub fn from_utf8(input: Vec) -> Result { + unsafe { + match validate_utf8_basic(&input) { + Ok(()) => Ok(String::from_utf8_unchecked(input)), + Err(Utf8Error) => Err(FromUtf8Error {bytes:input, error: Utf8Error}) + } + } + } +} /// Analogue to [`std::str::from_utf8_mut()`]. 
/// diff --git a/src/compat.rs b/src/compat.rs index 2e955b9f..e7992c63 100644 --- a/src/compat.rs +++ b/src/compat.rs @@ -83,6 +83,42 @@ pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { } } +/// For validating owned byte sequences of UTF-8. +pub mod string { + pub use super::*; + + + /// Simple UTF-8 error containing the invalid utf8 bytes. + /// + /// Contains information on the location of the encountered validation error and the length of the + /// invalid UTF-8 sequence. + #[derive(Debug, PartialEq, Eq)] + pub struct FromUtf8Error { + bytes: Vec, + error: Utf8Error, + } + + /// Analogue to [`String::from_utf8()`]. + /// + /// Checks if the passed byte sequence is valid UTF-8 and, if it is, takes ownership of the + /// passed byte vector and returns it as a [`String`] wrapped in `Ok()`. + /// + /// # Errors + /// Will return an Err([`FromUtf8Error`]) + /// containing the original bytes passed in along with a [`Utf8Error`] + /// with detailed error information + /// if the input contains invalid UTF-8. + #[inline] + pub fn from_utf8(input: Vec) -> Result { + unsafe { + match validate_utf8_compat(&input) { + Ok(()) => Ok(String::from_utf8_unchecked(input)), + Err(err) => Err(FromUtf8Error {bytes:input, error: err}) + } + } + } +} + /// Analogue to [`std::str::from_utf8_mut()`]. /// /// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable From 0a4c16a88b3166e3c90d69191c348389551ee821 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Sun, 17 Sep 2023 19:06:27 +0300 Subject: [PATCH 2/7] made bytes public --- src/basic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic.rs b/src/basic.rs index b5199c65..93183459 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -50,7 +50,7 @@ pub mod string { /// byte sequence is. 
#[derive(Debug, PartialEq, Eq)] pub struct FromUtf8Error { - bytes: Vec, + pub bytes: Vec, error: Utf8Error, } From 6015adb93dd398959174f2399c58e7328660c5f2 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Sun, 17 Sep 2023 19:07:09 +0300 Subject: [PATCH 3/7] made bytes public --- src/compat.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compat.rs b/src/compat.rs index e7992c63..173ba5fc 100644 --- a/src/compat.rs +++ b/src/compat.rs @@ -94,7 +94,7 @@ pub mod string { /// invalid UTF-8 sequence. #[derive(Debug, PartialEq, Eq)] pub struct FromUtf8Error { - bytes: Vec, + pub bytes: Vec, error: Utf8Error, } From a505ccb59e426149121738c4b392c6f127c4da79 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Tue, 19 Sep 2023 22:03:00 +0300 Subject: [PATCH 4/7] No more sync, AND Async Serde --- src/basic.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/basic.rs b/src/basic.rs index b5199c65..efc6e82a 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -48,9 +48,10 @@ pub mod string { /// /// No information is provided where the error occurred or how long the invalid byte /// byte sequence is. + #[allow(missing_docs)] #[derive(Debug, PartialEq, Eq)] pub struct FromUtf8Error { - bytes: Vec, + pub bytes: Vec, error: Utf8Error, } From 1ef8350634c904a08893dd475409cbcbc57fabb4 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Tue, 19 Sep 2023 22:07:52 +0300 Subject: [PATCH 5/7] No more sync, AND Async Serde --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dca2f8dd..dcf51728 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" authors = ["Hans Kratz "] edition = "2018" description = "SIMD-accelerated UTF-8 validation." 
From d602c994a8306d2e1212abc9514864c5481bdb35 Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Tue, 19 Sep 2023 22:08:57 +0300 Subject: [PATCH 6/7] No more sync, AND Async Serde --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dcf51728..dca2f8dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simdutf8" -version = "0.1.5" +version = "0.1.4" authors = ["Hans Kratz "] edition = "2018" description = "SIMD-accelerated UTF-8 validation." From 60a118d7b5cae61680eec184b287df82a5d30fca Mon Sep 17 00:00:00 2001 From: NightMare-Vortex <93774088+NightMare-Vortex@users.noreply.github.com> Date: Wed, 20 Sep 2023 22:25:29 +0300 Subject: [PATCH 7/7] use MaybeUninit --- src/compat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compat.rs b/src/compat.rs index 173ba5fc..538fa88e 100644 --- a/src/compat.rs +++ b/src/compat.rs @@ -92,6 +92,7 @@ pub mod string { /// /// Contains information on the location of the encountered validation error and the length of the /// invalid UTF-8 sequence. + #[allow(missing_docs)] #[derive(Debug, PartialEq, Eq)] pub struct FromUtf8Error { pub bytes: Vec,