Skip to content

Commit e1b095e

Browse files
committed
Merge branch 'main' of github.com:rusticstuff/simdutf8
2 parents 58a3b06 + 0798da0 commit e1b095e

File tree

7 files changed

+40
-31
lines changed

7 files changed

+40
-31
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Changelog
22
## [Unreleased]
33

4+
## [0.1.3] - 2021-05-14
5+
### New features
6+
* Low-level streaming validation API in `simdutf8::basic::imp`
7+
48
## [0.1.2] - 2021-05-09
59
### New features
610
* Aarch64 support (e.g. Apple Silicon, Raspberry Pi 4, ...) with nightly Rust and crate feature `aarch64_neon`
@@ -46,7 +50,8 @@
4650
## [0.0.1] - 2021-04-20
4751
- Initial release.
4852

49-
[Unreleased]: https://github.com/rusticstuff/simdutf8/compare/v0.1.2...HEAD
53+
[Unreleased]: https://github.com/rusticstuff/simdutf8/compare/v0.1.3...HEAD
54+
[0.1.3]: https://github.com/rusticstuff/simdutf8/compare/v0.1.2...v0.1.3
5055
[0.1.2]: https://github.com/rusticstuff/simdutf8/compare/v0.1.1...v0.1.2
5156
[0.1.1]: https://github.com/rusticstuff/simdutf8/compare/v0.1.0...v0.1.1
5257
[0.1.0]: https://github.com/rusticstuff/simdutf8/compare/v0.0.3...v0.1.0

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "simdutf8"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
authors = ["Hans Kratz <hans@appfour.com>"]
55
edition = "2018"
66
description = "SIMD-accelerated UTF-8 validation."

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ This library has been thoroughly tested with sample data as well as fuzzing and
2828
Add the dependency to your Cargo.toml file:
2929
```toml
3030
[dependencies]
31-
simdutf8 = { version = "0.1.2" }
31+
simdutf8 = { version = "0.1.3" }
3232
```
3333
or on ARM64 with Rust Nightly:
3434
```toml
3535
[dependencies]
36-
simdutf8 = { version = "0.1.2", features = ["aarch64_neon"] }
36+
simdutf8 = { version = "0.1.3", features = ["aarch64_neon"] }
3737
```
3838

3939
Use `simdutf8::basic::from_utf8()` as a drop-in replacement for `std::str::from_utf8()`.

src/basic.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ impl std::error::Error for Utf8Error {}
2828
/// Analogue to [`std::str::from_utf8()`].
2929
///
3030
/// Checks if the passed byte sequence is valid UTF-8 and returns an
31-
/// [`std::str``] reference to the passed byte slice wrapped in `Ok()` if it is.
31+
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
3232
///
3333
/// # Errors
3434
/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8.
@@ -43,7 +43,7 @@ pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
4343
/// Analogue to [`std::str::from_utf8_mut()`].
4444
///
4545
/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable
46-
/// [`std::str``] reference to the passed byte slice wrapped in `Ok()` if it is.
46+
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
4747
///
4848
/// # Errors
4949
/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8.
@@ -58,16 +58,18 @@ pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
5858
/// Allows direct access to the platform-specific unsafe validation implementations.
5959
#[cfg(feature = "public_imp")]
6060
pub mod imp {
61+
use crate::basic;
62+
6163
/// A low-level interface for streaming validation of UTF-8 data. It is meant to be integrated
6264
/// in high-performance data processing pipelines.
6365
///
6466
/// Data can be streamed in arbitrarily-sized chunks using the [`Self::update()`] method. There is
6567
/// no way to find out if the input so far was valid UTF-8 during the validation. Only when
6668
/// the validation is completed with the [`Self::finalize()`] method the result of the validation is
67-
/// returned. Use [`ChunkedUtf8Validator`] is possible for highest performance.
69+
/// returned. Use [`ChunkedUtf8Validator`] if possible for highest performance.
6870
///
6971
/// This implementation requires CPU SIMD features specified by the module it resides in.
70-
/// It is undefined behavior to call it if the required CPU features are not available which
72+
/// It is undefined behavior to use it if the required CPU features are not available which
7173
/// is why all trait methods are `unsafe`.
7274
///
7375
/// General usage:
@@ -123,13 +125,13 @@ pub mod imp {
123125
/// Finishes the validation and returns `Ok(())` if the input was valid UTF-8.
124126
///
125127
/// # Errors
126-
/// A [`crate::basic::Utf8Error`] is returned if the input was not valid UTF-8. No
128+
/// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No
127129
/// further information about the location of the error is provided.
128130
///
129131
/// # Safety
130132
/// This implementation requires CPU SIMD features specified by the module it resides in.
131133
/// It is undefined behavior to call it if the required CPU features are not available.
132-
unsafe fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error>;
134+
unsafe fn finalize(self) -> core::result::Result<(), basic::Utf8Error>;
133135
}
134136

135137
/// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data.
@@ -146,7 +148,7 @@ pub mod imp {
146148
/// data passed to it.
147149
///
148150
/// This implementation requires CPU SIMD features specified by the module it resides in.
149-
/// It is undefined behavior to call it if the required CPU features are not available which
151+
/// It is undefined behavior to use it if the required CPU features are not available which
150152
/// is why all trait methods are `unsafe`.
151153
pub trait ChunkedUtf8Validator {
152154
/// Creates a new validator.
@@ -175,7 +177,7 @@ pub mod imp {
175177
/// Finishes the validation and returns `Ok(())` if the input was valid UTF-8.
176178
///
177179
/// # Errors
178-
/// A [`crate::basic::Utf8Error`] is returned if the input was not valid UTF-8. No
180+
/// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No
179181
/// further information about the location of the error is provided.
180182
///
181183
/// # Safety
@@ -184,7 +186,7 @@ pub mod imp {
184186
unsafe fn finalize(
185187
self,
186188
remaining_input: core::option::Option<&[u8]>,
187-
) -> core::result::Result<(), crate::basic::Utf8Error>;
189+
) -> core::result::Result<(), basic::Utf8Error>;
188190
}
189191

190192
/// Includes the x86/x86-64 SIMD implementations.
@@ -201,7 +203,7 @@ pub mod imp {
201203
}
202204
/// Includes the validation implementation for SSE 4.2-compatible CPUs.
203205
///
204-
/// Using the provided functionality on CPUs which do not support AVX 2 is undefined
206+
/// Using the provided functionality on CPUs which do not support SSE 4.2 is undefined
205207
/// behavior and will very likely cause a crash.
206208
pub mod sse42 {
207209
pub use crate::implementation::x86::sse42::validate_utf8_basic as validate_utf8;

src/compat.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//!
88
//! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once
99
//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data.
10-
//! This comes at a performance penality compared to the [`crate::basic`] module even if the input is valid UTF-8.
10+
//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8.
1111
1212
use core::fmt::Display;
1313
use core::fmt::Formatter;

src/implementation/algorithm.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
66
macro_rules! algorithm_simd {
77
($feat:expr) => {
8+
use crate::{basic, compat};
9+
810
impl Utf8CheckAlgorithm<SimdU8Value> {
911
#[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
1012
#[inline]
@@ -203,7 +205,7 @@ macro_rules! algorithm_simd {
203205
/// Validation implementation for CPUs supporting the SIMD extension (see module).
204206
///
205207
/// # Errors
206-
/// Return the zero-sized [`crate::basic::Utf8Error`] on failure.
208+
/// Returns the zero-sized [`basic::Utf8Error`] on failure.
207209
///
208210
/// # Safety
209211
/// This function is inherently unsafe because it is compiled with SIMD extensions
@@ -213,7 +215,7 @@ macro_rules! algorithm_simd {
213215
#[inline]
214216
pub unsafe fn validate_utf8_basic(
215217
input: &[u8],
216-
) -> core::result::Result<(), crate::basic::Utf8Error> {
218+
) -> core::result::Result<(), basic::Utf8Error> {
217219
use crate::implementation::helpers::SIMD_CHUNK_SIZE;
218220
let len = input.len();
219221
let mut algorithm = Utf8CheckAlgorithm::<SimdU8Value>::default();
@@ -250,7 +252,7 @@ macro_rules! algorithm_simd {
250252
}
251253
algorithm.check_incomplete_pending();
252254
if algorithm.has_error() {
253-
Err(crate::basic::Utf8Error {})
255+
Err(basic::Utf8Error {})
254256
} else {
255257
Ok(())
256258
}
@@ -259,7 +261,7 @@ macro_rules! algorithm_simd {
259261
/// Validation implementation for CPUs supporting the SIMD extension (see module).
260262
///
261263
/// # Errors
262-
/// Return [`crate::compat::Utf8Error`] with detailed error information on failure.
264+
/// Returns [`compat::Utf8Error`] with detailed error information on failure.
263265
///
264266
/// # Safety
265267
/// This function is inherently unsafe because it is compiled with SIMD extensions
@@ -269,7 +271,7 @@ macro_rules! algorithm_simd {
269271
#[inline]
270272
pub unsafe fn validate_utf8_compat(
271273
input: &[u8],
272-
) -> core::result::Result<(), crate::compat::Utf8Error> {
274+
) -> core::result::Result<(), compat::Utf8Error> {
273275
validate_utf8_compat_simd0(input)
274276
.map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx))
275277
}
@@ -347,7 +349,7 @@ macro_rules! algorithm_simd {
347349
}
348350
}
349351

350-
/// Low-level implementation of the [`crate::basic::imp::Utf8Validator]` trait.
352+
/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait.
351353
///
352354
/// This implementation requires CPU SIMD features specified by the module it resides in.
353355
/// It is undefined behavior to call it if the required CPU features are not
@@ -371,7 +373,7 @@ macro_rules! algorithm_simd {
371373
}
372374

373375
#[cfg(feature = "public_imp")]
374-
impl crate::basic::imp::Utf8Validator for Utf8ValidatorImp {
376+
impl basic::imp::Utf8Validator for Utf8ValidatorImp {
375377
#[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
376378
#[inline]
377379
#[must_use]
@@ -424,7 +426,7 @@ macro_rules! algorithm_simd {
424426

425427
#[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
426428
#[inline]
427-
unsafe fn finalize(mut self) -> core::result::Result<(), crate::basic::Utf8Error> {
429+
unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> {
428430
if self.incomplete_len != 0 {
429431
for i in &mut self.incomplete_data[self.incomplete_len..] {
430432
*i = 0
@@ -433,14 +435,14 @@ macro_rules! algorithm_simd {
433435
}
434436
self.algorithm.check_incomplete_pending();
435437
if self.algorithm.has_error() {
436-
Err(crate::basic::Utf8Error {})
438+
Err(basic::Utf8Error {})
437439
} else {
438440
Ok(())
439441
}
440442
}
441443
}
442444

443-
/// Low-level implementation of the [`crate::basic::imp::ChunkedUtf8Validator]` trait.
445+
/// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait.
444446
///
445447
/// This implementation requires CPU SIMD features specified by the module it resides in.
446448
/// It is undefined behavior to call it if the required CPU features are not
@@ -451,7 +453,7 @@ macro_rules! algorithm_simd {
451453
}
452454

453455
#[cfg(feature = "public_imp")]
454-
impl crate::basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp {
456+
impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp {
455457
#[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
456458
#[inline]
457459
#[must_use]
@@ -480,7 +482,7 @@ macro_rules! algorithm_simd {
480482
unsafe fn finalize(
481483
mut self,
482484
remaining_input: core::option::Option<&[u8]>,
483-
) -> core::result::Result<(), crate::basic::Utf8Error> {
485+
) -> core::result::Result<(), basic::Utf8Error> {
484486
use crate::implementation::helpers::SIMD_CHUNK_SIZE;
485487

486488
if let Some(mut remaining_input) = remaining_input {
@@ -505,7 +507,7 @@ macro_rules! algorithm_simd {
505507
}
506508
self.algorithm.check_incomplete_pending();
507509
if self.algorithm.has_error() {
508-
Err(crate::basic::Utf8Error {})
510+
Err(basic::Utf8Error {})
509511
} else {
510512
Ok(())
511513
}

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
//! Add the dependency to your Cargo.toml file:
2626
//! ```toml
2727
//! [dependencies]
28-
//! simdutf8 = { version = "0.1.2" }
28+
//! simdutf8 = { version = "0.1.3" }
2929
//! ```
3030
//! or on ARM64 with Rust Nightly:
3131
//! ```toml
3232
//! [dependencies]
33-
//! simdutf8 = { version = "0.1.2", features = ["aarch64_neon"] }
33+
//! simdutf8 = { version = "0.1.3", features = ["aarch64_neon"] }
3434
//! ```
3535
//!
3636
//! Use [`basic::from_utf8()`] as a drop-in replacement for `std::str::from_utf8()`.
@@ -87,7 +87,7 @@
8787
//!
8888
//! ### Access to low-level functionality
8989
//! If you want to be able to call a SIMD implementation directly, use the `public_imp` feature flag. The validation
90-
//! implementations are then accessible via [`basic::imp`] and [`compat::imp`].Traits facilitating streaming validation are available
90+
//! implementations are then accessible via [`basic::imp`] and [`compat::imp`]. Traits facilitating streaming validation are available
9191
//! there as well.
9292
//!
9393
//! ## Optimisation flags

0 commit comments

Comments
 (0)