Skip to content

Commit 84b79cf

Browse files
committed
Merge branch 'main' of github.com:rusticstuff/simdutf8
2 parents 7101d50 + 492d32c commit 84b79cf

File tree

4 files changed

+77
-35
lines changed

4 files changed

+77
-35
lines changed

src/implementation/macros.rs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,34 @@ macro_rules! static_cast_i8 {
3737
};
3838
}
3939

40+
/// check_utf8() strategy
///
/// Expands to an `unsafe fn check_utf8(&self, previous)` compiled with the
/// target feature `$feat` and operating on `Utf8CheckingState<$t>`.
/// The `impl` the macro is expanded in must provide `self.is_ascii()` and
/// `self.check_block(..)`, and `Utf8CheckingState::<$t>::check_eof` must exist.
///
/// When `self.is_ascii()` is true, `check_block` is skipped and only
/// `previous.error` is rewritten (from `check_eof`); `previous.prev` and
/// `previous.incomplete` are left untouched on that path. Otherwise the whole
/// input is handed to `self.check_block`.
macro_rules! check_utf8 {
41+
($feat:expr, $t:ident) => {
42+
#[target_feature(enable = $feat)]
43+
#[inline]
44+
unsafe fn check_utf8(&self, previous: &mut Utf8CheckingState<$t>) {
45+
// NOTE(review): `likely!` is presumably a branch hint marking the
// all-ASCII case as the expected one -- confirm against its definition.
if likely!(self.is_ascii()) {
46+
// Combine the carried-over `incomplete` state into `error`;
// presumably this flags a multi-byte sequence left unfinished
// by the preceding input -- confirm against `check_eof`.
previous.error =
47+
Utf8CheckingState::<$t>::check_eof(previous.error, previous.incomplete)
48+
} else {
49+
self.check_block(previous);
50+
}
51+
}
52+
};
53+
}
54+
4055
/// check_bytes() strategy
4156
macro_rules! check_bytes {
4257
($feat:expr, $t:ident) => {
4358
#[target_feature(enable = $feat)]
4459
#[inline]
4560
unsafe fn check_bytes(current: $t, previous: &mut Utf8CheckingState<$t>) {
46-
if likely!(Self::is_ascii(current)) {
47-
previous.error = Self::check_eof(previous.error, previous.incomplete)
48-
} else {
49-
let prev1 = Self::prev1(current, previous.prev);
50-
let sc = Self::check_special_cases(current, prev1);
51-
previous.error = Self::or(
52-
previous.error,
53-
Self::check_multibyte_lengths(current, previous.prev, sc),
54-
);
55-
previous.incomplete = Self::is_incomplete(current);
56-
}
61+
let prev1 = Self::prev1(current, previous.prev);
62+
let sc = Self::check_special_cases(current, prev1);
63+
previous.error = Self::or(
64+
previous.error,
65+
Self::check_multibyte_lengths(current, previous.prev, sc),
66+
);
67+
previous.incomplete = Self::is_incomplete(current);
5768
previous.prev = current
5869
}
5970
};
@@ -79,12 +90,11 @@ macro_rules! validate_utf8_basic_simd {
7990
const SIMDINPUT_LENGTH: usize = 64;
8091
let len = input.len();
8192
let mut state = SimdInput::new_utf8_checking_state();
82-
let lenminus64: usize = if len < 64 { 0 } else { len as usize - 64 };
8393
let mut idx: usize = 0;
8494
let mut tmpbuf = $buf2type::new();
8595

8696
let align: usize = core::mem::align_of::<$buf2type>();
87-
if lenminus64 >= 4096 {
97+
if len >= 4096 {
8898
let off = (input.as_ptr() as usize) % align;
8999
if off != 0 {
90100
let to_copy = align - off;
@@ -98,7 +108,10 @@ macro_rules! validate_utf8_basic_simd {
98108
idx += to_copy;
99109
}
100110
}
101-
while idx < lenminus64 {
111+
112+
let rem = len - idx;
113+
let iter_lim = idx + (rem - (rem % 64));
114+
while idx < iter_lim {
102115
let input = SimdInput::new(input.get_unchecked(idx as usize..));
103116
input.check_utf8(&mut state);
104117
idx += SIMDINPUT_LENGTH;
@@ -151,12 +164,11 @@ macro_rules! validate_utf8_compat_simd {
151164
const SIMDINPUT_LENGTH: usize = 64;
152165
let len = input.len();
153166
let mut state = SimdInput::new_utf8_checking_state();
154-
let lenminus64: usize = if len < 64 { 0 } else { len as usize - 64 };
155167
let mut idx: usize = 0;
156168
let mut tmpbuf = $buf2type::new();
157169

158170
let align: usize = core::mem::align_of::<$buf2type>();
159-
if lenminus64 >= 4096 {
171+
if len >= 4096 {
160172
let off = (input.as_ptr() as usize) % align;
161173
if off != 0 {
162174
let to_copy = align - off;
@@ -174,7 +186,9 @@ macro_rules! validate_utf8_compat_simd {
174186
}
175187
}
176188

177-
while idx < lenminus64 {
189+
let rem = len - idx;
190+
let iter_lim = idx + (rem - (rem % 64));
191+
while idx < iter_lim {
178192
let simd_input = SimdInput::new(input.get_unchecked(idx as usize..));
179193
simd_input.check_utf8(&mut state);
180194
if SimdInput::check_utf8_errors(&state) {

src/implementation/x86/avx2.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ impl Utf8CheckingState<__m256i> {
3737
_mm256_or_si256(a, b)
3838
}
3939

40-
#[target_feature(enable = "avx2")]
41-
#[inline]
42-
unsafe fn is_ascii(input: __m256i) -> bool {
43-
_mm256_movemask_epi8(input) == 0
44-
}
45-
4640
#[target_feature(enable = "avx2")]
4741
#[inline]
4842
unsafe fn check_eof(error: __m256i, incomplete: __m256i) -> __m256i {
@@ -272,8 +266,6 @@ impl Utf8CheckingState<__m256i> {
272266
_mm256_testz_si256(error, error) != 1
273267
}
274268

275-
#[target_feature(enable = "avx2")]
276-
#[inline]
277269
check_bytes!("avx2", __m256i);
278270
}
279271

@@ -302,11 +294,18 @@ impl SimdInput {
302294

303295
#[target_feature(enable = "avx2")]
304296
#[inline]
305-
unsafe fn check_utf8(&self, state: &mut Utf8CheckingState<__m256i>) {
297+
unsafe fn check_block(&self, state: &mut Utf8CheckingState<__m256i>) {
306298
Utf8CheckingState::<__m256i>::check_bytes(self.v0, state);
307299
Utf8CheckingState::<__m256i>::check_bytes(self.v1, state);
308300
}
309301

302+
/// Returns `true` when no byte in `self.v0` or `self.v1` has its most
/// significant bit set.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "avx2")]`; the caller must make
/// sure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
303+
#[inline]
304+
unsafe fn is_ascii(&self) -> bool {
305+
// OR both vectors so a single movemask covers the whole input.
let res = _mm256_or_si256(self.v0, self.v1);
306+
// movemask gathers the top bit of every byte; zero means none was set.
_mm256_movemask_epi8(res) == 0
307+
}
308+
310309
#[target_feature(enable = "avx2")]
311310
#[inline]
312311
unsafe fn check_eof(state: &mut Utf8CheckingState<__m256i>) {
@@ -318,6 +317,8 @@ impl SimdInput {
318317
unsafe fn check_utf8_errors(state: &Utf8CheckingState<__m256i>) -> bool {
319318
Utf8CheckingState::<__m256i>::has_error(state.error)
320319
}
320+
321+
check_utf8!("avx2", __m256i);
321322
}
322323

323324
use crate::implementation::Temp2x64A32;

src/implementation/x86/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ pub(crate) unsafe fn validate_utf8_basic(
2424
(fun)(input)
2525
}
2626

27+
if input.len() < 64 {
28+
return super::validate_utf8_basic_fallback(input);
29+
}
30+
2731
let fun = FN.load(Ordering::Relaxed);
2832
mem::transmute::<FnRaw, super::ValidateUtf8Fn>(fun)(input)
2933
}
@@ -46,6 +50,10 @@ fn get_fastest_available_implementation_basic() -> super::ValidateUtf8Fn {
4650
/// Validates `input` with the `basic` error type, forwarding to
/// `avx2::validate_utf8_basic` unless the input is shorter than 64 bytes.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee AVX2 is available,
/// since the call is forwarded to the `avx2` module -- confirm against the
/// `cfg` attributes guarding this function.
pub(crate) unsafe fn validate_utf8_basic(
4751
input: &[u8],
4852
) -> core::result::Result<(), crate::basic::Utf8Error> {
53+
// Short inputs go to the fallback implementation; presumably because
// the SIMD path works on 64-byte blocks -- TODO confirm.
if input.len() < 64 {
54+
return super::validate_utf8_basic_fallback(input);
55+
}
56+
4957
avx2::validate_utf8_basic(input)
5058
}
5159

@@ -57,6 +65,10 @@ pub(crate) unsafe fn validate_utf8_basic(
5765
/// Validates `input` with the `basic` error type, forwarding to
/// `sse42::validate_utf8_basic` unless the input is shorter than 64 bytes.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee SSE 4.2 is available,
/// since the call is forwarded to the `sse42` module -- confirm against the
/// `cfg` attributes guarding this function.
pub(crate) unsafe fn validate_utf8_basic(
5866
input: &[u8],
5967
) -> core::result::Result<(), crate::basic::Utf8Error> {
68+
// Short inputs go to the fallback implementation; presumably because
// the SIMD path works on 64-byte blocks -- TODO confirm.
if input.len() < 64 {
69+
return super::validate_utf8_basic_fallback(input);
70+
}
71+
6072
sse42::validate_utf8_basic(input)
6173
}
6274

@@ -88,6 +100,10 @@ pub(crate) unsafe fn validate_utf8_compat(
88100
(fun)(input)
89101
}
90102

103+
if input.len() < 64 {
104+
return super::validate_utf8_compat_fallback(input);
105+
}
106+
91107
let fun = FN.load(Ordering::Relaxed);
92108
mem::transmute::<FnRaw, super::ValidateUtf8CompatFn>(fun)(input)
93109
}
@@ -110,6 +126,10 @@ fn get_fastest_available_implementation_compat() -> super::ValidateUtf8CompatFn
110126
/// Validates `input` with the `compat` error type, forwarding to
/// `avx2::validate_utf8_compat` unless the input is shorter than 64 bytes.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee AVX2 is available,
/// since the call is forwarded to the `avx2` module -- confirm against the
/// `cfg` attributes guarding this function.
pub(crate) unsafe fn validate_utf8_compat(
111127
input: &[u8],
112128
) -> core::result::Result<(), crate::compat::Utf8Error> {
129+
// Short inputs go to the fallback implementation; presumably because
// the SIMD path works on 64-byte blocks -- TODO confirm.
if input.len() < 64 {
130+
return super::validate_utf8_compat_fallback(input);
131+
}
132+
113133
avx2::validate_utf8_compat(input)
114134
}
115135

@@ -121,6 +141,10 @@ pub(crate) unsafe fn validate_utf8_compat(
121141
/// Validates `input` with the `compat` error type, forwarding to
/// `sse42::validate_utf8_compat` unless the input is shorter than 64 bytes.
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee SSE 4.2 is available,
/// since the call is forwarded to the `sse42` module -- confirm against the
/// `cfg` attributes guarding this function.
pub(crate) unsafe fn validate_utf8_compat(
122142
input: &[u8],
123143
) -> core::result::Result<(), crate::compat::Utf8Error> {
144+
// Short inputs go to the fallback implementation; presumably because
// the SIMD path works on 64-byte blocks -- TODO confirm.
if input.len() < 64 {
145+
return super::validate_utf8_compat_fallback(input);
146+
}
147+
124148
sse42::validate_utf8_compat(input)
125149
}
126150

src/implementation/x86/sse42.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,6 @@ impl Utf8CheckingState<__m128i> {
3636
_mm_or_si128(a, b)
3737
}
3838

39-
#[target_feature(enable = "sse4.2")]
40-
#[inline]
41-
unsafe fn is_ascii(input: __m128i) -> bool {
42-
_mm_movemask_epi8(input) == 0
43-
}
44-
4539
#[target_feature(enable = "sse4.2")]
4640
#[inline]
4741
unsafe fn check_eof(error: __m128i, incomplete: __m128i) -> __m128i {
@@ -205,8 +199,6 @@ impl Utf8CheckingState<__m128i> {
205199
_mm_testz_si128(error, error) != 1
206200
}
207201

208-
#[target_feature(enable = "sse4.2")]
209-
#[inline]
210202
check_bytes!("sse4.2", __m128i);
211203
}
212204

@@ -239,13 +231,22 @@ impl SimdInput {
239231

240232
#[target_feature(enable = "sse4.2")]
241233
#[inline]
242-
unsafe fn check_utf8(&self, state: &mut Utf8CheckingState<__m128i>) {
234+
unsafe fn check_block(&self, state: &mut Utf8CheckingState<__m128i>) {
243235
Utf8CheckingState::<__m128i>::check_bytes(self.v0, state);
244236
Utf8CheckingState::<__m128i>::check_bytes(self.v1, state);
245237
Utf8CheckingState::<__m128i>::check_bytes(self.v2, state);
246238
Utf8CheckingState::<__m128i>::check_bytes(self.v3, state);
247239
}
248240

241+
/// Returns `true` when no byte in `self.v0`..`self.v3` has its most
/// significant bit set.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must make
/// sure the running CPU supports SSE 4.2.
#[target_feature(enable = "sse4.2")]
242+
#[inline]
243+
unsafe fn is_ascii(&self) -> bool {
244+
// OR the four vectors pairwise so a single movemask covers all of them.
let r1 = _mm_or_si128(self.v0, self.v1);
245+
let r2 = _mm_or_si128(self.v2, self.v3);
246+
let r = _mm_or_si128(r1, r2);
247+
// movemask gathers the top bit of every byte; zero means none was set.
_mm_movemask_epi8(r) == 0
248+
}
249+
249250
#[target_feature(enable = "sse4.2")]
250251
#[inline]
251252
unsafe fn check_eof(state: &mut Utf8CheckingState<__m128i>) {
@@ -257,6 +258,8 @@ impl SimdInput {
257258
unsafe fn check_utf8_errors(state: &Utf8CheckingState<__m128i>) -> bool {
258259
Utf8CheckingState::<__m128i>::has_error(state.error)
259260
}
261+
262+
check_utf8!("sse4.2", __m128i);
260263
}
261264

262265
use crate::implementation::Temp2x64A16;

0 commit comments

Comments
 (0)