Skip to content

Commit 32fcd77

Browse files
authored
Revert "Optimize string parsing" (#27)
This reverts commit 0ff4ece. This fixes a segfault that was introduced with 0.1.14. The reason for that segfault is that the reverted commit introduced an issue of us reading past the end of the input data or possibly crossing page boundaries. Better handling of relocated data might solve that, but Rust could get in the way here. Worth investigating later.
1 parent d0536a5 commit 32fcd77

File tree

1 file changed

+42
-12
lines changed

1 file changed

+42
-12
lines changed

src/lib.rs

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//! most of the design closely with a few exceptions to make it better
55
//! fit into the rust ecosystem.
66
//!
7-
//! Note: by default rustc will compile for compatibility, not
7+
//! Note: by default rustc will compile for compatibility, not
88
//! performance, to take advantage of the simd part of simd json. You
99
have to use a native cpu target on an avx2 capable host system. An
1010
//! example how to do this can be found in the `.cargo` directory on
@@ -210,13 +210,27 @@ impl<'de> Deserializer<'de> {
210210
// fancy in it like object keys
211211
#[cfg_attr(not(feature = "no-inline"), inline(always))]
212212
fn parse_short_str_(&mut self) -> Result<&'de str> {
213+
let mut padding = [0u8; 32];
213214
let idx = self.iidx + 1;
214215
let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) };
215216

216217
//short strings are very common for IDs
217-
#[allow(clippy::cast_ptr_alignment)]
218-
let v: __m256i =
219-
unsafe { _mm256_loadu_si256(src.get_unchecked(..32).as_ptr() as *const __m256i) };
218+
let v: __m256i = if src.len() >= 32 {
219+
// This is safe since we ensure src is at least 32 wide
220+
#[allow(clippy::cast_ptr_alignment)]
221+
unsafe {
222+
_mm256_loadu_si256(src.get_unchecked(..32).as_ptr() as *const __m256i)
223+
}
224+
} else {
225+
unsafe {
226+
padding
227+
.get_unchecked_mut(..src.len())
228+
.clone_from_slice(&src);
229+
// This is safe since padding is a 32 byte array and src (shorter than 32 here) was copied into its start
230+
#[allow(clippy::cast_ptr_alignment)]
231+
_mm256_loadu_si256(padding.get_unchecked(..32).as_ptr() as *const __m256i)
232+
}
233+
};
220234
let bs_bits: u32 = unsafe {
221235
static_cast_u32!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
222236
v,
@@ -227,22 +241,25 @@ impl<'de> Deserializer<'de> {
227241
let quote_bits = unsafe { static_cast_u32!(_mm256_movemask_epi8(quote_mask)) };
228242
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
229243
let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32;
244+
let v = unsafe {
245+
self.input.get_unchecked(idx..idx + quote_dist as usize) as *const [u8]
246+
as *const str
247+
};
230248
self.str_offset = idx + quote_dist as usize;
249+
231250
unsafe {
232-
let v = self.input.get_unchecked(idx..idx + quote_dist as usize) as *const [u8]
233-
as *const str;
234-
Ok(&*v)
251+
return Ok(&*v);
235252
}
236-
} else {
237-
self.parse_str_()
238253
}
254+
self.parse_str_()
239255
}
240256

241257
#[cfg_attr(not(feature = "no-inline"), inline(always))]
242258
fn parse_str_(&mut self) -> Result<&'de str> {
243259
use std::slice::from_raw_parts_mut;
244260
// Add 1 to skip the initial "
245261
let idx = self.iidx + 1;
262+
let mut padding = [0u8; 32];
246263
//let mut read: usize = 0;
247264

248265
let needs_relocation = idx - self.str_offset <= 32;
@@ -267,9 +284,22 @@ impl<'de> Deserializer<'de> {
267284
let mut src_i: usize = 0;
268285
let mut dst_i: usize = 0;
269286
loop {
270-
#[allow(clippy::cast_ptr_alignment)]
271-
let v: __m256i =
272-
unsafe { _mm256_loadu_si256(src.as_ptr().add(src_i) as *const __m256i) };
287+
let v: __m256i = if src.len() >= src_i + 32 {
288+
// This is safe since we ensure src is at least 32 wide
289+
#[allow(clippy::cast_ptr_alignment)]
290+
unsafe {
291+
_mm256_loadu_si256(src.as_ptr().add(src_i) as *const __m256i)
292+
}
293+
} else {
294+
unsafe {
295+
padding
296+
.get_unchecked_mut(..src.len() - src_i)
297+
.clone_from_slice(src.get_unchecked(src_i..));
298+
// This is safe since padding is a 32 byte array and the remaining (fewer than 32) bytes of src were copied into its start
299+
#[allow(clippy::cast_ptr_alignment)]
300+
_mm256_loadu_si256(padding.as_ptr() as *const __m256i)
301+
}
302+
};
273303

274304
#[allow(clippy::cast_ptr_alignment)]
275305
unsafe {

0 commit comments

Comments
 (0)