4
4
//! most of the design closely with a few exceptions to make it better
5
5
//! fit into the rust ecosystem.
6
6
//!
7
- //! Note: by default rustc will compile for compatibility, not
7
+ //! Note: by default rustc will compile for compatibility, not
8
8
//! performance. To take advantage of the simd part of simd json, you
9
9
//! have to use a native cpu target on an avx2 capable host system. An
10
10
//! example of how to do this can be found in the `.cargo` directory on
@@ -210,13 +210,27 @@ impl<'de> Deserializer<'de> {
210
210
// fancy in it like object keys
211
211
#[ cfg_attr( not( feature = "no-inline" ) , inline( always) ) ]
212
212
fn parse_short_str_ ( & mut self ) -> Result < & ' de str > {
213
+ let mut padding = [ 0u8 ; 32 ] ;
213
214
let idx = self . iidx + 1 ;
214
215
let src: & [ u8 ] = unsafe { & self . input . get_unchecked ( idx..) } ;
215
216
216
217
//short strings are very common for IDs
217
- #[ allow( clippy:: cast_ptr_alignment) ]
218
- let v: __m256i =
219
- unsafe { _mm256_loadu_si256 ( src. get_unchecked ( ..32 ) . as_ptr ( ) as * const __m256i ) } ;
218
+ let v: __m256i = if src. len ( ) >= 32 {
219
+ // This is safe since we ensure src is at least 32 wide
220
+ #[ allow( clippy:: cast_ptr_alignment) ]
221
+ unsafe {
222
+ _mm256_loadu_si256 ( src. get_unchecked ( ..32 ) . as_ptr ( ) as * const __m256i )
223
+ }
224
+ } else {
225
+ unsafe {
226
+ padding
227
+ . get_unchecked_mut ( ..src. len ( ) )
228
+ . clone_from_slice ( & src) ;
229
+ // This is safe since padding is always exactly 32 bytes wide
230
+ #[ allow( clippy:: cast_ptr_alignment) ]
231
+ _mm256_loadu_si256 ( padding. get_unchecked ( ..32 ) . as_ptr ( ) as * const __m256i )
232
+ }
233
+ } ;
220
234
let bs_bits: u32 = unsafe {
221
235
static_cast_u32 ! ( _mm256_movemask_epi8( _mm256_cmpeq_epi8(
222
236
v,
@@ -227,22 +241,25 @@ impl<'de> Deserializer<'de> {
227
241
let quote_bits = unsafe { static_cast_u32 ! ( _mm256_movemask_epi8( quote_mask) ) } ;
228
242
if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
229
243
let quote_dist: u32 = trailingzeroes ( u64:: from ( quote_bits) ) as u32 ;
244
+ let v = unsafe {
245
+ self . input . get_unchecked ( idx..idx + quote_dist as usize ) as * const [ u8 ]
246
+ as * const str
247
+ } ;
230
248
self . str_offset = idx + quote_dist as usize ;
249
+
231
250
unsafe {
232
- let v = self . input . get_unchecked ( idx..idx + quote_dist as usize ) as * const [ u8 ]
233
- as * const str ;
234
- Ok ( & * v)
251
+ return Ok ( & * v) ;
235
252
}
236
- } else {
237
- self . parse_str_ ( )
238
253
}
254
+ self . parse_str_ ( )
239
255
}
240
256
241
257
#[ cfg_attr( not( feature = "no-inline" ) , inline( always) ) ]
242
258
fn parse_str_ ( & mut self ) -> Result < & ' de str > {
243
259
use std:: slice:: from_raw_parts_mut;
244
260
// Add 1 to skip the initial "
245
261
let idx = self . iidx + 1 ;
262
+ let mut padding = [ 0u8 ; 32 ] ;
246
263
//let mut read: usize = 0;
247
264
248
265
let needs_relocation = idx - self . str_offset <= 32 ;
@@ -267,9 +284,22 @@ impl<'de> Deserializer<'de> {
267
284
let mut src_i: usize = 0 ;
268
285
let mut dst_i: usize = 0 ;
269
286
loop {
270
- #[ allow( clippy:: cast_ptr_alignment) ]
271
- let v: __m256i =
272
- unsafe { _mm256_loadu_si256 ( src. as_ptr ( ) . add ( src_i) as * const __m256i ) } ;
287
+ let v: __m256i = if src. len ( ) >= src_i + 32 {
288
+ // This is safe since we ensure src is at least 32 wide
289
+ #[ allow( clippy:: cast_ptr_alignment) ]
290
+ unsafe {
291
+ _mm256_loadu_si256 ( src. as_ptr ( ) . add ( src_i) as * const __m256i )
292
+ }
293
+ } else {
294
+ unsafe {
295
+ padding
296
+ . get_unchecked_mut ( ..src. len ( ) - src_i)
297
+ . clone_from_slice ( src. get_unchecked ( src_i..) ) ;
298
+ // This is safe since padding is always exactly 32 bytes wide
299
+ #[ allow( clippy:: cast_ptr_alignment) ]
300
+ _mm256_loadu_si256 ( padding. as_ptr ( ) as * const __m256i )
301
+ }
302
+ } ;
273
303
274
304
#[ allow( clippy:: cast_ptr_alignment) ]
275
305
unsafe {
0 commit comments