|
4 | 4 | //! most of the design closely with a few exceptions to make it better
|
5 | 5 | //! fit into the rust ecosystem.
|
6 | 6 | //!
|
7 |
| -//! Note: by default rustc will compile for compatibility, not |
| 7 | +//! Note: by default rustc will compile for compatibility, not |
8 | 8 | //! performance, to take advantage of the simd part of simd json. You
|
9 | 9 | //! have to use a native cpu target on an avx2 capable host system. An
|
10 | 10 | //! example how to do this can be found in the `.cargo` directory on
|
@@ -157,19 +157,16 @@ impl<'de> Deserializer<'de> {
|
157 | 157 | }
|
158 | 158 | };
|
159 | 159 |
|
160 |
| - let (counts, str_len) = Deserializer::validate(input, &structural_indexes)?; |
| 160 | + let counts = Deserializer::validate(input, &structural_indexes)?; |
161 | 161 |
|
162 |
| - let mut v = Vec::with_capacity(str_len + SIMDJSON_PADDING); |
163 |
| - unsafe { |
164 |
| - v.set_len(str_len + SIMDJSON_PADDING); |
165 |
| - }; |
| 162 | + let strings = Vec::with_capacity(len + SIMDJSON_PADDING); |
166 | 163 |
|
167 | 164 | Ok(Deserializer {
|
168 | 165 | counts,
|
169 | 166 | structural_indexes,
|
170 | 167 | input,
|
171 | 168 | idx: 0,
|
172 |
| - strings: v, |
| 169 | + strings, |
173 | 170 | str_offset: 0,
|
174 | 171 | iidx: 0,
|
175 | 172 | })
|
@@ -206,83 +203,87 @@ impl<'de> Deserializer<'de> {
|
206 | 203 | unsafe { *self.counts.get_unchecked(self.idx) }
|
207 | 204 | }
|
208 | 205 |
|
209 |
| - // We parse a string that's likely to be less then 32 characters and without any |
210 |
| - // fancy in it like object keys |
211 |
| - #[cfg_attr(not(feature = "no-inline"), inline(always))] |
212 |
| - fn parse_short_str_(&mut self) -> Result<&'de str> { |
213 |
| - let mut padding = [0u8; 32]; |
214 |
| - let idx = self.iidx + 1; |
215 |
| - let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) }; |
216 |
| - |
217 |
| - //short strings are very common for IDs |
218 |
| - let v: __m256i = if src.len() >= 32 { |
219 |
| - // This is safe since we ensure src is at least 32 wide |
220 |
| - #[allow(clippy::cast_ptr_alignment)] |
221 |
| - unsafe { |
222 |
| - _mm256_loadu_si256(src.get_unchecked(..32).as_ptr() as *const __m256i) |
223 |
| - } |
224 |
| - } else { |
225 |
| - unsafe { |
226 |
| - padding |
227 |
| - .get_unchecked_mut(..src.len()) |
228 |
| - .clone_from_slice(&src); |
229 |
| - // This is safe since we ensure src is at least 32 wide |
230 |
| - #[allow(clippy::cast_ptr_alignment)] |
231 |
| - _mm256_loadu_si256(padding.get_unchecked(..32).as_ptr() as *const __m256i) |
232 |
| - } |
233 |
| - }; |
234 |
| - let bs_bits: u32 = unsafe { |
235 |
| - static_cast_u32!(_mm256_movemask_epi8(_mm256_cmpeq_epi8( |
236 |
| - v, |
237 |
| - _mm256_set1_epi8(b'\\' as i8) |
238 |
| - ))) |
239 |
| - }; |
240 |
| - let quote_mask = unsafe { _mm256_cmpeq_epi8(v, _mm256_set1_epi8(b'"' as i8)) }; |
241 |
| - let quote_bits = unsafe { static_cast_u32!(_mm256_movemask_epi8(quote_mask)) }; |
242 |
| - if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { |
243 |
| - let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; |
244 |
| - let v = unsafe { |
245 |
| - self.input.get_unchecked(idx..idx + quote_dist as usize) as *const [u8] |
246 |
| - as *const str |
247 |
| - }; |
248 |
| - self.str_offset = idx + quote_dist as usize; |
249 |
| - |
250 |
| - unsafe { |
251 |
| - return Ok(&*v); |
252 |
| - } |
253 |
| - } |
254 |
| - self.parse_str_() |
255 |
| - } |
256 |
| - |
257 | 206 | #[cfg_attr(not(feature = "no-inline"), inline(always))]
|
258 | 207 | fn parse_str_(&mut self) -> Result<&'de str> {
|
259 |
| - use std::slice::from_raw_parts_mut; |
260 | 208 | // Add 1 to skip the initial "
|
261 | 209 | let idx = self.iidx + 1;
|
262 | 210 | let mut padding = [0u8; 32];
|
263 | 211 | //let mut read: usize = 0;
|
264 | 212 |
|
265 |
| - let needs_relocation = idx - self.str_offset <= 32; |
266 | 213 | // we include the terminal '"' so we know where to end
|
267 | 214 | // This is safe since we check sub's length in the range access above and only
|
268 | 215 | // create sub-slices from sub up to `sub.len()`.
|
269 | 216 |
|
270 |
| - // if we don't need relocation we can write directly to the input |
271 |
| - // saving us to copy data to the string storage first and then |
272 |
| - // back tot he input. |
273 |
| - // We can't always do that as if we're less then 32 characters |
274 |
| - // behind we'll overwrite important parts of the input. |
275 |
| - let dst: &mut [u8] = if needs_relocation { |
276 |
| - &mut self.strings |
277 |
| - } else { |
278 |
| - let ptr = self.input.as_mut_ptr(); |
279 |
| - unsafe { |
280 |
| - from_raw_parts_mut(ptr.add(self.str_offset), self.input.len() - self.str_offset) |
281 |
| - } |
282 |
| - }; |
283 | 217 | let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) };
|
284 | 218 | let mut src_i: usize = 0;
|
| 219 | + let mut len = src_i; |
| 220 | + loop { |
| 221 | + let v: __m256i = if src.len() >= src_i + 32 { |
| 222 | + // This is safe since we ensure src is at least 32 wide |
| 223 | + #[allow(clippy::cast_ptr_alignment)] |
| 224 | + unsafe { |
| 225 | + _mm256_loadu_si256(src.as_ptr().add(src_i) as *const __m256i) |
| 226 | + } |
| 227 | + } else { |
| 228 | + unsafe { |
| 229 | + padding |
| 230 | + .get_unchecked_mut(..src.len() - src_i) |
| 231 | + .clone_from_slice(src.get_unchecked(src_i..)); |
| 233 | + // This is safe since `padding` is exactly 32 bytes wide |
| 233 | + #[allow(clippy::cast_ptr_alignment)] |
| 234 | + _mm256_loadu_si256(padding.as_ptr() as *const __m256i) |
| 235 | + } |
| 236 | + }; |
| 237 | + |
| 238 | + // store to dest unconditionally - we can overwrite the bits we don't like |
| 239 | + // later |
| 240 | + let bs_bits: u32 = unsafe { |
| 241 | + static_cast_u32!(_mm256_movemask_epi8(_mm256_cmpeq_epi8( |
| 242 | + v, |
| 243 | + _mm256_set1_epi8(b'\\' as i8) |
| 244 | + ))) |
| 245 | + }; |
| 246 | + let quote_mask = unsafe { _mm256_cmpeq_epi8(v, _mm256_set1_epi8(b'"' as i8)) }; |
| 247 | + let quote_bits = unsafe { static_cast_u32!(_mm256_movemask_epi8(quote_mask)) }; |
| 248 | + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { |
| 249 | + // we encountered quotes first. Move dst to point to quotes and exit |
| 250 | + // find out where the quote is... |
| 251 | + let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; |
| 252 | + |
| 253 | + /////////////////////// |
| 254 | + // Above, check for overflow in case someone has a crazy string (>=4GB?) |
| 255 | + // But only add the overflow check when the document itself exceeds 4GB |
| 256 | + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. |
| 257 | + //////////////////////// |
| 258 | + |
| 259 | + // we advance the point, accounting for the fact that we have a NULL termination |
| 260 | + |
| 261 | + len += quote_dist as usize; |
| 262 | + unsafe { |
| 263 | + let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; |
| 264 | + return Ok(&*v); |
| 265 | + } |
| 266 | + |
| 267 | + // we compare the pointers since we care if they are 'at the same spot' |
| 268 | + // not if they are the same value |
| 269 | + } |
| 270 | + if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { |
| 271 | + // Move to the 'bad' character |
| 272 | + let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); |
| 273 | + len += bs_dist as usize; |
| 274 | + src_i += bs_dist as usize; |
| 275 | + break; |
| 276 | + } else { |
| 277 | + // they are the same. Since they can't co-occur, it means we encountered |
| 278 | + // neither. |
| 279 | + src_i += 32; |
| 280 | + len += 32; |
| 281 | + } |
| 282 | + } |
| 283 | + |
285 | 284 | let mut dst_i: usize = 0;
|
| 285 | + let dst: &mut [u8] = &mut self.strings; |
| 286 | + |
286 | 287 | loop {
|
287 | 288 | let v: __m256i = if src.len() >= src_i + 32 {
|
288 | 289 | // This is safe since we ensure src is at least 32 wide
|
@@ -331,15 +332,11 @@ impl<'de> Deserializer<'de> {
|
331 | 332 |
|
332 | 333 | dst_i += quote_dist as usize;
|
333 | 334 | unsafe {
|
334 |
| - if needs_relocation { |
335 |
| - self.input |
336 |
| - .get_unchecked_mut(self.str_offset..self.str_offset + dst_i as usize) |
337 |
| - .clone_from_slice(&self.strings.get_unchecked(..dst_i)); |
338 |
| - } |
339 |
| - let v = self |
340 |
| - .input |
341 |
| - .get_unchecked(self.str_offset..self.str_offset + dst_i as usize) |
342 |
| - as *const [u8] as *const str; |
| 335 | + self.input |
| 336 | + .get_unchecked_mut(idx + len..idx + len + dst_i) |
| 337 | + .clone_from_slice(&self.strings.get_unchecked(..dst_i)); |
| 338 | + let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8] |
| 339 | + as *const str; |
343 | 340 | self.str_offset += dst_i as usize;
|
344 | 341 | return Ok(&*v);
|
345 | 342 | }
|
|
0 commit comments