Skip to content

Commit 631a975

Browse files
committed
feat: trying to fix parse_str
1 parent b876ff7 commit 631a975

File tree

1 file changed

+55
-8
lines changed

1 file changed

+55
-8
lines changed

src/neon/deser.rs

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,26 @@
33
pub use crate::error::{Error, ErrorType};
44
pub use crate::Deserializer;
55
pub use crate::Result;
6-
pub use crate::neon::stage1::SIMDJSON_PADDING;
6+
pub use crate::neon::stage1::*;
77
pub use crate::neon::intrinsics::*;
88
pub use crate::neon::utf8check::*;
99
pub use crate::stringparse::*;
1010

1111
pub use crate::neon::intrinsics::*;
1212

13-
unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dst: &mut [u8]) -> ParseStringHelper {
13+
unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dstx: Option<&mut [u8]>) -> ParseStringHelper {
1414
// this can read up to 31 bytes beyond the buffer size, but we require
1515
// SIMDJSON_PADDING of padding
1616
let v0 : uint8x16_t = vld1q_u8(src.as_ptr());
1717
let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16));
18-
vst1q_u8(dst.as_mut_ptr(), v0);
19-
vst1q_u8(dst.as_mut_ptr().add(16), v1);
18+
19+
match dstx {
20+
Some(dst) => {
21+
vst1q_u8(dst.as_mut_ptr(), v0);
22+
vst1q_u8(dst.as_mut_ptr().add(16), v1);
23+
},
24+
_ => ()
25+
}
2026

2127
let bs_mask : uint8x16_t = vmovq_n_u8('\\' as u8);
2228
let qt_mask : uint8x16_t = vmovq_n_u8('"' as u8);
@@ -50,7 +56,7 @@ impl<'de> Deserializer<'de> {
5056
pub fn parse_str_(&mut self) -> Result<&'de str> {
5157
// Add 1 to skip the initial "
5258
let idx = self.iidx + 1;
53-
// let padding = [0u8; 32];
59+
// let mut padding = [0u8; 32];
5460
//let mut read: usize = 0;
5561

5662
// we include the terminal '"' so we know where to end
@@ -59,14 +65,55 @@ impl<'de> Deserializer<'de> {
5965

6066
let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) };
6167
let mut src_i: usize = 0;
62-
let len = src_i;
68+
let mut len = src_i;
69+
loop {
70+
// scan only - no destination is passed here, so nothing is stored; we just
71+
// collect the backslash and quote bits for this chunk
72+
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], None) };
73+
74+
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
75+
// we encountered quotes first. Move dst to point to quotes and exit
76+
// find out where the quote is...
77+
let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32;
78+
79+
///////////////////////
80+
// Above, check for overflow in case someone has a crazy string (>=4GB?)
81+
// But only add the overflow check when the document itself exceeds 4GB
82+
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
83+
////////////////////////
84+
85+
// we advance the point, accounting for the fact that we have a NULL termination
86+
87+
len += quote_dist as usize;
88+
unsafe {
89+
let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str;
90+
return Ok(&*v);
91+
}
92+
93+
// we compare the pointers since we care if they are 'at the same spot'
94+
// not if they are the same value
95+
}
96+
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 {
97+
// Move to the 'bad' character
98+
let bs_dist: u32 = trailingzeroes(u64::from(bs_bits));
99+
len += bs_dist as usize;
100+
src_i += bs_dist as usize;
101+
break;
102+
} else {
103+
// they are the same. Since they can't co-occur, it means we encountered
104+
// neither.
105+
src_i += 32;
106+
len += 32;
107+
}
108+
}
109+
63110
let mut dst_i: usize = 0;
64111
let dst: &mut [u8] = &mut self.strings;
65112

66113
loop {
67114
// store to dest unconditionally - we can overwrite the bits we don't like
68115
// later
69-
let ParseStringHelper {bs_bits, quote_bits} = unsafe { find_bs_bits_and_quote_bits(src, dst) };
116+
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], Some(&mut dst[dst_i..])) };
70117

71118
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
72119
// we encountered quotes first. Move dst to point to quotes and exit
@@ -143,4 +190,4 @@ impl<'de> Deserializer<'de> {
143190
}
144191
}
145192
}
146-
}
193+
}

0 commit comments

Comments
 (0)