3
3
pub use crate :: error:: { Error , ErrorType } ;
4
4
pub use crate :: Deserializer ;
5
5
pub use crate :: Result ;
6
- pub use crate :: neon:: stage1:: SIMDJSON_PADDING ;
6
+ pub use crate :: neon:: stage1:: * ;
7
7
pub use crate :: neon:: intrinsics:: * ;
8
8
pub use crate :: neon:: utf8check:: * ;
9
9
pub use crate :: stringparse:: * ;
10
10
11
11
pub use crate :: neon:: intrinsics:: * ;
12
12
13
- unsafe fn find_bs_bits_and_quote_bits ( src : & [ u8 ] , dst : & mut [ u8 ] ) -> ParseStringHelper {
13
+ unsafe fn find_bs_bits_and_quote_bits ( src : & [ u8 ] , dstx : Option < & mut [ u8 ] > ) -> ParseStringHelper {
14
14
// this can read up to 31 bytes beyond the buffer size, but we require
15
15
// SIMDJSON_PADDING of padding
16
16
let v0 : uint8x16_t = vld1q_u8 ( src. as_ptr ( ) ) ;
17
17
let v1 : uint8x16_t = vld1q_u8 ( src. as_ptr ( ) . add ( 16 ) ) ;
18
- vst1q_u8 ( dst. as_mut_ptr ( ) , v0) ;
19
- vst1q_u8 ( dst. as_mut_ptr ( ) . add ( 16 ) , v1) ;
18
+
19
+ match dstx {
20
+ Some ( dst) => {
21
+ vst1q_u8 ( dst. as_mut_ptr ( ) , v0) ;
22
+ vst1q_u8 ( dst. as_mut_ptr ( ) . add ( 16 ) , v1) ;
23
+ } ,
24
+ _ => ( )
25
+ }
20
26
21
27
let bs_mask : uint8x16_t = vmovq_n_u8 ( '\\' as u8 ) ;
22
28
let qt_mask : uint8x16_t = vmovq_n_u8 ( '"' as u8 ) ;
@@ -50,7 +56,7 @@ impl<'de> Deserializer<'de> {
50
56
pub fn parse_str_ ( & mut self ) -> Result < & ' de str > {
51
57
// Add 1 to skip the initial "
52
58
let idx = self . iidx + 1 ;
53
- // let padding = [0u8; 32];
59
+ // let mut padding = [0u8; 32];
54
60
//let mut read: usize = 0;
55
61
56
62
// we include the terminal '"' so we know where to end
@@ -59,14 +65,55 @@ impl<'de> Deserializer<'de> {
59
65
60
66
let src: & [ u8 ] = unsafe { & self . input . get_unchecked ( idx..) } ;
61
67
let mut src_i: usize = 0 ;
62
- let len = src_i;
68
+ let mut len = src_i;
69
+ loop {
70
+ // store to dest unconditionally - we can overwrite the bits we don't like
71
+ // later
72
+ let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits ( & src[ src_i..] , None ) } ;
73
+
74
+ if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
75
+ // we encountered quotes first. Move dst to point to quotes and exit
76
+ // find out where the quote is...
77
+ let quote_dist: u32 = trailingzeroes ( u64:: from ( quote_bits) ) as u32 ;
78
+
79
+ ///////////////////////
80
+ // Above, check for overflow in case someone has a crazy string (>=4GB?)
81
+ // But only add the overflow check when the document itself exceeds 4GB
82
+ // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
83
+ ////////////////////////
84
+
85
+ // we advance the point, accounting for the fact that we have a NULL termination
86
+
87
+ len += quote_dist as usize ;
88
+ unsafe {
89
+ let v = self . input . get_unchecked ( idx..idx + len) as * const [ u8 ] as * const str ;
90
+ return Ok ( & * v) ;
91
+ }
92
+
93
+ // we compare the pointers since we care if they are 'at the same spot'
94
+ // not if they are the same value
95
+ }
96
+ if ( quote_bits. wrapping_sub ( 1 ) & bs_bits) != 0 {
97
+ // Move to the 'bad' character
98
+ let bs_dist: u32 = trailingzeroes ( u64:: from ( bs_bits) ) ;
99
+ len += bs_dist as usize ;
100
+ src_i += bs_dist as usize ;
101
+ break ;
102
+ } else {
103
+ // they are the same. Since they can't co-occur, it means we encountered
104
+ // neither.
105
+ src_i += 32 ;
106
+ len += 32 ;
107
+ }
108
+ }
109
+
63
110
let mut dst_i: usize = 0 ;
64
111
let dst: & mut [ u8 ] = & mut self . strings ;
65
112
66
113
loop {
67
114
// store to dest unconditionally - we can overwrite the bits we don't like
68
115
// later
69
- let ParseStringHelper { bs_bits, quote_bits} = unsafe { find_bs_bits_and_quote_bits ( src, dst) } ;
116
+ let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits ( & src[ src_i.. ] , Some ( & mut dst[ dst_i.. ] ) ) } ;
70
117
71
118
if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
72
119
// we encountered quotes first. Move dst to point to quotes and exit
@@ -143,4 +190,4 @@ impl<'de> Deserializer<'de> {
143
190
}
144
191
}
145
192
}
146
- }
193
+ }
0 commit comments