12
12
// See the License for the specific language governing permissions and
13
13
// limitations under the License.
14
14
15
+ use std:: collections:: VecDeque ;
15
16
use std:: io:: BufRead ;
16
17
use std:: io:: Cursor ;
17
18
use std:: io:: ErrorKind ;
@@ -23,6 +24,11 @@ use crate::cursor_ext::cursor_read_bytes_ext::ReadBytesExt;
23
24
/// String-reading helpers for a byte buffer reader.
///
/// Every method appends the bytes it decodes to the caller-supplied `buf`
/// and reports failure through the returned `Result`.
pub trait BufferReadStringExt {
    /// Reads quoted text into `buf`.
    ///
    /// NOTE(review): `quota` is presumably the quote byte that delimits the
    /// text; the implementation is not visible here, so confirm against it.
    fn read_quoted_text(&mut self, buf: &mut Vec<u8>, quota: u8) -> Result<()>;

    /// Reads escaped string text into `buf`.
    ///
    /// NOTE(review): the exact escape rules live in the implementation, which
    /// is not visible here.
    fn read_escaped_string_text(&mut self, buf: &mut Vec<u8>) -> Result<()>;

    /// Reads a single-quoted string literal into `buf`, guided by `positions`.
    ///
    /// `positions` carries pre-computed offsets of the `'` and `\` bytes in the
    /// underlying buffer; offsets are popped from the front as they are used.
    fn fast_read_quoted_text(
        &mut self,
        buf: &mut Vec<u8>,
        positions: &mut VecDeque<usize>,
    ) -> Result<()>;
}
27
33
28
34
impl < T > BufferReadStringExt for Cursor < T >
@@ -111,6 +117,86 @@ where T: AsRef<[u8]>
111
117
}
112
118
Ok ( ( ) )
113
119
}
120
+
121
+ // `positions` stores the positions of all `'` and `\` that are pre-generated
122
+ // by the `Aho-Corasick` algorithm, which can use SIMD instructions to
123
+ // accelerate the search process.
124
+ // Using these positions, we can directly jump to the end of the text,
125
+ // instead of inefficient step-by-step iterate over the buffer.
126
+ fn fast_read_quoted_text (
127
+ & mut self ,
128
+ buf : & mut Vec < u8 > ,
129
+ positions : & mut VecDeque < usize > ,
130
+ ) -> Result < ( ) > {
131
+ self . must_ignore_byte ( b'\'' ) ?;
132
+ let mut start = self . position ( ) as usize ;
133
+ check_pos ( start - 1 , positions) ?;
134
+
135
+ // Get next possible end position.
136
+ while let Some ( pos) = positions. pop_front ( ) {
137
+ let len = pos - start;
138
+ buf. extend_from_slice ( & self . remaining_slice ( ) [ ..len] ) ;
139
+ self . consume ( len) ;
140
+
141
+ if self . ignore_byte ( b'\'' ) {
142
+ return Ok ( ( ) ) ;
143
+ } else if self . ignore_byte ( b'\\' ) {
144
+ let b = self . remaining_slice ( ) ;
145
+ if b. is_empty ( ) {
146
+ return Err ( std:: io:: Error :: new (
147
+ ErrorKind :: InvalidData ,
148
+ "Expected to have terminated string literal after escaped char '\' ."
149
+ . to_string ( ) ,
150
+ ) ) ;
151
+ }
152
+ let c = b[ 0 ] ;
153
+ self . ignore_byte ( c) ;
154
+
155
+ match c {
156
+ b'n' => buf. push ( b'\n' ) ,
157
+ b't' => buf. push ( b'\t' ) ,
158
+ b'r' => buf. push ( b'\r' ) ,
159
+ b'0' => buf. push ( b'\0' ) ,
160
+ b'\'' => {
161
+ check_pos ( pos + 1 , positions) ?;
162
+ buf. push ( b'\'' ) ;
163
+ }
164
+ b'\\' => {
165
+ check_pos ( pos + 1 , positions) ?;
166
+ buf. push ( b'\\' ) ;
167
+ }
168
+ b'\"' => buf. push ( b'\"' ) ,
169
+ _ => {
170
+ buf. push ( b'\\' ) ;
171
+ buf. push ( c) ;
172
+ }
173
+ }
174
+ } else {
175
+ break ;
176
+ }
177
+ start = self . position ( ) as usize ;
178
+ }
179
+ Err ( std:: io:: Error :: new (
180
+ ErrorKind :: InvalidData ,
181
+ format ! (
182
+ "Expected to have terminated string literal after quota \' , while consumed buf: {:?}" ,
183
+ buf
184
+ ) ,
185
+ ) )
186
+ }
187
+ }
188
+
189
/// Checks that the pre-calculated position at the front of `positions` is
/// exactly `curr_pos`.
///
/// The front entry is popped whether or not it matches. A mismatch or an empty
/// queue yields an `ErrorKind::InvalidData` error.
fn check_pos(curr_pos: usize, positions: &mut VecDeque<usize>) -> Result<()> {
    match positions.pop_front() {
        Some(expected) if expected == curr_pos => Ok(()),
        _ => Err(std::io::Error::new(
            ErrorKind::InvalidData,
            "Expected to have quotes in string literal.",
        )),
    }
}
115
201
116
202
fn unescape ( c : u8 ) -> u8 {
0 commit comments