Skip to content

Commit 6dad005

Browse files
committed
feat(query): fast parse insert values
1 parent 288c3bb commit 6dad005

File tree

7 files changed

+467
-22
lines changed

7 files changed

+467
-22
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/io/src/cursor_ext/cursor_read_string_ext.rs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::collections::VecDeque;
1516
use std::io::BufRead;
1617
use std::io::Cursor;
1718
use std::io::ErrorKind;
@@ -23,6 +24,11 @@ use crate::cursor_ext::cursor_read_bytes_ext::ReadBytesExt;
2324
/// String-literal reading helpers for buffered readers.
///
/// Every method appends the bytes it decodes to the caller-supplied `buf`
/// and reports failures through the returned `Result`.
pub trait BufferReadStringExt {
    /// Reads text delimited by the `quota` byte into `buf`.
    ///
    /// NOTE(review): the implementation is outside this view; the exact
    /// escape handling should be confirmed against it.
    fn read_quoted_text(&mut self, buf: &mut Vec<u8>, quota: u8) -> Result<()>;

    /// Reads escaped string text into `buf`.
    ///
    /// NOTE(review): the implementation is outside this view; confirm the
    /// terminating condition and the set of recognized escapes there.
    fn read_escaped_string_text(&mut self, buf: &mut Vec<u8>) -> Result<()>;

    /// Reads a single-quoted string literal into `buf`, using `positions`
    /// (pre-computed offsets of every `'` and `\` in the input) to jump
    /// between candidate terminators instead of scanning each byte.
    ///
    /// Entries are popped from the front of `positions` as they are used.
    fn fast_read_quoted_text(
        &mut self,
        buf: &mut Vec<u8>,
        positions: &mut VecDeque<usize>,
    ) -> Result<()>;
}
2733

2834
impl<T> BufferReadStringExt for Cursor<T>
@@ -111,6 +117,86 @@ where T: AsRef<[u8]>
111117
}
112118
Ok(())
113119
}
120+
121+
// `positions` stores the positions of all `'` and `\` that are pre-generated
122+
// by the `Aho-Corasick` algorithm, which can use SIMD instructions to
123+
// accelerate the search process.
124+
// Using these positions, we can directly jump to the end of the text,
125+
// instead of inefficiently iterating over the buffer step by step.
126+
fn fast_read_quoted_text(
127+
&mut self,
128+
buf: &mut Vec<u8>,
129+
positions: &mut VecDeque<usize>,
130+
) -> Result<()> {
131+
self.must_ignore_byte(b'\'')?;
132+
let mut start = self.position() as usize;
133+
check_pos(start - 1, positions)?;
134+
135+
// Get next possible end position.
136+
while let Some(pos) = positions.pop_front() {
137+
let len = pos - start;
138+
buf.extend_from_slice(&self.remaining_slice()[..len]);
139+
self.consume(len);
140+
141+
if self.ignore_byte(b'\'') {
142+
return Ok(());
143+
} else if self.ignore_byte(b'\\') {
144+
let b = self.remaining_slice();
145+
if b.is_empty() {
146+
return Err(std::io::Error::new(
147+
ErrorKind::InvalidData,
148+
"Expected to have terminated string literal after escaped char '\' ."
149+
.to_string(),
150+
));
151+
}
152+
let c = b[0];
153+
self.ignore_byte(c);
154+
155+
match c {
156+
b'n' => buf.push(b'\n'),
157+
b't' => buf.push(b'\t'),
158+
b'r' => buf.push(b'\r'),
159+
b'0' => buf.push(b'\0'),
160+
b'\'' => {
161+
check_pos(pos + 1, positions)?;
162+
buf.push(b'\'');
163+
}
164+
b'\\' => {
165+
check_pos(pos + 1, positions)?;
166+
buf.push(b'\\');
167+
}
168+
b'\"' => buf.push(b'\"'),
169+
_ => {
170+
buf.push(b'\\');
171+
buf.push(c);
172+
}
173+
}
174+
} else {
175+
break;
176+
}
177+
start = self.position() as usize;
178+
}
179+
Err(std::io::Error::new(
180+
ErrorKind::InvalidData,
181+
format!(
182+
"Expected to have terminated string literal after quota \', while consumed buf: {:?}",
183+
buf
184+
),
185+
))
186+
}
187+
}
188+
189+
// Check that the pre-calculated position is correct.
190+
/// Pops the front entry of `positions` and succeeds only when it equals
/// `curr_pos`.
///
/// The front entry is consumed whether or not it matches. An empty deque or
/// a mismatching entry yields an `ErrorKind::InvalidData` error.
fn check_pos(curr_pos: usize, positions: &mut VecDeque<usize>) -> Result<()> {
    match positions.pop_front() {
        Some(expected) if expected == curr_pos => Ok(()),
        _ => Err(std::io::Error::new(
            ErrorKind::InvalidData,
            "Expected to have quotes in string literal.".to_string(),
        )),
    }
}
115201

116202
fn unescape(c: u8) -> u8 {

0 commit comments

Comments
 (0)