
Commit bf6f647

Merge pull request #7534 from andylokandy/fixxxx
2 parents 69f82ef + 79e5b6f

2 files changed (+25 −11 lines)

src/query/ast/src/parser/token.rs

Lines changed: 9 additions & 5 deletions
@@ -29,6 +29,14 @@ pub struct Token<'a> {
 }
 
 impl<'a> Token<'a> {
+    pub fn new_eoi(source: &'a str) -> Self {
+        Token {
+            source,
+            kind: TokenKind::EOI,
+            span: source.len()..source.len(),
+        }
+    }
+
     pub fn text(&self) -> &'a str {
         &self.source[self.span.clone()]
     }
@@ -78,11 +86,7 @@ impl<'a> Iterator for Tokenizer<'a> {
            })),
            None if !self.eoi => {
                self.eoi = true;
-                Some(Ok(Token {
-                    source: self.source,
-                    kind: TokenKind::EOI,
-                    span: (self.lexer.span().end)..(self.lexer.span().end),
-                }))
+                Some(Ok(Token::new_eoi(self.source)))
            }
            None => None,
        }
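The helper above centralizes how the end-of-input sentinel is built: an EOI token whose span is the empty range at the very end of the source, so the tokenizer and the planner can append the same terminator. Below is a minimal, self-contained sketch of that idea; Token, TokenKind::EOI, new_eoi, and text mirror the names in the diff, while the tiny main driver is purely illustrative and not the crate's actual API.

// Minimal sketch of the EOI-sentinel token from token.rs (illustrative only).
use std::ops::Range;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    EOI,
}

#[derive(Debug, Clone)]
struct Token<'a> {
    source: &'a str,
    kind: TokenKind,
    span: Range<usize>,
}

impl<'a> Token<'a> {
    // Same shape as the helper added in the diff: an EOI token whose span is
    // the empty range at the end of the source text.
    fn new_eoi(source: &'a str) -> Self {
        Token {
            source,
            kind: TokenKind::EOI,
            span: source.len()..source.len(),
        }
    }

    fn text(&self) -> &'a str {
        &self.source[self.span.clone()]
    }
}

fn main() {
    let sql = "INSERT INTO t VALUES";
    let eoi = Token::new_eoi(sql);
    assert_eq!(eoi.kind, TokenKind::EOI);
    assert_eq!(eoi.span, sql.len()..sql.len());
    assert_eq!(eoi.text(), ""); // the sentinel carries no text
    println!("EOI at byte offset {}", eoi.span.start);
}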

src/query/service/src/sql/planner/mod.rs

Lines changed: 16 additions & 6 deletions
@@ -70,18 +70,21 @@ impl Planner {
 
        // Step 1: Tokenize the SQL.
        let mut tokenizer = Tokenizer::new(sql).peekable();
+
+        // Only tokenize the beginning tokens for `INSERT INTO` statement because the tokens of values is unused.
+        //
+        // Stop the tokenizer on unrecognized token because some values inputs (e.g. CSV) may not be valid for the tokenizer.
+        // See also: https://github.com/datafuselabs/databend/issues/6669
        let is_insert_stmt = tokenizer
            .peek()
            .and_then(|token| Some(token.as_ref().ok()?.kind))
            == Some(TokenKind::INSERT);
-        // Only tokenize the beginning tokens for `INSERT INTO` statement because it's unnecessary to tokenize tokens for values.
-        //
-        // Stop the tokenizer on unrecognized token because some values inputs (e.g. CSV) may not be recognized by the tokenizer.
-        // See also: https://github.com/datafuselabs/databend/issues/6669
        let mut tokens: Vec<Token> = if is_insert_stmt {
            (&mut tokenizer)
                .take(PROBE_INSERT_INITIAL_TOKENS)
                .take_while(|token| token.is_ok())
+                // Make sure the tokens stream is always ended with EOI.
+                .chain(std::iter::once(Ok(Token::new_eoi(sql))))
                .collect::<Result<_>>()
                .unwrap()
        } else {
@@ -116,17 +119,24 @@ impl Planner {
            .await;
 
        if res.is_err() && matches!(tokenizer.peek(), Some(Ok(_))) {
+            // Remove the previous EOI.
+            tokens.pop();
            // Tokenize more and try again.
            if tokens.len() < PROBE_INSERT_MAX_TOKENS {
                let iter = (&mut tokenizer)
                    .take(tokens.len() * 2)
                    .take_while(|token| token.is_ok())
-                    .map(|token| token.unwrap());
+                    .chain(std::iter::once(Ok(Token::new_eoi(sql))))
+                    .map(|token| token.unwrap())
+                    // Make sure the tokens stream is always ended with EOI.
+                    .chain(std::iter::once(Token::new_eoi(sql)));
                tokens.extend(iter);
            } else {
                let iter = (&mut tokenizer)
                    .take_while(|token| token.is_ok())
-                    .map(|token| token.unwrap());
+                    .map(|token| token.unwrap())
+                    // Make sure the tokens stream is always ended with EOI.
+                    .chain(std::iter::once(Token::new_eoi(sql)));
                tokens.extend(iter);
            };
        } else {
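The planner change guarantees that the probed token prefix handed to the parser always ends with an EOI sentinel, and pops that sentinel before pulling more tokens for a retry. Here is a standalone sketch of the core take(..).take_while(..).chain(iter::once(..)) pattern under simplified assumptions: the fake lexed stream, Tok, and PROBE_INITIAL_TOKENS are illustrative stand-ins, not the planner's real types or constants.

// Sketch: probe a short prefix of a fallible token stream, stop at the first
// lexing error, and always terminate the collected tokens with an EOI sentinel.
use std::iter;

#[derive(Debug, Clone, PartialEq)]
enum Tok {
    Word(String),
    Eoi,
}

fn main() {
    const PROBE_INITIAL_TOKENS: usize = 4; // stand-in for PROBE_INSERT_INITIAL_TOKENS

    // A fallible token stream: the trailing values payload (e.g. CSV) fails to lex.
    let lexed: Vec<Result<Tok, String>> = vec![
        Ok(Tok::Word("INSERT".into())),
        Ok(Tok::Word("INTO".into())),
        Ok(Tok::Word("t".into())),
        Ok(Tok::Word("VALUES".into())),
        Err("unrecognized token in values payload".into()),
    ];

    // Probe only a short prefix, stop on the first error, and make sure the
    // stream handed to the parser is always terminated by an EOI token.
    let tokens: Vec<Tok> = lexed
        .into_iter()
        .take(PROBE_INITIAL_TOKENS)
        .take_while(|t| t.is_ok())
        .chain(iter::once(Ok(Tok::Eoi)))
        .collect::<Result<_, _>>()
        .unwrap();

    assert_eq!(tokens.last(), Some(&Tok::Eoi));
    println!("{tokens:?}");
}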
