@@ -70,18 +70,21 @@ impl Planner {
 
         // Step 1: Tokenize the SQL.
         let mut tokenizer = Tokenizer::new(sql).peekable();
+
+        // Only tokenize the beginning tokens of an `INSERT INTO` statement because the tokens of the values are unused.
+        //
+        // Stop the tokenizer on an unrecognized token because some values inputs (e.g. CSV) may not be valid for the tokenizer.
+        // See also: https://github.com/datafuselabs/databend/issues/6669
         let is_insert_stmt = tokenizer
             .peek()
             .and_then(|token| Some(token.as_ref().ok()?.kind))
             == Some(TokenKind::INSERT);
-        // Only tokenize the beginning tokens for `INSERT INTO` statement because it's unnecessary to tokenize tokens for values.
-        //
-        // Stop the tokenizer on unrecognized token because some values inputs (e.g. CSV) may not be recognized by the tokenizer.
-        // See also: https://github.com/datafuselabs/databend/issues/6669
         let mut tokens: Vec<Token> = if is_insert_stmt {
             (&mut tokenizer)
                 .take(PROBE_INSERT_INITIAL_TOKENS)
                 .take_while(|token| token.is_ok())
+                // Make sure the tokens stream is always ended with EOI.
+                .chain(std::iter::once(Ok(Token::new_eoi(sql))))
                 .collect::<Result<_>>()
                 .unwrap()
         } else {
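
Note: the pattern introduced by the hunk above is to peek at the first token to detect an `INSERT` statement, tokenize only a fixed-size prefix of it (the value tokens are not needed for planning), stop at the first token the lexer cannot handle, and then append an explicit end-of-input (EOI) token so the parser always sees a terminated stream. A minimal, self-contained sketch of that pattern follows; the string sentinel, the toy lexer, and the constant's value are illustrative stand-ins, not databend's `Tokenizer`/`Token` API.

    // Sketch only: toy lexer and string tokens stand in for databend's real types.
    const EOI: &str = "<EOI>";
    const PROBE_INSERT_INITIAL_TOKENS: usize = 4; // illustrative value

    fn main() {
        let sql = "INSERT INTO t VALUES (1, 'a;b', 2)";
        // Toy fallible lexer: plain words lex fine, anything with punctuation is "unrecognized".
        let mut lexer = sql
            .split_whitespace()
            .map(|w| {
                if w.chars().all(|c| c.is_alphanumeric()) {
                    Ok(w)
                } else {
                    Err(format!("unrecognized token: {w}"))
                }
            })
            .peekable();

        // Peek at the first token to decide whether this is an INSERT statement.
        let is_insert_stmt = lexer.peek().and_then(|t| t.as_ref().ok().copied()) == Some("INSERT");

        let tokens: Vec<&str> = if is_insert_stmt {
            (&mut lexer)
                .take(PROBE_INSERT_INITIAL_TOKENS)
                .take_while(|t| t.is_ok())
                // Always terminate the probed prefix with an explicit EOI token.
                .chain(std::iter::once(Ok(EOI)))
                .collect::<Result<_, _>>()
                .unwrap()
        } else {
            // For other statements, lex everything up front.
            lexer.collect::<Result<_, _>>().unwrap()
        };

        assert_eq!(tokens.last(), Some(&EOI));
        println!("{tokens:?}");
    }
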
@@ -116,17 +119,23 @@ impl Planner {
                 .await;
 
             if res.is_err() && matches!(tokenizer.peek(), Some(Ok(_))) {
+                // Remove the previous EOI.
+                tokens.pop();
                 // Tokenize more and try again.
                 if tokens.len() < PROBE_INSERT_MAX_TOKENS {
                     let iter = (&mut tokenizer)
                         .take(tokens.len() * 2)
                         .take_while(|token| token.is_ok())
-                        .map(|token| token.unwrap());
+                        .map(|token| token.unwrap())
+                        // Make sure the tokens stream is always ended with EOI.
+                        .chain(std::iter::once(Token::new_eoi(sql)));
                     tokens.extend(iter);
                 } else {
                     let iter = (&mut tokenizer)
                         .take_while(|token| token.is_ok())
-                        .map(|token| token.unwrap());
+                        .map(|token| token.unwrap())
+                        // Make sure the tokens stream is always ended with EOI.
+                        .chain(std::iter::once(Token::new_eoi(sql)));
                     tokens.extend(iter);
                 };
             } else {
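
Note: the second hunk applies the same sentinel discipline to the retry path. When the probe parse fails and the tokenizer still has valid tokens, the previous EOI is popped, the probe roughly doubles (until `PROBE_INSERT_MAX_TOKENS`, after which the tokenizer is drained), and a fresh EOI is appended before parsing again. Below is a minimal, self-contained sketch of that grow-and-retry loop; the toy lexer, the toy `parse` function, and the constant values are illustrative stand-ins, not databend's API.

    // Sketch only: toy lexer/parser and string tokens stand in for databend's real types.
    const EOI: &str = "<EOI>";
    const PROBE_INSERT_INITIAL_TOKENS: usize = 2; // illustrative values
    const PROBE_INSERT_MAX_TOKENS: usize = 8;

    // Toy "parser": succeeds once it sees at least `need` tokens before the EOI sentinel.
    fn parse(tokens: &[&str], need: usize) -> Result<usize, String> {
        let n = tokens.iter().position(|&t| t == EOI).unwrap_or(tokens.len());
        if n >= need {
            Ok(n)
        } else {
            Err(format!("expected at least {need} tokens, got {n}"))
        }
    }

    fn main() {
        let sql = "INSERT INTO t VALUES 1 2 3";
        // Toy lexer: every whitespace-separated word lexes successfully here.
        let mut lexer = sql.split_whitespace().map(|w| Ok::<&str, String>(w)).peekable();

        // Initial bounded probe, always terminated with the EOI sentinel.
        let mut tokens: Vec<&str> = (&mut lexer)
            .take(PROBE_INSERT_INITIAL_TOKENS)
            .take_while(|t| t.is_ok())
            .map(|t| t.unwrap())
            .chain(std::iter::once(EOI))
            .collect();

        let res = loop {
            let res = parse(&tokens, 5);
            if res.is_err() && matches!(lexer.peek(), Some(Ok(_))) {
                // Remove the previous EOI before extending the token stream.
                tokens.pop();
                // Roughly double the probe each round; past the cap, just drain the lexer.
                let budget = if tokens.len() < PROBE_INSERT_MAX_TOKENS {
                    tokens.len() * 2
                } else {
                    usize::MAX
                };
                let more = (&mut lexer)
                    .take(budget)
                    .take_while(|t| t.is_ok())
                    .map(|t| t.unwrap())
                    // Keep the stream terminated with EOI.
                    .chain(std::iter::once(EOI));
                tokens.extend(more);
            } else {
                break res;
            }
        };

        println!("parsed {} tokens before EOI", res.unwrap());
    }

Re-appending the sentinel after every extension is what keeps the single `tokens.pop()` correct: the token vector always ends with exactly one EOI, so removing one element is enough before the next extension.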