Skip to content

Commit 7f7c735

Browse files
authored
fix: string Scalar should be escaped in sql_display (#17854)
* fix: string Scalar should be escaped when displayed, so that the output can be parsed again. * update tests. * fix * update tests.
1 parent b73a9eb commit 7f7c735

File tree

9 files changed

+292
-306
lines changed

9 files changed

+292
-306
lines changed

src/query/ast/src/parser/parser.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,10 @@ pub fn run_parser<O>(
123123
} else {
124124
Err(ParseError(
125125
transform_span(&rest[..1]),
126-
"unable to parse rest of the sql".to_string(),
126+
format!(
127+
"unable to parse rest of the sql, rest tokens: {:?} ",
128+
rest.tokens
129+
),
127130
))
128131
}
129132
}

src/query/expression/src/utils/display.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::fmt::Write;
2020
use comfy_table::Cell;
2121
use comfy_table::Table;
2222
use databend_common_ast::ast::quote::display_ident;
23+
use databend_common_ast::ast::quote::QuotedString;
2324
use databend_common_ast::parser::Dialect;
2425
use databend_common_column::binary::BinaryColumn;
2526
use databend_common_io::deserialize_bitmap;
@@ -244,7 +245,7 @@ impl Display for ScalarRef<'_> {
244245
}
245246
Ok(())
246247
}
247-
ScalarRef::String(s) => write!(f, "'{s}'"),
248+
ScalarRef::String(s) => write!(f, "{}", QuotedString(s, '\'')),
248249
ScalarRef::Timestamp(t) => write!(f, "'{}'", timestamp_to_string(*t, &TimeZone::UTC)),
249250
ScalarRef::Date(d) => write!(f, "'{}'", date_to_string(*d as i64, &TimeZone::UTC)),
250251
ScalarRef::Interval(interval) => write!(f, "'{}'", interval_to_string(interval)),
@@ -282,19 +283,19 @@ impl Display for ScalarRef<'_> {
282283
ScalarRef::Variant(s) => {
283284
let raw_jsonb = RawJsonb::new(s);
284285
let value = raw_jsonb.to_string();
285-
write!(f, "'{value}'")
286+
write!(f, "{}", QuotedString(value, '\''))
286287
}
287288
ScalarRef::Geometry(s) => {
288289
let geom = ewkb_to_geo(&mut Ewkb(s))
289290
.and_then(|(geo, srid)| geo_to_ewkt(geo, srid))
290291
.unwrap_or_else(|e| format!("GeozeroError: {:?}", e));
291-
write!(f, "'{geom}'")
292+
write!(f, "{}", QuotedString(geom, '\''))
292293
}
293294
ScalarRef::Geography(v) => {
294295
let geog = ewkb_to_geo(&mut Ewkb(v.0))
295296
.and_then(|(geo, srid)| geo_to_ewkt(geo, srid))
296297
.unwrap_or_else(|e| format!("GeozeroError: {:?}", e));
297-
write!(f, "'{geog}'")
298+
write!(f, "{}", QuotedString(geog, '\''))
298299
}
299300
}
300301
}

src/query/functions/tests/it/scalars/testdata/comparison.txt

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ evaluation:
11721172
| Row 1 | '-32768' | '-32768' | true |
11731173
| Row 2 | '1234.5678' | '1234.5678' | true |
11741174
| Row 3 | '1.912e2' | '1.912e2' | true |
1175-
| Row 4 | '"\\\"abc\\\""' | '"\\\"abc\\\""' | true |
1175+
| Row 4 | '"\\\\\\"abc\\\\\\""' | '"\\\\\\"abc\\\\\\""' | true |
11761176
| Row 5 | '{"k":"v","a":"b"}' | '{"k":"v","a":"d"}' | false |
11771177
| Row 6 | '[1,2,3,["a","b","d"]]' | '[1,2,3,["a","b","c"]]' | true |
11781178
+--------+-----------------------------------------------------------+-----------------------------------------------------------+---------+
@@ -1199,7 +1199,7 @@ evaluation:
11991199
| Row 1 | '-32768' | '-32768' | true |
12001200
| Row 2 | '1234.5678' | '1234.5678' | true |
12011201
| Row 3 | '1.912e2' | '1.912e2' | true |
1202-
| Row 4 | '"\\\"abc\\\""' | '"\\\"abc\\\""' | true |
1202+
| Row 4 | '"\\\\\\"abc\\\\\\""' | '"\\\\\\"abc\\\\\\""' | true |
12031203
| Row 5 | '{"k":"v","a":"b"}' | '{"k":"v","a":"d"}' | false |
12041204
| Row 6 | '[1,2,3,["a","b","d"]]' | '[1,2,3,["a","b","c"]]' | true |
12051205
+--------+-----------------------------------------------------------+-----------------------------------------------------------+---------------+
@@ -1224,8 +1224,7 @@ output : false
12241224

12251225
ast : 'hello
12261226
' like 'h%'
1227-
raw expr : like('hello
1228-
', 'h%')
1227+
raw expr : like('hello\n', 'h%')
12291228
checked expr : like<String, String>("hello\n", "h%")
12301229
optimized expr : true
12311230
output type : Boolean
@@ -1235,8 +1234,7 @@ output : true
12351234

12361235
ast : 'h
12371236
' like 'h_'
1238-
raw expr : like('h
1239-
', 'h_')
1237+
raw expr : like('h\n', 'h_')
12401238
checked expr : like<String, String>("h\n", "h_")
12411239
optimized expr : true
12421240
output type : Boolean
@@ -1245,7 +1243,7 @@ output : true
12451243

12461244

12471245
ast : '%' like '\%'
1248-
raw expr : like('%', '\%')
1246+
raw expr : like('%', '\\%')
12491247
checked expr : like<String, String>("%", "\\%")
12501248
optimized expr : true
12511249
output type : Boolean
@@ -1254,7 +1252,7 @@ output : true
12541252

12551253

12561254
ast : 'v%xx' like '_\%%'
1257-
raw expr : like('v%xx', '_\%%')
1255+
raw expr : like('v%xx', '_\\%%')
12581256
checked expr : like<String, String>("v%xx", "_\\%%")
12591257
optimized expr : true
12601258
output type : Boolean

src/query/functions/tests/it/scalars/testdata/regexp.txt

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -342,10 +342,8 @@ evaluation:
342342
| Row 0 | 'abc' | '^a' | true |
343343
| Row 1 | 'abd' | 'Ab' | true |
344344
| Row 2 | 'Abe' | 'abe' | true |
345-
| Row 3 | 'new* | 'new\*.\*line' | false |
346-
| | *line' | | |
347-
| Row 4 | 'fo | '^fo$' | false |
348-
| | fo' | | |
345+
| Row 3 | 'new*\n*line' | 'new\\*.\\*line' | false |
346+
| Row 4 | 'fo\nfo' | '^fo$' | false |
349347
| Row 5 | '' | '' | true |
350348
+--------+----------------------+-------------------------+---------+
351349
evaluation (internal):
@@ -372,10 +370,8 @@ evaluation:
372370
| Row 0 | 'abc' | '^a' | '' | true |
373371
| Row 1 | 'abd' | 'Ab' | 'c' | false |
374372
| Row 2 | 'Abe' | 'abe' | 'i' | true |
375-
| Row 3 | 'new* | 'new\*.\*line' | 'n' | true |
376-
| | *line' | | | |
377-
| Row 4 | 'fo | '^fo$' | 'm' | true |
378-
| | fo' | | | |
373+
| Row 3 | 'new*\n*line' | 'new\\*.\\*line' | 'n' | true |
374+
| Row 4 | 'fo\nfo' | '^fo$' | 'm' | true |
379375
| Row 5 | '' | '' | 'c' | true |
380376
+--------+----------------------+-------------------------+------------+---------+
381377
evaluation (internal):
@@ -1206,7 +1202,7 @@ evaluation (internal):
12061202

12071203

12081204
ast : regexp_extract(null, '(\d+)-(\d+)-(\d+)', ['y', 'm'])
1209-
raw expr : regexp_extract(NULL, '(\d+)-(\d+)-(\d+)', array('y', 'm'))
1205+
raw expr : regexp_extract(NULL, '(\\d+)-(\\d+)-(\\d+)', array('y', 'm'))
12101206
checked expr : regexp_extract<String NULL, String NULL, Array(String) NULL>(CAST<NULL>(NULL AS String NULL), CAST<String>("(\\d+)-(\\d+)-(\\d+)" AS String NULL), CAST<Array(String)>(array<T0=String><T0, T0>("y", "m") AS Array(String) NULL))
12111207
optimized expr : NULL
12121208
output type : Map(String, String) NULL
@@ -1215,7 +1211,7 @@ output : NULL
12151211

12161212

12171213
ast : regexp_extract_all(null, 'Order-(\d+)-(\d+)', 2)
1218-
raw expr : regexp_extract_all(NULL, 'Order-(\d+)-(\d+)', 2)
1214+
raw expr : regexp_extract_all(NULL, 'Order-(\\d+)-(\\d+)', 2)
12191215
checked expr : regexp_extract_all<String NULL, String NULL, UInt32 NULL>(CAST<NULL>(NULL AS String NULL), CAST<String>("Order-(\\d+)-(\\d+)" AS String NULL), CAST<UInt8>(2_u8 AS UInt32 NULL))
12201216
optimized expr : NULL
12211217
output type : Array(String) NULL
@@ -1224,7 +1220,7 @@ output : NULL
12241220

12251221

12261222
ast : regexp_extract(null, '([A-Za-z]+) ([A-Za-z]+), Age: (\d+)', 3)
1227-
raw expr : regexp_extract(NULL, '([A-Za-z]+) ([A-Za-z]+), Age: (\d+)', 3)
1223+
raw expr : regexp_extract(NULL, '([A-Za-z]+) ([A-Za-z]+), Age: (\\d+)', 3)
12281224
checked expr : regexp_extract<String NULL, String NULL, UInt32 NULL>(CAST<NULL>(NULL AS String NULL), CAST<String>("([A-Za-z]+) ([A-Za-z]+), Age: (\\d+)" AS String NULL), CAST<UInt8>(3_u8 AS UInt32 NULL))
12291225
optimized expr : NULL
12301226
output type : String NULL

src/query/functions/tests/it/scalars/testdata/string.txt

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -343,21 +343,21 @@ evaluation (internal):
343343

344344

345345
ast : quote('a\0b')
346-
raw expr : quote('ab')
346+
raw expr : quote('a\x00b')
347347
checked expr : quote<String>("a\0b")
348348
optimized expr : "a\\0b"
349349
output type : String
350350
output domain : {"a\\0b"..="a\\0b"}
351-
output : 'a\0b'
351+
output : 'a\\0b'
352352

353353

354354
ast : quote('a\'b')
355-
raw expr : quote('a'b')
355+
raw expr : quote('a\'b')
356356
checked expr : quote<String>("a'b")
357357
optimized expr : "a\\'b"
358358
output type : String
359359
output domain : {"a\\'b"..="a\\'b"}
360-
output : 'a\'b'
360+
output : 'a\\\'b'
361361

362362

363363
ast : quote('a\"b')
@@ -366,53 +366,52 @@ checked expr : quote<String>("a\"b")
366366
optimized expr : "a\\\"b"
367367
output type : String
368368
output domain : {"a\\\"b"..="a\\\"b"}
369-
output : 'a\"b'
369+
output : 'a\\"b'
370370

371371

372372
ast : quote('a\bb')
373-
raw expr : quote('ab')
373+
raw expr : quote('a\x08b')
374374
checked expr : quote<String>("a\u{8}b")
375375
optimized expr : "a\\bb"
376376
output type : String
377377
output domain : {"a\\bb"..="a\\bb"}
378-
output : 'a\bb'
378+
output : 'a\\bb'
379379

380380

381381
ast : quote('a\nb')
382-
raw expr : quote('a
383-
b')
382+
raw expr : quote('a\nb')
384383
checked expr : quote<String>("a\nb")
385384
optimized expr : "a\\nb"
386385
output type : String
387386
output domain : {"a\\nb"..="a\\nb"}
388-
output : 'a\nb'
387+
output : 'a\\nb'
389388

390389

391390
ast : quote('a\rb')
392-
raw expr : quote('ab')
391+
raw expr : quote('a\rb')
393392
checked expr : quote<String>("a\rb")
394393
optimized expr : "a\\rb"
395394
output type : String
396395
output domain : {"a\\rb"..="a\\rb"}
397-
output : 'a\rb'
396+
output : 'a\\rb'
398397

399398

400399
ast : quote('a\tb')
401-
raw expr : quote('a b')
400+
raw expr : quote('a\tb')
402401
checked expr : quote<String>("a\tb")
403402
optimized expr : "a\\tb"
404403
output type : String
405404
output domain : {"a\\tb"..="a\\tb"}
406-
output : 'a\tb'
405+
output : 'a\\tb'
407406

408407

409408
ast : quote('a\\b')
410-
raw expr : quote('a\b')
409+
raw expr : quote('a\\b')
411410
checked expr : quote<String>("a\\b")
412411
optimized expr : "a\\\\b"
413412
output type : String
414413
output domain : {"a\\\\b"..="a\\\\b"}
415-
output : 'a\\b'
414+
output : 'a\\\\b'
416415

417416

418417
ast : quote('你好')
@@ -455,15 +454,15 @@ ast : quote(a)
455454
raw expr : quote(a::String)
456455
checked expr : quote<String>(a)
457456
evaluation:
458-
+--------+---------------------+----------+
459-
| | a | Output |
460-
+--------+---------------------+----------+
461-
| Type | String | String |
462-
| Domain | {"a\\'b"..="a\\nb"} | {""..} |
463-
| Row 0 | 'a\0b' | 'a\\0b' |
464-
| Row 1 | 'a\'b' | 'a\\\'b' |
465-
| Row 2 | 'a\nb' | 'a\\nb' |
466-
+--------+---------------------+----------+
457+
+--------+---------------------+--------------+
458+
| | a | Output |
459+
+--------+---------------------+--------------+
460+
| Type | String | String |
461+
| Domain | {"a\\'b"..="a\\nb"} | {""..} |
462+
| Row 0 | 'a\\0b' | 'a\\\\0b' |
463+
| Row 1 | 'a\\\'b' | 'a\\\\\\\'b' |
464+
| Row 2 | 'a\\nb' | 'a\\\\nb' |
465+
+--------+---------------------+--------------+
467466
evaluation (internal):
468467
+--------+------------------------------------+
469468
| Column | Data |
@@ -752,12 +751,12 @@ output : ' aa'
752751

753752

754753
ast : ltrim('\taa')
755-
raw expr : ltrim(' aa')
754+
raw expr : ltrim('\taa')
756755
checked expr : ltrim<String>("\taa")
757756
optimized expr : "\taa"
758757
output type : String
759758
output domain : {"\taa"..="\taa"}
760-
output : ' aa'
759+
output : '\taa'
761760

762761

763762
ast : ltrim('#000000123','0#')
@@ -856,12 +855,12 @@ output : 'aa '
856855

857856

858857
ast : rtrim('aa\t')
859-
raw expr : rtrim('aa ')
858+
raw expr : rtrim('aa\t')
860859
checked expr : rtrim<String>("aa\t")
861860
optimized expr : "aa\t"
862861
output type : String
863862
output domain : {"aa\t"..="aa\t"}
864-
output : 'aa '
863+
output : 'aa\t'
865864

866865

867866
ast : rtrim('$125.00','0.')
@@ -1413,12 +1412,12 @@ evaluation (internal):
14131412

14141413

14151414
ast : trim('\ta\t')
1416-
raw expr : trim(' a ')
1415+
raw expr : trim('\ta\t')
14171416
checked expr : trim<String>("\ta\t")
14181417
optimized expr : "\ta\t"
14191418
output type : String
14201419
output domain : {"\ta\t"..="\ta\t"}
1421-
output : ' a '
1420+
output : '\ta\t'
14221421

14231422

14241423
ast : trim('*-*ABC-*-','*-')

src/query/functions/tests/it/scalars/testdata/variant.txt

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ evaluation:
101101
| Row 3 | '-32768' | '-32768' |
102102
| Row 4 | '1234.5678' | '1234.5678' |
103103
| Row 5 | '1.912e2' | '191.2' |
104-
| Row 6 | '"\\\"abc\\\""' | '"\\\"abc\\\""' |
104+
| Row 6 | '"\\\\\\"abc\\\\\\""' | '"\\\\\\"abc\\\\\\""' |
105105
| Row 7 | '"databend"' | '"databend"' |
106106
| Row 8 | '{"k":"v","a":"b"}' | '{"a":"b","k":"v"}' |
107107
| Row 9 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' |
@@ -234,7 +234,7 @@ evaluation:
234234
| Row 3 | '-32768' | '-32768' |
235235
| Row 4 | '1234.5678' | '1234.5678' |
236236
| Row 5 | '1.912e2' | '191.2' |
237-
| Row 6 | '"\\\"abc\\\""' | '"\\\"abc\\\""' |
237+
| Row 6 | '"\\\\\\"abc\\\\\\""' | '"\\\\\\"abc\\\\\\""' |
238238
| Row 7 | '"databend"' | '"databend"' |
239239
| Row 8 | '{"k":"v","a":"b"}' | '{"a":"b","k":"v"}' |
240240
| Row 9 | '[1,2,3,["a","b","c"]]' | '[1,2,3,["a","b","c"]]' |
@@ -2807,14 +2807,7 @@ checked expr : json_pretty<Variant>(CAST<String>("[1, 2, 3, 4, 5, 6]" AS Varia
28072807
optimized expr : "[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6\n]"
28082808
output type : String
28092809
output domain : {"[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6\n]"..="[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6\n]"}
2810-
output : '[
2811-
1,
2812-
2,
2813-
3,
2814-
4,
2815-
5,
2816-
6
2817-
]'
2810+
output : '[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6\n]'
28182811

28192812

28202813
ast : json_pretty(parse_json('{"k1":123, "k2":"abc"}'))
@@ -2823,10 +2816,7 @@ checked expr : json_pretty<Variant>(CAST<String>("{\"k1\":123, \"k2\":\"abc\"}
28232816
optimized expr : "{\n \"k1\": 123,\n \"k2\": \"abc\"\n}"
28242817
output type : String
28252818
output domain : {"{\n \"k1\": 123,\n \"k2\": \"abc\"\n}"..="{\n \"k1\": 123,\n \"k2\": \"abc\"\n}"}
2826-
output : '{
2827-
"k1": 123,
2828-
"k2": "abc"
2829-
}'
2819+
output : '{\n "k1": 123,\n "k2": "abc"\n}'
28302820

28312821

28322822
ast : json_pretty(parse_json('{"a":1,"b":true,"c":["1","2","3"],"d":{"a":1,"b":[1,2,3],"c":{"a":1,"b":2}}}'))
@@ -2835,27 +2825,7 @@ checked expr : json_pretty<Variant>(CAST<String>("{\"a\":1,\"b\":true,\"c\":[\
28352825
optimized expr : "{\n \"a\": 1,\n \"b\": true,\n \"c\": [\n \"1\",\n \"2\",\n \"3\"\n ],\n \"d\": {\n \"a\": 1,\n \"b\": [\n 1,\n 2,\n 3\n ],\n \"c\": {\n \"a\": 1,\n \"b\": 2\n }\n }\n}"
28362826
output type : String
28372827
output domain : {"{\n \"a\": 1,\n \"b\": true,\n \"c\": [\n \"1\",\n \"2\",\n \"3\"\n ],\n \"d\": {\n \"a\": 1,\n \"b\": [\n 1,\n 2,\n 3\n ],\n \"c\": {\n \"a\": 1,\n \"b\": 2\n }\n }\n}"..="{\n \"a\": 1,\n \"b\": true,\n \"c\": [\n \"1\",\n \"2\",\n \"3\"\n ],\n \"d\": {\n \"a\": 1,\n \"b\": [\n 1,\n 2,\n 3\n ],\n \"c\": {\n \"a\": 1,\n \"b\": 2\n }\n }\n}"}
2838-
output : '{
2839-
"a": 1,
2840-
"b": true,
2841-
"c": [
2842-
"1",
2843-
"2",
2844-
"3"
2845-
],
2846-
"d": {
2847-
"a": 1,
2848-
"b": [
2849-
1,
2850-
2,
2851-
3
2852-
],
2853-
"c": {
2854-
"a": 1,
2855-
"b": 2
2856-
}
2857-
}
2858-
}'
2828+
output : '{\n "a": 1,\n "b": true,\n "c": [\n "1",\n "2",\n "3"\n ],\n "d": {\n "a": 1,\n "b": [\n 1,\n 2,\n 3\n ],\n "c": {\n "a": 1,\n "b": 2\n }\n }\n}'
28592829

28602830

28612831
ast : json_strip_nulls(parse_json('true'))

0 commit comments

Comments
 (0)