Skip to content

Commit 6b9013f

Browse files
authored
feat(parse): Simplify the matching pattern when parsing function calls to avoid exponential backtracking (#17942)
perf: Simplify the matching pattern when parsing function calls to avoid exponential backtracking
1 parent 35c0199 commit 6b9013f

File tree

3 files changed

+178
-106
lines changed

3 files changed

+178
-106
lines changed

src/query/ast/benches/bench.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ fn main() {
1616
divan::main()
1717
}
1818

19-
// bench fastest │ slowest │ median │ mean │ samples │ iters
20-
// ╰─ dummy │ │ │ │ │
21-
// ├─ deep_query 122 µs │ 324.3 µs │ 127.2 µs │ 130.1 µs │ 100 │ 100
22-
// ├─ large_query 1.366 ms │ 1.686 ms │ 1.409 ms │ 1.417 ms │ 100 │ 100
23-
// ├─ large_statement 1.336 ms │ 1.441 ms │ 1.391 ms │ 1.39 ms │ 100 │ 100
24-
// ╰─ wide_expr 556 µs │ 697.2 µs │ 578.3 µs │ 580.5 µs │ 100 │ 100
19+
// bench fastest │ slowest │ median │ mean │ samples │ iters
20+
// ╰─ dummy │ │ │ │ │
21+
// ├─ deep_function_call 732.9 ms │ 732.9 ms │ 732.9 ms │ 732.9 ms │ 1 │ 1
22+
// ├─ deep_query 319.4 µs │ 515.6 µs │ 333.4 µs │ 335.3 µs │ 100 │ 100
23+
// ├─ large_query 1.998 ms │ 2.177 ms │ 2.032 ms │ 2.038 ms │ 100 │ 100
24+
// ├─ large_statement 1.952 ms │ 2.079 ms │ 2.016 ms │ 2.011 ms │ 100 │ 100
25+
// ╰─ wide_expr 620.4 µs │ 783.7 µs │ 646 µs │ 646.4 µs │ 100 │ 100
26+
2527
#[divan::bench_group(max_time = 0.5)]
2628
mod dummy {
2729
use databend_common_ast::parser::parse_expr;
@@ -60,4 +62,12 @@ mod dummy {
6062
let expr = parse_expr(&tokens, Dialect::PostgreSQL).unwrap();
6163
divan::black_box(expr);
6264
}
65+
66+
#[divan::bench]
67+
fn deep_function_call() {
68+
let case = r#"ROUND(6378.138 * 2 * ASIN(SQRT(POW(SIN(RADIANS(CASE WHEN MOD(EXTRACT(SECOND FROM CURRENT_TIMESTAMP), 2) = 0 THEN 45.6789 ELSE 30.1234 END - IFNULL(NULLIF((SELECT 37.7749), 0), 15.4321))), 2) + POW(SIN(RADIANS((SELECT -122.4194) / 2)), 2) * COS(RADIANS(LEAST(60, GREATEST(20, (SELECT 25.5))))))) * (1000 + (RAND() * 500 - 250)), 2)"#;
69+
let tokens = tokenize_sql(case).unwrap();
70+
let expr = parse_expr(&tokens, Dialect::PostgreSQL).unwrap();
71+
divan::black_box(expr);
72+
}
6373
}

src/query/ast/src/parser/expr.rs

Lines changed: 161 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,100 +1038,6 @@ pub fn expr_element(i: Input) -> IResult<WithSpan<ExprElement>> {
10381038
},
10391039
);
10401040

1041-
let function_call = map(
1042-
rule! {
1043-
#function_name
1044-
~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
1045-
},
1046-
|(name, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall {
1047-
func: FunctionCall {
1048-
distinct: opt_distinct.is_some(),
1049-
name,
1050-
args: opt_args.unwrap_or_default(),
1051-
params: vec![],
1052-
order_by: vec![],
1053-
window: None,
1054-
lambda: None,
1055-
},
1056-
},
1057-
);
1058-
let function_call_with_lambda = map(
1059-
rule! {
1060-
#function_name
1061-
~ "(" ~ #subexpr(0) ~ "," ~ #lambda_params ~ "->" ~ #subexpr(0) ~ ")"
1062-
},
1063-
|(name, _, arg, _, params, _, expr, _)| ExprElement::FunctionCall {
1064-
func: FunctionCall {
1065-
distinct: false,
1066-
name,
1067-
args: vec![arg],
1068-
params: vec![],
1069-
order_by: vec![],
1070-
window: None,
1071-
lambda: Some(Lambda {
1072-
params,
1073-
expr: Box::new(expr),
1074-
}),
1075-
},
1076-
},
1077-
);
1078-
let function_call_with_window = map(
1079-
rule! {
1080-
#function_name
1081-
~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
1082-
~ #window_function
1083-
},
1084-
|(name, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall {
1085-
func: FunctionCall {
1086-
distinct: opt_distinct.is_some(),
1087-
name,
1088-
args: opt_args.unwrap_or_default(),
1089-
params: vec![],
1090-
order_by: vec![],
1091-
window: Some(window),
1092-
lambda: None,
1093-
},
1094-
},
1095-
);
1096-
let function_call_with_within_group_window = map(
1097-
rule! {
1098-
#function_name
1099-
~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
1100-
~ #within_group
1101-
~ #window_function?
1102-
},
1103-
|(name, _, opt_distinct, opt_args, _, order_by, window)| ExprElement::FunctionCall {
1104-
func: FunctionCall {
1105-
distinct: opt_distinct.is_some(),
1106-
name,
1107-
args: opt_args.unwrap_or_default(),
1108-
params: vec![],
1109-
order_by,
1110-
window,
1111-
lambda: None,
1112-
},
1113-
},
1114-
);
1115-
let function_call_with_params_window = map(
1116-
rule! {
1117-
#function_name
1118-
~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")"
1119-
~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
1120-
~ #window_function?
1121-
},
1122-
|(name, _, params, _, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall {
1123-
func: FunctionCall {
1124-
distinct: opt_distinct.is_some(),
1125-
name,
1126-
args: opt_args.unwrap_or_default(),
1127-
params,
1128-
order_by: vec![],
1129-
window,
1130-
lambda: None,
1131-
},
1132-
},
1133-
);
1134-
11351041
let case = map(
11361042
rule! {
11371043
CASE ~ #subexpr(0)?
@@ -1429,11 +1335,7 @@ pub fn expr_element(i: Input) -> IResult<WithSpan<ExprElement>> {
14291335
| #chain_function_call : "x.function(...)"
14301336
| #list_comprehensions: "[expr for x in ... [if ...]]"
14311337
| #count_all_with_window : "`COUNT(*) OVER ...`"
1432-
| #function_call_with_lambda : "`function(..., x -> ...)`"
1433-
| #function_call_with_window : "`function(...) OVER ([ PARTITION BY <expr>, ... ] [ ORDER BY <expr>, ... ] [ <window frame> ])`"
1434-
| #function_call_with_within_group_window: "`function(...) [ WITHIN GROUP ( ORDER BY <expr>, ... ) ] OVER ([ PARTITION BY <expr>, ... ] [ ORDER BY <expr>, ... ] [ <window frame> ])`"
1435-
| #function_call_with_params_window : "`function(...)(...) OVER ([ PARTITION BY <expr>, ... ] [ ORDER BY <expr>, ... ] [ <window frame> ])`"
1436-
| #function_call : "`function(...)`"
1338+
| #function_call
14371339
),
14381340
rule!(
14391341
#case : "`CASE ... END`"
@@ -2038,6 +1940,166 @@ pub fn map_element(i: Input) -> IResult<(Literal, Expr)> {
20381940
)(i)
20391941
}
20401942

1943+
pub fn function_call(i: Input) -> IResult<ExprElement> {
1944+
enum FunctionCallSuffix {
1945+
Simple {
1946+
distinct: bool,
1947+
args: Vec<Expr>,
1948+
},
1949+
Lambda {
1950+
arg: Expr,
1951+
params: Vec<Identifier>,
1952+
expr: Box<Expr>,
1953+
},
1954+
Window {
1955+
distinct: bool,
1956+
args: Vec<Expr>,
1957+
window: WindowDesc,
1958+
},
1959+
WithInGroupWindow {
1960+
distinct: bool,
1961+
args: Vec<Expr>,
1962+
order_by: Vec<OrderByExpr>,
1963+
window: Option<WindowDesc>,
1964+
},
1965+
ParamsWindow {
1966+
distinct: bool,
1967+
params: Vec<Expr>,
1968+
args: Vec<Expr>,
1969+
window: Option<WindowDesc>,
1970+
},
1971+
}
1972+
let function_call_with_lambda_body = map(
1973+
rule! {
1974+
"(" ~ #subexpr(0) ~ "," ~ #lambda_params ~ "->" ~ #subexpr(0) ~ ")"
1975+
},
1976+
|(_, arg, _, params, _, expr, _)| FunctionCallSuffix::Lambda {
1977+
arg,
1978+
params,
1979+
expr: Box::new(expr),
1980+
},
1981+
);
1982+
let function_call_with_within_group_window_body = map(
1983+
rule! {
1984+
"(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
1985+
~ #within_group?
1986+
~ #window_function?
1987+
},
1988+
|(_, opt_distinct, opt_args, _, order_by, window)| match (order_by, window) {
1989+
(Some(order_by), window) => FunctionCallSuffix::WithInGroupWindow {
1990+
distinct: opt_distinct.is_some(),
1991+
args: opt_args.unwrap_or_default(),
1992+
order_by,
1993+
window,
1994+
},
1995+
(None, Some(window)) => FunctionCallSuffix::Window {
1996+
distinct: opt_distinct.is_some(),
1997+
args: opt_args.unwrap_or_default(),
1998+
window,
1999+
},
2000+
(None, None) => FunctionCallSuffix::Simple {
2001+
distinct: opt_distinct.is_some(),
2002+
args: opt_args.unwrap_or_default(),
2003+
},
2004+
},
2005+
);
2006+
let function_call_with_params_window_body = map(
2007+
rule! {
2008+
"(" ~ #comma_separated_list1(subexpr(0)) ~ ")"
2009+
~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")"
2010+
~ #window_function?
2011+
},
2012+
|(_, params, _, _, opt_distinct, opt_args, _, window)| FunctionCallSuffix::ParamsWindow {
2013+
distinct: opt_distinct.is_some(),
2014+
params,
2015+
args: opt_args.unwrap_or_default(),
2016+
window,
2017+
},
2018+
);
2019+
2020+
map(
2021+
rule!(
2022+
#function_name
2023+
~ (
2024+
#function_call_with_lambda_body : "`function(..., x -> ...)`"
2025+
| #function_call_with_params_window_body : "`function(...)(...) OVER ([ PARTITION BY <expr>, ... ] [ ORDER BY <expr>, ... ] [ <window frame> ])`"
2026+
| #function_call_with_within_group_window_body : "`function(...) [ WITHIN GROUP ( ORDER BY <expr>, ... ) ] [ OVER ([ PARTITION BY <expr>, ... ] [ ORDER BY <expr>, ... ] [ <window frame> ]) ]`"
2027+
)
2028+
),
2029+
|(name, suffix)| match suffix {
2030+
FunctionCallSuffix::Simple { distinct, args } => ExprElement::FunctionCall {
2031+
func: FunctionCall {
2032+
distinct,
2033+
name,
2034+
args,
2035+
params: vec![],
2036+
order_by: vec![],
2037+
window: None,
2038+
lambda: None,
2039+
},
2040+
},
2041+
FunctionCallSuffix::Lambda { arg, params, expr } => ExprElement::FunctionCall {
2042+
func: FunctionCall {
2043+
distinct: false,
2044+
name,
2045+
args: vec![arg],
2046+
params: vec![],
2047+
order_by: vec![],
2048+
window: None,
2049+
lambda: Some(Lambda { params, expr }),
2050+
},
2051+
},
2052+
FunctionCallSuffix::Window {
2053+
distinct,
2054+
args,
2055+
window,
2056+
} => ExprElement::FunctionCall {
2057+
func: FunctionCall {
2058+
distinct,
2059+
name,
2060+
args,
2061+
params: vec![],
2062+
order_by: vec![],
2063+
window: Some(window),
2064+
lambda: None,
2065+
},
2066+
},
2067+
FunctionCallSuffix::WithInGroupWindow {
2068+
distinct,
2069+
args,
2070+
order_by,
2071+
window,
2072+
} => ExprElement::FunctionCall {
2073+
func: FunctionCall {
2074+
distinct,
2075+
name,
2076+
args,
2077+
params: vec![],
2078+
order_by,
2079+
window,
2080+
lambda: None,
2081+
},
2082+
},
2083+
FunctionCallSuffix::ParamsWindow {
2084+
distinct,
2085+
params,
2086+
args,
2087+
window,
2088+
} => ExprElement::FunctionCall {
2089+
func: FunctionCall {
2090+
distinct,
2091+
name,
2092+
args,
2093+
params,
2094+
order_by: vec![],
2095+
window,
2096+
lambda: None,
2097+
},
2098+
},
2099+
},
2100+
)(i)
2101+
}
2102+
20412103
pub fn parse_float(text: &str) -> Result<Literal, ErrorKind> {
20422104
let text = text.trim_start_matches('0');
20432105
let point_pos = text.find('.');

src/query/ast/tests/it/testdata/stmt-error.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -982,7 +982,7 @@ error:
982982
--> SQL:1:65
983983
|
984984
1 | CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p)
985-
| ------ -- ---- ^ unexpected end of input, expecting `)`, `IGNORE`, `RESPECT`, `OVER`, `WITHIN`, `(`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `::`, or 45 more ...
985+
| ------ -- ---- ^ unexpected end of input, expecting `)`, `(`, `WITHIN`, `IGNORE`, `RESPECT`, `OVER`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `::`, or 45 more ...
986986
| | | | |
987987
| | | | while parsing `(<expr> [, ...])`
988988
| | | while parsing expression

0 commit comments

Comments
 (0)