From 5d3e422c0bb8be5e0f2961825f826187cb5b2882 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Mon, 26 May 2025 09:30:21 +0200 Subject: [PATCH 01/12] Fix SchemaDisplay --- datafusion/expr/src/expr.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index fe5ea2ecd5b8..3f6932235580 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2500,7 +2500,28 @@ impl Display for SchemaDisplay<'_> { } } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),) + fn write_child( + f: &mut Formatter<'_>, + expr: &Expr, + precedence: u8, + ) -> fmt::Result { + match expr { + Expr::BinaryExpr(child) => { + let p = child.op.precedence(); + if p == 0 || p < precedence { + write!(f, "({})", SchemaDisplay(expr))?; + } else { + write!(f, "{}", SchemaDisplay(expr))?; + } + } + _ => write!(f, "{}", SchemaDisplay(expr))?, + } + Ok(()) + } + let precedence = op.precedence(); + write_child(f, left.as_ref(), precedence)?; + write!(f, " {} ", op)?; + write_child(f, right.as_ref(), precedence) } Expr::Case(Case { expr, From 6b30b4fa2dc1445c9472d8023cef2be5158cf77a Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 29 May 2025 18:10:25 +0200 Subject: [PATCH 02/12] Add test --- datafusion/expr/src/expr.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 3f6932235580..e3692956bfc3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -3598,6 +3598,18 @@ mod test { ); } + #[test] + fn test_schema_display_nested_binary_expr() { + let expr = (lit(1) + lit(2)) * lit(3); + assert_eq!( + format!( + "{}", + SchemaDisplay(&expr) + ), + "(Int32(1) + Int32(2)) * Int32(3)" + ) + } + fn wildcard_options( opt_ilike: Option, opt_exclude: Option, From 085bf74d3ebd64f1113540d5df5e656ea9f8fb88 Mon Sep 17 00:00:00 
2001 From: Hendrik Makait Date: Thu, 29 May 2025 18:12:47 +0200 Subject: [PATCH 03/12] linting --- datafusion/expr/src/expr.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index e3692956bfc3..7a17d51bebc9 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2520,7 +2520,7 @@ impl Display for SchemaDisplay<'_> { } let precedence = op.precedence(); write_child(f, left.as_ref(), precedence)?; - write!(f, " {} ", op)?; + write!(f, " {op} ")?; write_child(f, right.as_ref(), precedence) } Expr::Case(Case { @@ -3602,10 +3602,7 @@ mod test { fn test_schema_display_nested_binary_expr() { let expr = (lit(1) + lit(2)) * lit(3); assert_eq!( - format!( - "{}", - SchemaDisplay(&expr) - ), + format!("{}", SchemaDisplay(&expr)), "(Int32(1) + Int32(2)) * Int32(3)" ) } From fdbfa36c60bf5e4cbc01cd63caf28d6c2d44a0a3 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 29 May 2025 19:08:12 +0200 Subject: [PATCH 04/12] Always add parentheses --- datafusion/expr/src/expr.rs | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 7a17d51bebc9..881bb7101e19 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2500,28 +2500,7 @@ impl Display for SchemaDisplay<'_> { } } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - fn write_child( - f: &mut Formatter<'_>, - expr: &Expr, - precedence: u8, - ) -> fmt::Result { - match expr { - Expr::BinaryExpr(child) => { - let p = child.op.precedence(); - if p == 0 || p < precedence { - write!(f, "({})", SchemaDisplay(expr))?; - } else { - write!(f, "{}", SchemaDisplay(expr))?; - } - } - _ => write!(f, "{}", SchemaDisplay(expr))?, - } - Ok(()) - } - let precedence = op.precedence(); - write_child(f, left.as_ref(), precedence)?; - write!(f, " {op} ")?; - write_child(f, right.as_ref(), precedence) + write!(f, 
"({} {op} {})", SchemaDisplay(left), SchemaDisplay(right),) } Expr::Case(Case { expr, @@ -3600,11 +3579,11 @@ mod test { #[test] fn test_schema_display_nested_binary_expr() { - let expr = (lit(1) + lit(2)) * lit(3); + let expr = lit(1) * (lit(2) + lit(3)); assert_eq!( format!("{}", SchemaDisplay(&expr)), - "(Int32(1) + Int32(2)) * Int32(3)" - ) + "(Int32(1) * (Int32(2) + Int32(3)))" + ); } fn wildcard_options( From e7aa20445131d1c2ba91366c6854cf746aacf376 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 29 May 2025 19:37:26 +0200 Subject: [PATCH 05/12] Additional test case --- datafusion/expr/src/expr.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 881bb7101e19..f205a5c8afbf 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -3584,6 +3584,12 @@ mod test { format!("{}", SchemaDisplay(&expr)), "(Int32(1) * (Int32(2) + Int32(3)))" ); + + let expr = -(lit(1) + (lit(2))); + assert_eq!( + format!("{}", SchemaDisplay(&expr)), + "(- (Int32(1) + (Int32(2))" + ); } fn wildcard_options( From 378b969163c518d1961e417864dc72504c19dd30 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sat, 31 May 2025 18:12:39 +0200 Subject: [PATCH 06/12] Adjust tests --- datafusion/core/src/execution/context/mod.rs | 10 ++-- datafusion/core/src/physical_planner.rs | 2 +- datafusion/core/tests/dataframe/mod.rs | 50 +++++++++---------- .../core/tests/expr_api/simplification.rs | 2 +- .../core/tests/user_defined/expr_planner.rs | 20 ++++---- datafusion/expr/src/expr.rs | 2 +- .../optimizer/src/common_subexpr_eliminate.rs | 20 ++++---- .../src/decorrelate_predicate_subquery.rs | 46 ++++++++--------- .../optimizer/src/scalar_subquery_to_join.rs | 8 +-- .../simplify_expressions/simplify_exprs.rs | 10 ++-- .../src/single_distinct_to_groupby.rs | 4 +- .../optimizer/tests/optimizer_integration.rs | 8 +-- datafusion/sql/tests/cases/params.rs | 4 +- datafusion/sql/tests/sql_integration.rs | 20 
++++---- .../tests/cases/consumer_integration.rs | 24 ++++----- .../substrait/tests/cases/emit_kind_tests.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 4 +- 17 files changed, 118 insertions(+), 118 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 5ef666b61e54..0e749f6df590 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1963,11 +1963,11 @@ mod tests { .await?; assert_snapshot!(batches_to_string(&results), @r" - +----------------------+------------------------+---------------------+ - | @@version | @name | @integer + Int64(1) | - +----------------------+------------------------+---------------------+ - | system-var-@@version | user-defined-var-@name | 42 | - +----------------------+------------------------+---------------------+ + +----------------------+------------------------+-----------------------+ + | @@version | @name | (@integer + Int64(1)) | + +----------------------+------------------------+-----------------------+ + | system-var-@@version | user-defined-var-@name | 42 | + +----------------------+------------------------+-----------------------+ "); Ok(()) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index fbb4250fc4df..cdbe207af81d 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -3049,7 +3049,7 @@ mod tests { // display it online here: https://dreampuf.github.io/GraphvizOnline digraph { - 1[shape=box label="ProjectionExec: expr=[id@0 + 2 as employee.id + Int32(2)]", tooltip=""] + 1[shape=box label="ProjectionExec: expr=[id@0 + 2 as (employee.id + Int32(2))]", tooltip=""] 2[shape=box label="EmptyExec", tooltip=""] 1 -> 2 [arrowhead=none, arrowtail=normal, dir=back] } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index dfd11fcb096f..dadfaf877ed0 100644 --- 
a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -835,15 +835,15 @@ async fn test_aggregate_subexpr() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df_results), @r###" - +----------------+------+ - | c2 + Int32(10) | sum | - +----------------+------+ - | 12 | 431 | - | 13 | 248 | - | 14 | 453 | - | 15 | 95 | - | 16 | -146 | - +----------------+------+ + +------------------+------+ + | (c2 + Int32(10)) | sum | + +------------------+------+ + | 12 | 431 | + | 13 | 248 | + | 14 | 453 | + | 15 | 95 | + | 16 | -146 | + +------------------+------+ "### ); @@ -5649,14 +5649,14 @@ async fn test_alias() -> Result<()> { assert_snapshot!( batches_to_sort_string(&df.collect().await.unwrap()), @r###" - +-----------+---------------------------------+ - | a | table_alias.b + table_alias.one | - +-----------+---------------------------------+ - | 123AbcDef | 101 | - | CBAdef | 11 | - | abc123 | 11 | - | abcDEF | 2 | - +-----------+---------------------------------+ + +-----------+-----------------------------------+ + | a | (table_alias.b + table_alias.one) | + +-----------+-----------------------------------+ + | 123AbcDef | 101 | + | CBAdef | 11 | + | abc123 | 11 | + | abcDEF | 2 | + +-----------+-----------------------------------+ "### ); Ok(()) @@ -5758,14 +5758,14 @@ async fn test_alias_nested() -> Result<()> { assert_snapshot!( batches_to_sort_string(&select1.collect().await.unwrap()), @r###" - +-----------+-----------------------+ - | a | alias2.b + alias2.one | - +-----------+-----------------------+ - | 123AbcDef | 101 | - | CBAdef | 11 | - | abc123 | 11 | - | abcDEF | 2 | - +-----------+-----------------------+ + +-----------+-------------------------+ + | a | (alias2.b + alias2.one) | + +-----------+-------------------------+ + | 123AbcDef | 101 | + | CBAdef | 11 | + | abc123 | 11 | + | abcDEF | 2 | + +-----------+-------------------------+ "### ); diff --git a/datafusion/core/tests/expr_api/simplification.rs 
b/datafusion/core/tests/expr_api/simplification.rs index 34e0487f312f..24401364e64f 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -293,7 +293,7 @@ fn select_date_plus_interval() -> Result<()> { // Note that constant folder runs and folds the entire // expression down to a single constant (true) - let expected = r#"Projection: Date32("2021-01-09") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("IntervalDayTime { days: 123, milliseconds: 0 }") + let expected = r#"Projection: Date32("2021-01-09") AS (to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("IntervalDayTime { days: 123, milliseconds: 0 }")) TableScan: test"#; let actual = get_optimized_plan_formatted(plan, &time); diff --git a/datafusion/core/tests/user_defined/expr_planner.rs b/datafusion/core/tests/user_defined/expr_planner.rs index 1fc6d14c5b22..215bfec4996d 100644 --- a/datafusion/core/tests/user_defined/expr_planner.rs +++ b/datafusion/core/tests/user_defined/expr_planner.rs @@ -78,11 +78,11 @@ async fn plan_and_collect(sql: &str) -> Result> { async fn test_custom_operators_arrow() { let actual = plan_and_collect("select 'foo'->'bar';").await.unwrap(); insta::assert_snapshot!(batches_to_string(&actual), @r###" - +----------------------------+ - | Utf8("foo") || Utf8("bar") | - +----------------------------+ - | foobar | - +----------------------------+ + +------------------------------+ + | (Utf8("foo") || Utf8("bar")) | + +------------------------------+ + | foobar | + +------------------------------+ "###); } @@ -90,11 +90,11 @@ async fn test_custom_operators_arrow() { async fn test_custom_operators_long_arrow() { let actual = plan_and_collect("select 1->>2;").await.unwrap(); insta::assert_snapshot!(batches_to_string(&actual), @r###" - +---------------------+ - | Int64(1) + Int64(2) | - +---------------------+ - | 3 | - +---------------------+ + +-----------------------+ + | (Int64(1) + 
Int64(2)) | + +-----------------------+ + | 3 | + +-----------------------+ "###); } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f205a5c8afbf..5fcd00cfb282 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -3588,7 +3588,7 @@ mod test { let expr = -(lit(1) + (lit(2))); assert_eq!( format!("{}", SchemaDisplay(&expr)), - "(- (Int32(1) + (Int32(2))" + "(- (Int32(1) + (Int32(2)))" ); } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 6a49e5d22087..2078d0b0a650 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -863,7 +863,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS test.a * Int32(1) - test.b), sum(__common_expr_1 AS test.a * Int32(1) - test.b * (Int32(1) + test.c))]] + Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS (test.a * (Int32(1) - test.b))), sum(__common_expr_1 AS (test.a * (Int32(1) - test.b)) * (Int32(1) + test.c))]] Projection: test.a * (Int32(1) - test.b) AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -884,7 +884,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 - test.c AS alias1 * __common_expr_1 AS test.a + test.b, __common_expr_1 AS test.a + test.b + Projection: __common_expr_1 - test.c AS alias1 * __common_expr_1 AS (test.a + test.b), __common_expr_1 AS (test.a + test.b) Projection: test.a + test.b AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -1000,7 +1000,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Aggregate: groupBy=[[__common_expr_1 AS UInt32(1) + test.a]], aggr=[[avg(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]] + Aggregate: groupBy=[[__common_expr_1 AS (UInt32(1) + test.a)]], aggr=[[avg(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]] Projection: 
UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -1024,8 +1024,8 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: UInt32(1) + test.a, UInt32(1) + __common_expr_2 AS col1, UInt32(1) - __common_expr_2 AS col2, __common_expr_4 AS avg(UInt32(1) + test.a), UInt32(1) + __common_expr_3 AS col3, UInt32(1) - __common_expr_3 AS col4, __common_expr_5 AS my_agg(UInt32(1) + test.a) - Aggregate: groupBy=[[__common_expr_1 AS UInt32(1) + test.a]], aggr=[[avg(__common_expr_1) AS __common_expr_2, my_agg(__common_expr_1) AS __common_expr_3, avg(__common_expr_1 AS UInt32(1) + test.a) AS __common_expr_4, my_agg(__common_expr_1 AS UInt32(1) + test.a) AS __common_expr_5]] + Projection: UInt32(1) + test.a, UInt32(1) + __common_expr_2 AS col1, UInt32(1) - __common_expr_2 AS col2, __common_expr_4 AS avg((UInt32(1) + test.a)), UInt32(1) + __common_expr_3 AS col3, UInt32(1) - __common_expr_3 AS col4, __common_expr_5 AS my_agg((UInt32(1) + test.a)) + Aggregate: groupBy=[[__common_expr_1 AS (UInt32(1) + test.a)]], aggr=[[avg(__common_expr_1) AS __common_expr_2, my_agg(__common_expr_1) AS __common_expr_3, avg(__common_expr_1 AS (UInt32(1) + test.a)) AS __common_expr_4, my_agg(__common_expr_1 AS (UInt32(1) + test.a)) AS __common_expr_5]] Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -1052,8 +1052,8 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: table.test.col.a, UInt32(1) + __common_expr_2 AS avg(UInt32(1) + table.test.col.a), __common_expr_2 AS avg(UInt32(1) + table.test.col.a) - Aggregate: groupBy=[[table.test.col.a]], aggr=[[avg(__common_expr_1 AS UInt32(1) + table.test.col.a) AS __common_expr_2]] + Projection: table.test.col.a, UInt32(1) + __common_expr_2 AS avg((UInt32(1) + table.test.col.a)), __common_expr_2 AS avg((UInt32(1) + table.test.col.a)) + Aggregate: groupBy=[[table.test.col.a]], aggr=[[avg(__common_expr_1 AS (UInt32(1) + table.test.col.a)) AS 
__common_expr_2]] Projection: UInt32(1) + table.test.col.a AS __common_expr_1, table.test.col.a TableScan: table.test " @@ -1092,7 +1092,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS Int32(1) + test.a, __common_expr_1 AS test.a + Int32(1) + Projection: __common_expr_1 AS (Int32(1) + test.a), __common_expr_1 AS (test.a + Int32(1)) Projection: Int32(1) + test.a AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -1692,7 +1692,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS NOT test.a = test.b, __common_expr_1 AS NOT test.b = test.a + Projection: __common_expr_1 AS NOT (test.a = test.b), __common_expr_1 AS NOT (test.b = test.a) Projection: NOT test.a = test.b AS __common_expr_1, test.a, test.b, test.c TableScan: test " @@ -1708,7 +1708,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS test.a = test.b IS NULL, __common_expr_1 AS test.b = test.a IS NULL + Projection: __common_expr_1 AS (test.a = test.b) IS NULL, __common_expr_1 AS (test.b = test.a) IS NULL Projection: test.a = test.b IS NULL AS __common_expr_1, test.a, test.b, test.c TableScan: test " diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index a72657bf689d..60a3632ce345 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -895,10 +895,10 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.orders.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] + LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.(orders.o_custkey + Int32(1)) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] TableScan: customer [c_custkey:Int64, 
c_name:Utf8] - SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] - Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] + SubqueryAlias: __correlated_sq_1 [(orders.o_custkey + Int32(1)):Int64, o_custkey:Int64] + Projection: orders.o_custkey + Int32(1), orders.o_custkey [(orders.o_custkey + Int32(1)):Int64, o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] " ) @@ -1103,10 +1103,10 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] + LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.(sq.c * UInt32(2)) [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32] - Projection: sq.c * UInt32(2) [sq.c * UInt32(2):UInt32] + SubqueryAlias: __correlated_sq_1 [(sq.c * UInt32(2)):UInt32] + Projection: sq.c * UInt32(2) [(sq.c * UInt32(2)):UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] " ) @@ -1135,10 +1135,10 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] + LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.(sq.c * UInt32(2)) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32] - Projection: sq.c * UInt32(2), sq.a [sq.c * UInt32(2):UInt32, a:UInt32] + SubqueryAlias: __correlated_sq_1 [(sq.c * UInt32(2)):UInt32, a:UInt32] + Projection: sq.c * UInt32(2), sq.a [(sq.c * UInt32(2)):UInt32, a:UInt32] Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] " @@ -1169,10 +1169,10 @@ 
mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32] + LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.(sq.c * UInt32(2)) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32] - Projection: sq.c * UInt32(2), sq.a, sq.b [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32] + SubqueryAlias: __correlated_sq_1 [(sq.c * UInt32(2)):UInt32, a:UInt32, b:UInt32] + Projection: sq.c * UInt32(2), sq.a, sq.b [(sq.c * UInt32(2)):UInt32, a:UInt32, b:UInt32] Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] " @@ -1210,14 +1210,14 @@ mod tests { @r" Projection: test.b [b:UInt32] Filter: test.c > UInt32(1) [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.sq2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] + LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.(sq2.c * UInt32(2)) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] + LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.(sq1.c * UInt32(2)) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq1.c * UInt32(2):UInt32, a:UInt32] - Projection: sq1.c * UInt32(2), sq1.a [sq1.c * UInt32(2):UInt32, a:UInt32] + SubqueryAlias: __correlated_sq_1 [(sq1.c * UInt32(2)):UInt32, a:UInt32] + Projection: sq1.c * UInt32(2), sq1.a [(sq1.c * UInt32(2)):UInt32, a:UInt32] TableScan: sq1 [a:UInt32, 
b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_2 [sq2.c * UInt32(2):UInt32, a:UInt32] - Projection: sq2.c * UInt32(2), sq2.a [sq2.c * UInt32(2):UInt32, a:UInt32] + SubqueryAlias: __correlated_sq_2 [(sq2.c * UInt32(2)):UInt32, a:UInt32] + Projection: sq2.c * UInt32(2), sq2.a [(sq2.c * UInt32(2)):UInt32, a:UInt32] TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32] " ) @@ -1571,8 +1571,8 @@ mod tests { Projection: customer.c_custkey [c_custkey:Int64] LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] TableScan: customer [c_custkey:Int64, c_name:Utf8] - SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] - Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] + SubqueryAlias: __correlated_sq_1 [(orders.o_custkey + Int32(1)):Int64, o_custkey:Int64] + Projection: orders.o_custkey + Int32(1), orders.o_custkey [(orders.o_custkey + Int32(1)):Int64, o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] " ) @@ -1865,9 +1865,9 @@ mod tests { Projection: test.b [b:UInt32] LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq.b + sq.c:UInt32, a:UInt32] - Distinct: [sq.b + sq.c:UInt32, a:UInt32] - Projection: sq.b + sq.c, sq.a [sq.b + sq.c:UInt32, a:UInt32] + SubqueryAlias: __correlated_sq_1 [(sq.b + sq.c):UInt32, a:UInt32] + Distinct: [(sq.b + sq.c:UInt32), a:UInt32] + Projection: sq.b + sq.c, sq.a [(sq.b + sq.c):UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] " ) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 897e07cb987e..8c3079bf8e53 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ 
b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -774,11 +774,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - Filter: customer.c_custkey = __scalar_sq_1.max(orders.o_custkey) + Int32(1) [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N, __always_true:Boolean;N] - Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N, __always_true:Boolean;N] + Filter: customer.c_custkey = __scalar_sq_1.(max(orders.o_custkey) + Int32(1)) [c_custkey:Int64, c_name:Utf8, (max(orders.o_custkey) + Int32(1)):Int64;N, o_custkey:Int64;N, __always_true:Boolean;N] + Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, (max(orders.o_custkey) + Int32(1)):Int64;N, o_custkey:Int64;N, __always_true:Boolean;N] TableScan: customer [c_custkey:Int64, c_name:Utf8] - SubqueryAlias: __scalar_sq_1 [max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64, __always_true:Boolean] - Projection: max(orders.o_custkey) + Int32(1), orders.o_custkey, __always_true [max(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64, __always_true:Boolean] + SubqueryAlias: __scalar_sq_1 [(max(orders.o_custkey) + Int32(1)):Int64;N, o_custkey:Int64, __always_true:Boolean] + Projection: max(orders.o_custkey) + Int32(1), orders.o_custkey, __always_true [(max(orders.o_custkey) + Int32(1)):Int64;N, o_custkey:Int64, __always_true:Boolean] Aggregate: groupBy=[[orders.o_custkey, Boolean(true) AS __always_true]], aggr=[[max(orders.o_custkey)]] [o_custkey:Int64, __always_true:Boolean, max(orders.o_custkey):Int64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] " diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index ccf90893e17e..0aa30167b93e 100644 --- 
a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -404,7 +404,7 @@ mod tests { assert_optimized_plan_equal!( plan, @ r" - Projection: test.a, test.d, NOT test.b AS test.b = Boolean(false) + Projection: test.a, test.d, NOT test.b AS (test.b = Boolean(false)) TableScan: test " ) @@ -424,7 +424,7 @@ mod tests { assert_optimized_plan_equal!( plan, @ r" - Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b) AS max(test.b = Boolean(true)), min(test.b)]] + Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b) AS max((test.b = Boolean(true))), min(test.b)]] Projection: test.a, test.c, test.b TableScan: test " @@ -448,7 +448,7 @@ mod tests { assert_optimized_plan_equal!( plan, - @ "Values: (Int32(3) AS Int32(1) + Int32(2), Int32(1) AS Int32(2) - Int32(1))" + @ "Values: (Int32(3) AS (Int32(1) + Int32(2)), Int32(1) AS (Int32(2) - Int32(1)))" ) } @@ -492,7 +492,7 @@ mod tests { let actual = get_optimized_plan_formatted(plan, &time); let expected = - "Projection: NOT test.a AS Boolean(true) OR Boolean(false) != test.a\ + "Projection: NOT test.a AS ((Boolean(true) OR Boolean(false)) != test.a)\ \n TableScan: test"; assert_eq!(expected, actual); @@ -857,7 +857,7 @@ mod tests { assert_optimized_plan_equal!( plan, @ r" - Aggregate: groupBy=[[GROUPING SETS ((Int32(43) AS age, test.a), (Boolean(false) AS cond), (test.d AS e, Int32(3) AS Int32(1) + Int32(2)))]], aggr=[[]] + Aggregate: groupBy=[[GROUPING SETS ((Int32(43) AS age, test.a), (Boolean(false) AS cond), (test.d AS e, Int32(3) AS (Int32(1) + Int32(2))))]], aggr=[[]] TableScan: test " ) diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 50783a214342..5b69731bbdaa 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -431,7 +431,7 @@ mod tests { assert_optimized_plan_equal!( 
plan, @r" - Projection: count(alias1) AS count(DISTINCT Int32(2) * test.b) [count(DISTINCT Int32(2) * test.b):Int64] + Projection: count(alias1) AS count(DISTINCT (Int32(2) * test.b)) [count(DISTINCT (Int32(2) * test.b)):Int64] Aggregate: groupBy=[[]], aggr=[[count(alias1)]] [count(alias1):Int64] Aggregate: groupBy=[[Int32(2) * test.b AS alias1]], aggr=[[]] [alias1:Int64] TableScan: test [a:UInt32, b:UInt32, c:UInt32] @@ -536,7 +536,7 @@ mod tests { assert_optimized_plan_equal!( plan, @r" - Projection: group_alias_0 AS test.a + Int32(1), count(alias1) AS count(DISTINCT test.c) [test.a + Int32(1):Int64, count(DISTINCT test.c):Int64] + Projection: group_alias_0 AS (test.a + Int32(1)), count(alias1) AS count(DISTINCT test.c) [(test.a + Int32(1)):Int64, count(DISTINCT test.c):Int64] Aggregate: groupBy=[[group_alias_0]], aggr=[[count(alias1)]] [group_alias_0:Int64, count(alias1):Int64] Aggregate: groupBy=[[test.a + Int32(1) AS group_alias_0, test.c AS alias1]], aggr=[[]] [group_alias_0:Int64, alias1:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 95a9db6c8abd..07379ffa0086 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -54,7 +54,7 @@ fn case_when() -> Result<()> { assert_snapshot!( format!("{plan}"), @r#" -Projection: CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END +Projection: CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END TableScan: test projection=[col_int32] "# ); @@ -108,8 +108,8 @@ fn case_when_aggregate() -> Result<()> { assert_snapshot!( format!("{plan}"), @r#" - Projection: test.col_utf8, sum(CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END) AS n - 
Aggregate: groupBy=[[test.col_utf8]], aggr=[[sum(CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END) AS sum(CASE WHEN test.col_int32 > Int64(0) THEN Int64(1) ELSE Int64(0) END)]] + Projection: test.col_utf8, sum(CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END) AS n + Aggregate: groupBy=[[test.col_utf8]], aggr=[[sum(CASE WHEN (test.col_int32 > Int32(0)) THEN Int64(1) ELSE Int64(0) END) AS sum(CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END)]] TableScan: test projection=[col_int32, col_utf8] "# ); @@ -342,7 +342,7 @@ fn push_down_filter_groupby_expr_contains_alias() { assert_snapshot!( format!("{plan}"), @r#" - Projection: test.col_int32 + test.col_uint32 AS c, count(Int64(1)) AS count(*) + Projection: (test.col_int32 + test.col_uint32) AS c, count(Int64(1)) AS count(*) Aggregate: groupBy=[[CAST(test.col_int32 AS Int64) + CAST(test.col_uint32 AS Int64)]], aggr=[[count(Int64(1))]] Filter: CAST(test.col_int32 AS Int64) + CAST(test.col_uint32 AS Int64) > Int64(3) TableScan: test projection=[col_int32, col_uint32] diff --git a/datafusion/sql/tests/cases/params.rs b/datafusion/sql/tests/cases/params.rs index b3cc49c31071..1034ea11d700 100644 --- a/datafusion/sql/tests/cases/params.rs +++ b/datafusion/sql/tests/cases/params.rs @@ -303,7 +303,7 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan_with_params, @r" - Projection: Int64(1) + Int32(10) AS Int64(1) + $1 + Projection: Int64(1) + Int32(10) AS (Int64(1) + $1) EmptyRelation " ); @@ -331,7 +331,7 @@ fn test_prepare_statement_to_plan_params_as_constants() { assert_snapshot!( plan_with_params, @r" - Projection: Int64(1) + Int32(10) + Float64(10) AS Int64(1) + $1 + $2 + Projection: Int64(1) + Int32(10) + Float64(10) AS ((Int64(1) + $1) + $2) EmptyRelation " ); diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 365012b7f6b0..82c49fd33a68 100644 --- 
a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1454,7 +1454,7 @@ fn select_aggregate_with_group_by_with_having_using_derived_column_aggregate_not plan, @r#" Projection: person.first_name, max(person.age) - Filter: max(person.age) > Int64(100) AND min(person.id - Int64(2)) < Int64(50) + Filter: max(person.age) > Int64(100) AND min((person.id - Int64(2))) < Int64(50) Aggregate: groupBy=[[person.first_name]], aggr=[[max(person.age), min(person.id - Int64(2))]] TableScan: person "# @@ -1821,7 +1821,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_selected_and_resol assert_snapshot!( plan, @r#" - Projection: person.age + Int64(1), min(person.first_name) + Projection: (person.age + Int64(1)), min(person.first_name) Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]] TableScan: person "# @@ -1832,7 +1832,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_selected_and_resol assert_snapshot!( plan, @r#" - Projection: min(person.first_name), person.age + Int64(1) + Projection: min(person.first_name), (person.age + Int64(1)) Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]] TableScan: person "# @@ -1847,7 +1847,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_resolva assert_snapshot!( plan, @r#" - Projection: person.age + Int64(1) / Int64(2) * person.age + Int64(1), min(person.first_name) + Projection: (person.age + Int64(1)) / Int64(2) * (person.age + Int64(1)), min(person.first_name) Aggregate: groupBy=[[person.age + Int64(1)]], aggr=[[min(person.first_name)]] TableScan: person "# @@ -1864,7 +1864,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_not_res assert_snapshot!( err.strip_backtrace(), @r#" - Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "person.age" must appear in the GROUP BY clause or must be part of an 
aggregate function, currently only "person.age + Int64(1), min(person.first_name)" appears in the SELECT clause satisfies this requirement + Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "person.age" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "(person.age + Int64(1)), min(person.first_name)" appears in the SELECT clause satisfies this requirement "# ); } @@ -1877,7 +1877,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_sel assert_snapshot!( err.strip_backtrace(), @r#" - Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "person.age" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "person.age + Int64(1), min(person.first_name)" appears in the SELECT clause satisfies this requirement + Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "person.age" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "(person.age + Int64(1)), min(person.first_name)" appears in the SELECT clause satisfies this requirement "# ); } @@ -1930,7 +1930,7 @@ fn select_aggregate_with_non_column_inner_expression_with_groupby() { assert_snapshot!( plan, @r#" - Projection: person.state, min(person.age + Int64(1)) + Projection: person.state, min((person.age + Int64(1))) Aggregate: groupBy=[[person.state]], aggr=[[min(person.age + Int64(1))]] TableScan: person "# @@ -2780,7 +2780,7 @@ fn empty_over_plus() { assert_snapshot!( plan, @r#" -Projection: orders.order_id, max(orders.qty * Float64(1.1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +Projection: orders.order_id, max((orders.qty * Float64(1.1))) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING WindowAggr: windowExpr=[[max(orders.qty * Float64(1.1)) ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] TableScan: orders "# @@ -3230,7 +3230,7 @@ fn select_groupby_orderby() { assert_snapshot!( plan, @r#" -Sort: avg(person.age) + avg(person.age) ASC NULLS LAST +Sort: (avg(person.age) + avg(person.age)) ASC NULLS LAST Projection: avg(person.age) + avg(person.age), date_trunc(Utf8("month"), person.birth_date) AS birth_date Aggregate: groupBy=[[person.birth_date]], aggr=[[avg(person.age)]] TableScan: person @@ -3690,7 +3690,7 @@ fn rank_partition_grouping() { assert_snapshot!( plan, @r#" -Projection: sum(person.age) AS total_sum, person.state, person.last_name, grouping(person.state) + grouping(person.last_name) AS x, rank() PARTITION BY [grouping(person.state) + grouping(person.last_name), CASE WHEN grouping(person.last_name) = Int64(0) THEN person.state END] ORDER BY [sum(person.age) DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS the_rank +Projection: sum(person.age) AS total_sum, person.state, person.last_name, grouping(person.state) + grouping(person.last_name) AS x, rank() PARTITION BY [(grouping(person.state) + grouping(person.last_name)), CASE WHEN (grouping(person.last_name) = Int64(0)) THEN person.state END] ORDER BY [sum(person.age) DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS the_rank WindowAggr: windowExpr=[[rank() PARTITION BY [grouping(person.state) + grouping(person.last_name), CASE WHEN grouping(person.last_name) = Int64(0) THEN person.state END] ORDER BY [sum(person.age) DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] Aggregate: groupBy=[[ROLLUP (person.state, person.last_name)]], aggr=[[sum(person.age), grouping(person.state), grouping(person.last_name)]] TableScan: person diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 4a121e41d27e..3d101ac8908f 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ 
b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -53,7 +53,7 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER + Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT @@ -214,11 +214,11 @@ mod tests { assert_snapshot!( plan_str, @r#" - 
Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT Limit: skip=0, fetch=20 - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST - Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT - Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Sort: sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) DESC NULLS FIRST + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))]] Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8("1993-10-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8("R") AND 
CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY Cross Join: @@ -239,12 +239,12 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value - Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST - Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > () + Projection: PARTSUPP.PS_PARTKEY, sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)) AS value + Sort: sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)) DESC NULLS FIRST + Filter: sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)) > () Subquery: - Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10) - Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Projection: sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)) * Decimal128(Some(1000000),11,10) + Aggregate: groupBy=[[]], aggr=[[sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY))]] Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") Cross Join: @@ -252,7 +252,7 @@ mod tests { TableScan: PARTSUPP TableScan: SUPPLIER TableScan: NATION - Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum((PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY))]] Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") Cross Join: @@ -394,7 +394,7 @@ mod tests { assert_snapshot!( plan_str, @r#" - Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]] + Aggregate: groupBy=[[]], 
aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) AS REVENUE]] Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#12") AND (PART.P_CONTAINER = CAST(Utf8("SM CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#23") AND (PART.P_CONTAINER = CAST(Utf8("MED BAG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PKG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PACK") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#34") AND (PART.P_CONTAINER = CAST(Utf8("LG CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = 
CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") Cross Join: diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs index e916b4cb0e1a..d3d37f299fd9 100644 --- a/datafusion/substrait/tests/cases/emit_kind_tests.rs +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -99,7 +99,7 @@ mod tests { assert_snapshot!( plan2, @r#" - Projection: random() AS c1, data.a + Int64(1) AS c2 + Projection: random() AS c1, (data.a + Int64(1)) AS c2 Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1) TableScan: data "# diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 352cd12c4d12..a5aaabd9145e 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -486,7 +486,7 @@ async fn aggregate_case() -> Result<()> { assert_snapshot!( plan, @r#" - Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE Int64(NULL) END) AS sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE NULL END)]] + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE Int64(NULL) END) AS sum(CASE WHEN (data.a > Int64(0)) THEN Int64(1) ELSE NULL END)]] TableScan: data projection=[a] "# ); @@ -1193,7 +1193,7 @@ async fn duplicate_column() -> Result<()> { assert_snapshot!( plan, @r#" - Projection: data.a + Int64(1) AS sum_a, data.a + Int64(1) AS data.a + Int64(1)__temp__0 AS sum_a_2 + Projection: (data.a + Int64(1)) AS sum_a, (data.a + Int64(1)) AS (data.a + Int64(1))__temp__0 AS sum_a_2 Projection: data.a + Int64(1) TableScan: data projection=[a] "# From 1c9c48aef213fcb9ac14f62658bb1578024b0de8 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sat, 31 May 2025 18:33:22 +0200 Subject: [PATCH 07/12] more 
tests --- datafusion/core/src/dataframe/mod.rs | 10 +++--- datafusion/expr/src/expr.rs | 4 +-- .../optimizer/src/common_subexpr_eliminate.rs | 4 +-- .../src/decorrelate_predicate_subquery.rs | 2 +- .../optimizer/tests/optimizer_integration.rs | 4 +-- .../tests/cases/consumer_integration.rs | 32 +++++++++---------- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 69992e57ca7d..99277115cadd 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -358,11 +358,11 @@ impl DataFrame { /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; /// let df = df.select(vec![col("a"), col("b") * col("c")])?; /// let expected = vec![ - /// "+---+-----------------------+", - /// "| a | ?table?.b * ?table?.c |", - /// "+---+-----------------------+", - /// "| 1 | 6 |", - /// "+---+-----------------------+" + /// "+---+-------------------------+", + /// "| a | (?table?.b * ?table?.c) |", + /// "+---+-------------------------+", + /// "| 1 | 6 |", + /// "+---+-------------------------+" /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 5fcd00cfb282..ebe6a0534264 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1196,10 +1196,10 @@ impl Expr { /// ``` /// # use datafusion_expr::{col, lit}; /// let expr = col("foo").eq(lit(42)); - /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string()); + /// assert_eq!("(foo = Int32(42))", expr.schema_name().to_string()); /// /// let expr = col("foo").alias("bar").eq(lit(11)); - /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string()); + /// assert_eq!("(bar = Int32(11))", expr.schema_name().to_string()); /// ``` /// /// [`Schema`]: arrow::datatypes::Schema diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs 
b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 2078d0b0a650..3ec0044e7f68 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1024,7 +1024,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: UInt32(1) + test.a, UInt32(1) + __common_expr_2 AS col1, UInt32(1) - __common_expr_2 AS col2, __common_expr_4 AS avg((UInt32(1) + test.a)), UInt32(1) + __common_expr_3 AS col3, UInt32(1) - __common_expr_3 AS col4, __common_expr_5 AS my_agg((UInt32(1) + test.a)) + Projection: (UInt32(1) + test.a), UInt32(1) + __common_expr_2 AS col1, UInt32(1) - __common_expr_2 AS col2, __common_expr_4 AS avg((UInt32(1) + test.a)), UInt32(1) + __common_expr_3 AS col3, UInt32(1) - __common_expr_3 AS col4, __common_expr_5 AS my_agg((UInt32(1) + test.a)) Aggregate: groupBy=[[__common_expr_1 AS (UInt32(1) + test.a)]], aggr=[[avg(__common_expr_1) AS __common_expr_2, my_agg(__common_expr_1) AS __common_expr_3, avg(__common_expr_1 AS (UInt32(1) + test.a)) AS __common_expr_4, my_agg(__common_expr_1 AS (UInt32(1) + test.a)) AS __common_expr_5]] Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c TableScan: test @@ -1724,7 +1724,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS test.a + test.b BETWEEN Int32(0) AND Int32(10), __common_expr_1 AS test.b + test.a BETWEEN Int32(0) AND Int32(10) + Projection: __common_expr_1 AS (test.a + test.b) BETWEEN Int32(0) AND Int32(10), __common_expr_1 AS (test.b + test.a) BETWEEN Int32(0) AND Int32(10) Projection: test.a + test.b BETWEEN Int32(0) AND Int32(10) AS __common_expr_1, test.a, test.b, test.c TableScan: test " diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 60a3632ce345..21dc0cc1b684 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ 
b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -1866,7 +1866,7 @@ mod tests { LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [(sq.b + sq.c):UInt32, a:UInt32] - Distinct: [(sq.b + sq.c:UInt32), a:UInt32] + Distinct: [(sq.b + sq.c):UInt32, a:UInt32] Projection: sq.b + sq.c, sq.a [(sq.b + sq.c):UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] " diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 07379ffa0086..d5f34f039dab 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -65,7 +65,7 @@ Projection: CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END assert_snapshot!( format!("{plan}"), @r#" - Projection: CASE WHEN test.col_uint32 > UInt32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN test.col_uint32 > Int64(0) THEN Int64(1) ELSE Int64(0) END + Projection: CASE WHEN test.col_uint32 > UInt32(0) THEN Int64(1) ELSE Int64(0) END AS CASE WHEN (test.col_uint32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END TableScan: test projection=[col_uint32] "# ); @@ -109,7 +109,7 @@ fn case_when_aggregate() -> Result<()> { format!("{plan}"), @r#" Projection: test.col_utf8, sum(CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END) AS n - Aggregate: groupBy=[[test.col_utf8]], aggr=[[sum(CASE WHEN (test.col_int32 > Int32(0)) THEN Int64(1) ELSE Int64(0) END) AS sum(CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END)]] + Aggregate: groupBy=[[test.col_utf8]], aggr=[[sum(CASE WHEN test.col_int32 > Int32(0) THEN Int64(1) ELSE Int64(0) END) AS sum(CASE WHEN (test.col_int32 > Int64(0)) THEN Int64(1) ELSE Int64(0) END)]] TableScan: test projection=[col_int32, col_utf8] "# ); diff --git 
a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 3d101ac8908f..940f8dbf3180 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -53,9 +53,9 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER + Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS SUM_DISC_PRICE, sum(((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) * (Int32(1) + LINEITEM.L_TAX))) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST - Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] + Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - 
LINEITEM.L_DISCOUNT))), sum(((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) * (Int32(1) + LINEITEM.L_TAX))), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT Filter: LINEITEM.L_SHIPDATE <= Date32("1998-12-01") - IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 10368000 }") TableScan: LINEITEM @@ -105,11 +105,11 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Projection: LINEITEM.L_ORDERKEY, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY Limit: skip=0, fetch=10 - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST - Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY - Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Sort: sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST + Projection: LINEITEM.L_ORDERKEY, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)))]] 
Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: CUSTOMER.C_MKTSEGMENT = Utf8("BUILDING") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-03-15") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8("1995-03-15") AS Date32) Cross Join: @@ -148,9 +148,9 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST - Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: NATION.N_NAME, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS REVENUE + Sort: sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) DESC NULLS FIRST + Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)))]] Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("ASIA") AND ORDERS.O_ORDERDATE >= CAST(Utf8("1994-01-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-01-01") AS Date32) Cross Join: @@ -175,7 +175,7 @@ mod tests { assert_snapshot!( plan_str, @r#" - Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]] + Aggregate: groupBy=[[]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT)) AS REVENUE]] Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT Filter: 
LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2)) TableScan: LINEITEM @@ -218,7 +218,7 @@ mod tests { Limit: skip=0, fetch=20 Sort: sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) DESC NULLS FIRST Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT - Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))]] + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)))]] Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8("1993-10-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8("R") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY Cross Join: @@ -271,9 +271,9 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != 
Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT + Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT")) OR (ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST - Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END)]] + Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT")) AND (ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END)]] Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8("MAIL") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("SHIP") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8("1995-01-01") AS Date32) Cross Join: @@ -310,8 +310,8 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - 
LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE - Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN (LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) ELSE Decimal128(Some(0),19,4) END) / sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS PROMO_REVENUE + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN (LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) ELSE Decimal128(Some(0),19,4) END), sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)))]] Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8("PROMO%") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32("1995-09-01") AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-10-01") AS Date32) Cross Join: @@ -394,7 +394,7 @@ mod tests { assert_snapshot!( plan_str, @r#" - Aggregate: groupBy=[[]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT)) AS REVENUE]] + Aggregate: groupBy=[[]], aggr=[[sum((LINEITEM.L_EXTENDEDPRICE * (Int32(1) - LINEITEM.L_DISCOUNT))) AS REVENUE]] Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#12") AND (PART.P_CONTAINER = CAST(Utf8("SM CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PACK") AS Utf8) OR 
PART.P_CONTAINER = CAST(Utf8("SM PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#23") AND (PART.P_CONTAINER = CAST(Utf8("MED BAG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PKG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PACK") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#34") AND (PART.P_CONTAINER = CAST(Utf8("LG CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") Cross Join: From 8b1f34c68db2e29e34a1e197ea48fb293ec5dd79 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sat, 31 May 2025 18:45:43 +0200 Subject: [PATCH 08/12] additional tests --- datafusion/optimizer/src/common_subexpr_eliminate.rs | 2 +- .../substrait/tests/cases/consumer_integration.rs 
| 4 ++-- docs/source/library-user-guide/adding-udfs.md | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 3ec0044e7f68..b10770001615 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1740,7 +1740,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS test.c BETWEEN test.a + test.b AND Int32(10), __common_expr_1 AS test.c BETWEEN test.b + test.a AND Int32(10) + Projection: __common_expr_1 AS test.c BETWEEN (test.a + test.b) AND Int32(10), __common_expr_1 AS test.c BETWEEN (test.b + test.a) AND Int32(10) Projection: test.c BETWEEN test.a + test.b AND Int32(10) AS __common_expr_1, test.a, test.b, test.c TableScan: test " diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 940f8dbf3180..2f9e625aa396 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -271,9 +271,9 @@ mod tests { assert_snapshot!( plan_str, @r#" - Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT")) OR (ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT + Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT")) OR (ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT")) AND (ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST - Aggregate: 
groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT")) AND (ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END)]] + Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT")) OR (ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ((ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT")) AND (ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH"))) THEN Int32(1) ELSE Int32(0) END)]] Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8("MAIL") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("SHIP") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8("1995-01-01") AS Date32) Cross Join: diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 8fb8a59fb860..e428ece52623 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1232,11 +1232,11 @@ async fn main() -> Result<()> { let results = ctx.sql("select 'foo'->'bar';").await?.collect().await?; let expected = [ - "+----------------------------+", - "| Utf8(\"foo\") || Utf8(\"bar\") |", - "+----------------------------+", - "| foobar |", - "+----------------------------+", + "+------------------------------+", + "| (Utf8(\"foo\") || Utf8(\"bar\")) |", + 
"+------------------------------+", + "| foobar |", + "+------------------------------+", ]; assert_batches_eq!(&expected, &results); From 0981c991deb4eea63c9738c4f8147c970633a8ae Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sat, 31 May 2025 19:24:16 +0200 Subject: [PATCH 09/12] More tests --- datafusion/expr/src/expr.rs | 2 +- datafusion/optimizer/src/common_subexpr_eliminate.rs | 2 +- datafusion/optimizer/src/push_down_filter.rs | 2 +- datafusion/substrait/tests/cases/roundtrip_logical_plan.rs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ebe6a0534264..d30207221224 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -3588,7 +3588,7 @@ mod test { let expr = -(lit(1) + (lit(2))); assert_eq!( format!("{}", SchemaDisplay(&expr)), - "(- (Int32(1) + (Int32(2)))" + "(- (Int32(1) + Int32(2)))" ); } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index b10770001615..a6d61a07961f 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1757,7 +1757,7 @@ mod test { assert_optimized_plan_equal!( plan, @ r" - Projection: __common_expr_1 AS my_udf(test.a + test.b), __common_expr_1 AS my_udf(test.b + test.a) + Projection: __common_expr_1 AS my_udf((test.a + test.b)), __common_expr_1 AS my_udf((test.b + test.a)) Projection: my_udf(test.a + test.b) AS __common_expr_1, test.a, test.b, test.c TableScan: test " diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index bbf0b0dd810e..108f9eaebe15 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1550,7 +1550,7 @@ mod tests { fn push_agg_need_replace_expr() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan()?) 
.aggregate(vec![add(col("b"), col("a"))], vec![sum(col("a")), col("b")])? - .filter(col("test.b + test.a").gt(lit(10i64)))? + .filter(col("(test.b + test.a)").gt(lit(10i64)))? .build()?; assert_optimized_plan_equal!( plan, diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index a5aaabd9145e..75f40b464c61 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -800,7 +800,7 @@ async fn simple_intersect() -> Result<()> { check_constant("count(2)", "count(Int64(2))").await?; check_constant( "count(1 + 2)", - "count(Int64(3)) AS count(Int64(1) + Int64(2))", + "count(Int64(3)) AS count((Int64(1) + Int64(2)))", ) .await?; Ok(()) @@ -983,7 +983,7 @@ async fn simple_intersect_table_reuse() -> Result<()> { check_constant("count(2)", "count(Int64(2))").await?; check_constant( "count(1 + 2)", - "count(Int64(3)) AS count(Int64(1) + Int64(2))", + "count(Int64(3)) AS count((Int64(1) + Int64(2)))", ) .await?; From a7769e88b25e6ba39791df0b6f9df7dca89ea658 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sat, 31 May 2025 19:25:17 +0200 Subject: [PATCH 10/12] sqllogictests --- .../test_files/agg_func_substitute.slt | 10 +-- .../sqllogictest/test_files/aggregate.slt | 2 +- datafusion/sqllogictest/test_files/binary.slt | 4 +- datafusion/sqllogictest/test_files/cte.slt | 4 +- datafusion/sqllogictest/test_files/delete.slt | 2 +- .../sqllogictest/test_files/group_by.slt | 2 +- datafusion/sqllogictest/test_files/imdb.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 72 +++++++++---------- .../test_files/min_max/fixed_size_list.slt | 1 - .../test_files/min_max/large_list.slt | 5 -- .../sqllogictest/test_files/min_max/list.slt | 1 - .../optimizer_group_by_constant.slt | 2 +- datafusion/sqllogictest/test_files/scalar.slt | 6 +- datafusion/sqllogictest/test_files/select.slt | 26 ++++--- 
.../test_files/string/string_view.slt | 4 +- .../sqllogictest/test_files/subquery.slt | 26 +++---- .../sqllogictest/test_files/type_coercion.slt | 4 +- datafusion/sqllogictest/test_files/window.slt | 4 +- 18 files changed, 84 insertions(+), 93 deletions(-) diff --git a/datafusion/sqllogictest/test_files/agg_func_substitute.slt b/datafusion/sqllogictest/test_files/agg_func_substitute.slt index 9aeaaacb1071..cc32e419c8b8 100644 --- a/datafusion/sqllogictest/test_files/agg_func_substitute.slt +++ b/datafusion/sqllogictest/test_files/agg_func_substitute.slt @@ -78,16 +78,16 @@ EXPLAIN SELECT a, ARRAY_AGG(c ORDER BY c)[1 + 100] as result GROUP BY a; ---- logical_plan -01)Projection: multiple_ordered_table.a, nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS result -02)--Aggregate: groupBy=[[multiple_ordered_table.a]], aggr=[[nth_value(multiple_ordered_table.c, Int64(101)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]]] +01)Projection: multiple_ordered_table.a, nth_value(multiple_ordered_table.c,(Int64(1) + Int64(100))) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS result +02)--Aggregate: groupBy=[[multiple_ordered_table.a]], aggr=[[nth_value(multiple_ordered_table.c, Int64(101)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS nth_value(multiple_ordered_table.c,(Int64(1) + Int64(100))) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]]] 03)----TableScan: multiple_ordered_table projection=[a, c] physical_plan -01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result] -02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted +01)ProjectionExec: 
expr=[a@0 as a, nth_value(multiple_ordered_table.c,(Int64(1) + Int64(100))) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result] +02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,(Int64(1) + Int64(100))) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted 03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] 04)------CoalesceBatchesExec: target_batch_size=8192 05)--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 -06)----------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted +06)----------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,(Int64(1) + Int64(100))) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 002014827e11..eaa700a9cbef 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -6712,7 +6712,7 @@ logical_plan 02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1)), count(Int64(2))]] 03)----TableScan: t projection=[] physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 * count(Int64(2))@1 as count(Int64(1)) * count(Int64(2))] +01)ProjectionExec: expr=[count(Int64(1))@0 * count(Int64(2))@1 as (count(Int64(1)) * count(Int64(2)))] 02)--AggregateExec: mode=Single, gby=[], aggr=[count(Int64(1)), count(Int64(2))] 03)----DataSourceExec: partitions=1, 
partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/binary.slt b/datafusion/sqllogictest/test_files/binary.slt index 1077c32e46f3..ac2905dde331 100644 --- a/datafusion/sqllogictest/test_files/binary.slt +++ b/datafusion/sqllogictest/test_files/binary.slt @@ -181,10 +181,10 @@ query TT explain SELECT column1, column1 = X'000102' FROM t ---- logical_plan -01)Projection: t.column1, t.column1 = FixedSizeBinary(3, "0,1,2") AS t.column1 = Binary("0,1,2") +01)Projection: t.column1, t.column1 = FixedSizeBinary(3, "0,1,2") AS (t.column1 = Binary("0,1,2")) 02)--TableScan: t projection=[column1] physical_plan -01)ProjectionExec: expr=[column1@0 as column1, column1@0 = 000102 as t.column1 = Binary("0,1,2")] +01)ProjectionExec: expr=[column1@0 as column1, column1@0 = 000102 as (t.column1 = Binary("0,1,2"))] 02)--DataSourceExec: partitions=1, partition_sizes=[1] statement ok diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 32320a06f4fb..94cd185c331b 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -964,7 +964,7 @@ physical_plan 02)--ProjectionExec: expr=[1 as n] 03)----PlaceholderRowExec 04)--CoalescePartitionsExec -05)----ProjectionExec: expr=[n@0 + 1 as numbers.n + Int64(1)] +05)----ProjectionExec: expr=[n@0 + 1 as (numbers.n + Int64(1))] 06)------CoalesceBatchesExec: target_batch_size=8182 07)--------FilterExec: n@0 < 10 08)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -990,7 +990,7 @@ physical_plan 02)--ProjectionExec: expr=[1 as n] 03)----PlaceholderRowExec 04)--CoalescePartitionsExec -05)----ProjectionExec: expr=[n@0 + 1 as numbers.n + Int64(1)] +05)----ProjectionExec: expr=[n@0 + 1 as (numbers.n + Int64(1))] 06)------CoalesceBatchesExec: target_batch_size=8182 07)--------FilterExec: n@0 < 10 08)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 diff --git 
a/datafusion/sqllogictest/test_files/delete.slt b/datafusion/sqllogictest/test_files/delete.slt index d096aa9f43af..ecf3e44507d9 100644 --- a/datafusion/sqllogictest/test_files/delete.slt +++ b/datafusion/sqllogictest/test_files/delete.slt @@ -37,7 +37,7 @@ logical_plan physical_plan_error This feature is not implemented: Unsupported logical plan: Dml(Delete) -# Filtered by existing columns +# Filtered by existing columns query TT explain delete from t1 where a = 1 and b = 2 and c > 3 and d != 4; ---- diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 9e67018ecd0b..f8c9ec0d6890 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -3419,7 +3419,7 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [sn@0 ASC NULLS LAST] 02)--SortExec: expr=[sn@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[sn@0 as sn, amount@1 as amount, 2 * CAST(sn@0 AS Int64) as Int64(2) * s.sn] +03)----ProjectionExec: expr=[sn@0 as sn, amount@1 as amount, 2 * CAST(sn@0 AS Int64) as (Int64(2) * s.sn)] 04)------AggregateExec: mode=FinalPartitioned, gby=[sn@0 as sn, amount@1 as amount], aggr=[] 05)--------CoalesceBatchesExec: target_batch_size=4 06)----------RepartitionExec: partitioning=Hash([sn@0, amount@1], 8), input_partitions=8 diff --git a/datafusion/sqllogictest/test_files/imdb.slt b/datafusion/sqllogictest/test_files/imdb.slt index 412e15f6800c..c17f9c47c745 100644 --- a/datafusion/sqllogictest/test_files/imdb.slt +++ b/datafusion/sqllogictest/test_files/imdb.slt @@ -1339,7 +1339,7 @@ WHERE k.keyword like '%sequel%' ---- Avengers: Endgame -# 4a - Query with certain actor names +# 4a - Query with certain actor names query TT SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t diff --git 
a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index b5189d16ec24..3cf10dbe5f88 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -1491,11 +1491,11 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int] physical_plan -01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)] +01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as (join_t1.t1_id + Int64(11))] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t1.t1_id + Int64(11)@3, CAST(join_t2.t2_id AS Int64)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@4, t2_name@5, t2_int@6] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t1.t1_id + Int64(11))@3, CAST(join_t2.t2_id AS Int64)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@4, t2_name@5, t2_int@6] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)] +05)--------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as (join_t1.t1_id + Int64(11))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] 08)------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)] @@ -1517,11 +1517,11 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 
04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int] physical_plan -01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)] +01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as (join_t1.t1_id + Int64(11))] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t1.t1_id + Int64(11)@3, CAST(join_t2.t2_id AS Int64)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@4, t2_name@5, t2_int@6] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t1.t1_id + Int64(11))@3, CAST(join_t2.t2_id AS Int64)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@4, t2_name@5, t2_int@6] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)] +05)--------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as (join_t1.t1_id + Int64(11))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] 08)------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)] @@ -1547,12 +1547,12 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id + UInt32(1)@1, join_t1.t1_id + UInt32(12)@2)], projection=[t2_id@0, t1_id@2, t1_name@3] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t2.t2_id + UInt32(1))@1, 
(join_t1.t1_id + UInt32(12))@2)], projection=[t2_id@0, t1_id@2, t1_name@3] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as join_t2.t2_id + UInt32(1)] +05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as (join_t2.t2_id + UInt32(1))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] -08)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)] +08)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as (join_t1.t1_id + UInt32(12))] 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)----------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1573,12 +1573,12 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id + UInt32(1)@1, join_t1.t1_id + UInt32(12)@2)], projection=[t2_id@0, t1_id@2, t1_name@3] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t2.t2_id + UInt32(1))@1, (join_t1.t1_id + UInt32(12))@2)], projection=[t2_id@0, t1_id@2, t1_name@3] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as join_t2.t2_id + UInt32(1)] +05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as (join_t2.t2_id + UInt32(1))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] -08)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)] +08)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as (join_t1.t1_id + UInt32(12))] 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), 
input_partitions=1 10)----------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1602,9 +1602,9 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t2_id@0, join_t1.t1_id + UInt32(11)@2)], projection=[t2_id@0, t1_id@1, t1_name@2] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t2_id@0, (join_t1.t1_id + UInt32(11))@2)], projection=[t2_id@0, t1_id@1, t1_name@2] 04)------DataSourceExec: partitions=1, partition_sizes=[1] -05)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)] +05)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as (join_t1.t1_id + UInt32(11))] 06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)----------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1626,9 +1626,9 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t2_id@0, join_t1.t1_id + UInt32(11)@2)], projection=[t2_id@0, t1_id@1, t1_name@2] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t2_id@0, (join_t1.t1_id + UInt32(11))@2)], projection=[t2_id@0, t1_id@1, t1_name@2] 04)------DataSourceExec: partitions=1, partition_sizes=[1] -05)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)] +05)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as (join_t1.t1_id + UInt32(11))] 06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)----------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1652,9 +1652,9 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, 
t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id - UInt32(11)@1, t1_id@0)], projection=[t2_id@0, t1_id@2, t1_name@3] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t2.t2_id - UInt32(11))@1, t1_id@0)], projection=[t2_id@0, t1_id@2, t1_name@3] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)] +05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as (join_t2.t2_id - UInt32(11))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] 08)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1678,9 +1678,9 @@ logical_plan physical_plan 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id - UInt32(11)@1, t1_id@0)], projection=[t2_id@0, t1_id@2, t1_name@3] +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((join_t2.t2_id - UInt32(11))@1, t1_id@0)], projection=[t2_id@0, t1_id@2, t1_name@3] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)] +05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as (join_t2.t2_id - UInt32(11))] 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------DataSourceExec: partitions=1, partition_sizes=[1] 08)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1704,9 +1704,9 @@ logical_plan 03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int] physical_plan 01)CoalesceBatchesExec: target_batch_size=2 -02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1_id@0, join_t2.t2_id - UInt32(11)@3)], 
projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5] +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1_id@0, (join_t2.t2_id - UInt32(11))@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5] 03)----DataSourceExec: partitions=1, partition_sizes=[1] -04)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)] +04)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as (join_t2.t2_id - UInt32(11))] 05)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)--------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1726,9 +1726,9 @@ logical_plan 03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int] physical_plan 01)CoalesceBatchesExec: target_batch_size=2 -02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1_id@0, join_t2.t2_id - UInt32(11)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5] +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1_id@0, (join_t2.t2_id - UInt32(11))@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5] 03)----DataSourceExec: partitions=1, partition_sizes=[1] -04)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)] +04)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as (join_t2.t2_id - UInt32(11))] 05)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)--------DataSourceExec: partitions=1, partition_sizes=[1] @@ -1831,7 +1831,7 @@ from join_t1 where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) +01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = 
__correlated_sq_1.(join_t2.t2_id + Int64(1)) 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 03)--SubqueryAlias: __correlated_sq_1 04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) @@ -1860,7 +1860,7 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int +01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 03)--SubqueryAlias: __correlated_sq_1 04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int @@ -1896,7 +1896,7 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name +01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 03)--SubqueryAlias: __correlated_sq_1 04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name @@ -1928,7 +1928,7 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2 where join_t1.t1_int > 0) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) +01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) 02)--Filter: join_t1.t1_int > UInt32(0) 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 04)--SubqueryAlias: __correlated_sq_1 @@ -1945,7 +1945,7 @@ where join_t1.t1_id + 12 not in (select join_t2.t2_id + 1 
from join_t2 where join_t1.t1_int > 0) ---- logical_plan -01)LeftAnti Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int > UInt32(0) +01)LeftAnti Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) Filter: join_t1.t1_int > UInt32(0) 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 03)--SubqueryAlias: __correlated_sq_1 04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) @@ -1967,7 +1967,7 @@ where join_t1.t1_id + 12 in and join_t1.t1_id > 0 ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name +01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name 02)--Filter: join_t1.t1_id > UInt32(0) 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 04)--SubqueryAlias: __correlated_sq_1 @@ -2000,8 +2000,8 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) and join_t1.t1_id > 0 ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_2.join_t2.t2_int + Int64(1) -02)--LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) +01)LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_2.(join_t2.t2_int + Int64(1)) +02)--LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(join_t2.t2_id + Int64(1)) 03)----Filter: join_t1.t1_id > UInt32(0) 04)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 05)----SubqueryAlias: __correlated_sq_1 @@ -2379,7 +2379,7 @@ logical_plan 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] 03)--SubqueryAlias: __correlated_sq_1 04)----Projection: join_t2.t2_id 
-05)------Aggregate: groupBy=[[join_t2.t2_id + join_t2.t2_int, join_t2.t2_int, join_t2.t2_id]], aggr=[[]] +05)------Aggregate: groupBy=[[(join_t2.t2_id + join_t2.t2_int), join_t2.t2_int, join_t2.t2_id]], aggr=[[]] 06)--------Projection: join_t2.t2_id + join_t2.t2_int, join_t2.t2_int, join_t2.t2_id 07)----------TableScan: join_t2 projection=[t2_id, t2_int] @@ -4731,10 +4731,10 @@ logical_plan 03)--TableScan: orders projection=[customer_id] physical_plan 01)CoalesceBatchesExec: target_batch_size=3 -02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(person.id + Int64(10)@1, orders.customer_id * Int64(2)@1)], projection=[id@0, customer_id@2] -03)----ProjectionExec: expr=[id@0 as id, CAST(id@0 AS Int64) + 10 as person.id + Int64(10)] +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[((person.id + Int64(10))@1, (orders.customer_id * Int64(2))@1)], projection=[id@0, customer_id@2] +03)----ProjectionExec: expr=[id@0 as id, CAST(id@0 AS Int64) + 10 as (person.id + Int64(10))] 04)------DataSourceExec: partitions=1, partition_sizes=[0] -05)----ProjectionExec: expr=[customer_id@0 as customer_id, CAST(customer_id@0 AS Int64) * 2 as orders.customer_id * Int64(2)] +05)----ProjectionExec: expr=[customer_id@0 as customer_id, CAST(customer_id@0 AS Int64) * 2 as (orders.customer_id * Int64(2))] 06)------DataSourceExec: partitions=1, partition_sizes=[0] statement count 0 diff --git a/datafusion/sqllogictest/test_files/min_max/fixed_size_list.slt b/datafusion/sqllogictest/test_files/min_max/fixed_size_list.slt index 164daec228aa..aa623b63cdc7 100644 --- a/datafusion/sqllogictest/test_files/min_max/fixed_size_list.slt +++ b/datafusion/sqllogictest/test_files/min_max/fixed_size_list.slt @@ -131,4 +131,3 @@ SELECT max(column2) OVER (ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOW ---- [4, 5] [4, 5] - diff --git a/datafusion/sqllogictest/test_files/min_max/large_list.slt b/datafusion/sqllogictest/test_files/min_max/large_list.slt index 0dd7b5631bdc..44789e9dd786 
100644 --- a/datafusion/sqllogictest/test_files/min_max/large_list.slt +++ b/datafusion/sqllogictest/test_files/min_max/large_list.slt @@ -141,8 +141,3 @@ create table max_window_different_column as (select from max_base_window_different_column); include ./queries.slt.part - - - - - diff --git a/datafusion/sqllogictest/test_files/min_max/list.slt b/datafusion/sqllogictest/test_files/min_max/list.slt index da56b1a8ea02..e63e8303c7d5 100644 --- a/datafusion/sqllogictest/test_files/min_max/list.slt +++ b/datafusion/sqllogictest/test_files/min_max/list.slt @@ -130,4 +130,3 @@ create table max_window_different_column as ( ; include ./queries.slt.part - diff --git a/datafusion/sqllogictest/test_files/optimizer_group_by_constant.slt b/datafusion/sqllogictest/test_files/optimizer_group_by_constant.slt index de6a153f58d9..57d97982481f 100644 --- a/datafusion/sqllogictest/test_files/optimizer_group_by_constant.slt +++ b/datafusion/sqllogictest/test_files/optimizer_group_by_constant.slt @@ -48,7 +48,7 @@ FROM test_table t GROUP BY 1, 2, 3, 4 ---- logical_plan -01)Projection: t.c1, Int64(99999), t.c5 + t.c8, Utf8("test"), count(Int64(1)) +01)Projection: t.c1, Int64(99999), (t.c5 + t.c8), Utf8("test"), count(Int64(1)) 02)--Aggregate: groupBy=[[t.c1, t.c5 + t.c8]], aggr=[[count(Int64(1))]] 03)----SubqueryAlias: t 04)------TableScan: test_table projection=[c1, c5, c8] diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index f583d659fd4f..7c9cb55c519a 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1832,10 +1832,10 @@ query TT EXPLAIN SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string; ---- logical_plan -01)Projection: simple_string.letter, simple_string.letter = Utf8("A") AS simple_string.letter = left(Utf8("APACHE"),Int64(1)) +01)Projection: simple_string.letter, simple_string.letter = Utf8("A") AS (simple_string.letter = 
left(Utf8("APACHE"),Int64(1))) 02)--TableScan: simple_string projection=[letter] physical_plan -01)ProjectionExec: expr=[letter@0 as letter, letter@0 = A as simple_string.letter = left(Utf8("APACHE"),Int64(1))] +01)ProjectionExec: expr=[letter@0 as letter, letter@0 = A as (simple_string.letter = left(Utf8("APACHE"),Int64(1)))] 02)--DataSourceExec: partitions=1, partition_sizes=[1] query TB @@ -1854,7 +1854,7 @@ logical_plan 01)Projection: simple_string.letter, simple_string.letter = left(simple_string.letter2, Int64(1)) 02)--TableScan: simple_string projection=[letter, letter2] physical_plan -01)ProjectionExec: expr=[letter@0 as letter, letter@0 = left(letter2@1, 1) as simple_string.letter = left(simple_string.letter2,Int64(1))] +01)ProjectionExec: expr=[letter@0 as letter, letter@0 = left(letter2@1, 1) as (simple_string.letter = left(simple_string.letter2,Int64(1)))] 02)--DataSourceExec: partitions=1, partition_sizes=[1] query TB diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index aa14faf984e4..ab7ee13bbe37 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -435,7 +435,7 @@ physical_plan DataSourceExec: partitions=1, partition_sizes=[1] query TT EXPLAIN VALUES (('1'||'2')::int unsigned) ---- -logical_plan Values: (UInt32(12) AS Utf8("1") || Utf8("2")) +logical_plan Values: (UInt32(12) AS (Utf8("1") || Utf8("2"))) physical_plan DataSourceExec: partitions=1, partition_sizes=[1] @@ -1403,7 +1403,7 @@ logical_plan 03)----TableScan: annotated_data_finite2 projection=[a, b] physical_plan 01)SortPreservingMergeExec: [a@0 ASC NULLS LAST] -02)--ProjectionExec: expr=[a@0 as a, a@0 + b@1 as annotated_data_finite2.a + annotated_data_finite2.b] +02)--ProjectionExec: expr=[a@0 as a, a@0 + b@1 as (annotated_data_finite2.a + annotated_data_finite2.b)] 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 04)------DataSourceExec: 
file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, has_header=true @@ -1598,11 +1598,11 @@ query TT EXPLAIN SELECT x/2, x/2+1 FROM t; ---- logical_plan -01)Projection: __common_expr_1 AS t.x / Int64(2), __common_expr_1 AS t.x / Int64(2) + Int64(1) +01)Projection: __common_expr_1 AS (t.x / Int64(2)), __common_expr_1 AS (t.x / Int64(2)) + Int64(1) 02)--Projection: t.x / Int64(2) AS __common_expr_1 03)----TableScan: t projection=[x] physical_plan -01)ProjectionExec: expr=[__common_expr_1@0 as t.x / Int64(2), __common_expr_1@0 + 1 as t.x / Int64(2) + Int64(1)] +01)ProjectionExec: expr=[__common_expr_1@0 as (t.x / Int64(2)), __common_expr_1@0 + 1 as ((t.x / Int64(2)) + Int64(1))] 02)--ProjectionExec: expr=[x@0 / 2 as __common_expr_1] 03)----DataSourceExec: partitions=1, partition_sizes=[1] @@ -1620,7 +1620,7 @@ logical_plan 02)--Projection: abs(t.x) AS __common_expr_1, t.y 03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[__common_expr_1@0 as abs(t.x), __common_expr_1@0 + abs(y@1) as abs(t.x) + abs(t.y)] +01)ProjectionExec: expr=[__common_expr_1@0 as abs(t.x), __common_expr_1@0 + abs(y@1) as (abs(t.x) + abs(t.y))] 02)--ProjectionExec: expr=[abs(x@0) as __common_expr_1, y@1 as y] 03)----DataSourceExec: partitions=1, partition_sizes=[1] @@ -1659,18 +1659,18 @@ logical_plan 01)Projection: coalesce(Int64(1), CAST(t.y / t.x AS Int64)), coalesce(Int64(2), CAST(t.y / t.x AS Int64)) 02)--TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[coalesce(1, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(1),t.y / t.x), coalesce(2, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(2),t.y / t.x)] +01)ProjectionExec: expr=[coalesce(1, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(1),(t.y / t.x)), coalesce(2, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(2),(t.y / t.x))] 02)--DataSourceExec: partitions=1, partition_sizes=[1] query TT 
EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t; ---- logical_plan -01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x +01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS ((t.y > Int64(0)) AND ((Int64(1) / t.y) < Int64(1))), t.x > Int32(0) AND __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS (((t.x > Int64(0)) AND (t.y > Int64(0))) AND ((Int64(1) / t.y) < (Int64(1) / t.x))) 02)--Projection: t.y > Int32(0) AS __common_expr_1, t.x, t.y 03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@1 > 0 AND __common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x] +01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as ((t.y > Int64(0)) AND ((Int64(1) / t.y) < Int64(1))), x@1 > 0 AND __common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as (((t.x > Int64(0)) AND (t.y > Int64(0))) AND ((Int64(1) / t.y) < (Int64(1) / t.x)))] 02)--ProjectionExec: expr=[y@1 > 0 as __common_expr_1, x@0 as x, y@1 as y] 03)----DataSourceExec: partitions=1, partition_sizes=[1] @@ -1678,11 +1678,11 @@ query TT EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t; ---- logical_plan -01)Projection: __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x +01)Projection: __common_expr_1 OR 
Int64(1) / CAST(t.y AS Int64) < Int64(1) AS ((t.y = Int64(0)) OR ((Int64(1) / t.y) < Int64(1))), t.x = Int32(0) OR __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS (((t.x = Int64(0)) OR (t.y = Int64(0))) OR ((Int64(1) / t.y) < (Int64(1) / t.x))) 02)--Projection: t.y = Int32(0) AS __common_expr_1, t.x, t.y 03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@1 = 0 OR __common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] +01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as ((t.y = Int64(0)) OR ((Int64(1) / t.y) < Int64(1))), x@1 = 0 OR __common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as (((t.x = Int64(0)) OR (t.y = Int64(0))) OR ((Int64(1) / t.y) < (Int64(1) / t.x)))] 02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y] 03)----DataSourceExec: partitions=1, partition_sizes=[1] @@ -1801,13 +1801,11 @@ select a + b from (select 1 as a, 2 as b, 1 as "a + b"); 3 # Can't reference an output column by expression over projection. -query error DataFusion error: Schema error: No field named a\. Valid fields are "a \+ Int64\(1\)"\. +query error DataFusion error: Schema error: No field named a\. Valid fields are "\(a \+ Int64\(1\)\)"\. select a + 1 from (select a+1 from (select 1 as a)); -query I +query error DataFusion error: Schema error: No field named "a \+ Int64\(1\)"\. Did you mean '\(a \+ Int64\(1\)\)'\?\. 
select "a + Int64(1)" + 10 from (select a+1 from (select 1 as a)); ----- -12 # run below query without logical optimizations statement ok diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index a72c8f574484..64f62cb7ade3 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -1164,7 +1164,7 @@ query TT explain select column2 || 'is' || column3 from temp; ---- logical_plan -01)Projection: temp.column2 || Utf8View("is") || temp.column3 AS temp.column2 || Utf8("is") || temp.column3 +01)Projection: temp.column2 || Utf8View("is") || temp.column3 AS ((temp.column2 || Utf8("is")) || temp.column3) 02)--TableScan: temp projection=[column2, column3] # should not cast the column2 to utf8 @@ -1172,7 +1172,7 @@ query TT explain select column2||' is fast' from temp; ---- logical_plan -01)Projection: temp.column2 || Utf8View(" is fast") AS temp.column2 || Utf8(" is fast") +01)Projection: temp.column2 || Utf8View(" is fast") AS (temp.column2 || Utf8(" is fast")) 02)--TableScan: temp projection=[column2] query TT diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 796570633f67..fe155ad076c3 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -227,23 +227,23 @@ query TT explain SELECT t1_id, (SELECT sum(t2_int * 1.0) + 1 FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1 ---- logical_plan -01)Projection: t1.t1_id, __scalar_sq_1.sum(t2.t2_int * Float64(1)) + Int64(1) AS t2_sum +01)Projection: t1.t1_id, __scalar_sq_1.(sum((t2.t2_int * Float64(1))) + Int64(1)) AS t2_sum 02)--Left Join: t1.t1_id = __scalar_sq_1.t2_id 03)----TableScan: t1 projection=[t1_id] 04)----SubqueryAlias: __scalar_sq_1 -05)------Projection: sum(t2.t2_int * Float64(1)) + Float64(1) AS sum(t2.t2_int * Float64(1)) + Int64(1), 
t2.t2_id -06)--------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Float64)) AS sum(t2.t2_int * Float64(1))]] +05)------Projection: sum((t2.t2_int * Float64(1))) + Float64(1) AS (sum((t2.t2_int * Float64(1))) + Int64(1)), t2.t2_id +06)--------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Float64)) AS sum((t2.t2_int * Float64(1)))]] 07)----------TableScan: t2 projection=[t2_id, t2_int] physical_plan -01)ProjectionExec: expr=[t1_id@1 as t1_id, sum(t2.t2_int * Float64(1)) + Int64(1)@0 as t2_sum] +01)ProjectionExec: expr=[t1_id@1 as t1_id, (sum((t2.t2_int * Float64(1))) + Int64(1))@0 as t2_sum] 02)--CoalesceBatchesExec: target_batch_size=2 -03)----HashJoinExec: mode=CollectLeft, join_type=Right, on=[(t2_id@1, t1_id@0)], projection=[sum(t2.t2_int * Float64(1)) + Int64(1)@0, t1_id@2] +03)----HashJoinExec: mode=CollectLeft, join_type=Right, on=[(t2_id@1, t1_id@0)], projection=[(sum((t2.t2_int * Float64(1))) + Int64(1))@0, t1_id@2] 04)------CoalescePartitionsExec -05)--------ProjectionExec: expr=[sum(t2.t2_int * Float64(1))@1 + 1 as sum(t2.t2_int * Float64(1)) + Int64(1), t2_id@0 as t2_id] -06)----------AggregateExec: mode=FinalPartitioned, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int * Float64(1))] +05)--------ProjectionExec: expr=[sum((t2.t2_int * Float64(1)))@1 + 1 as (sum((t2.t2_int * Float64(1))) + Int64(1)), t2_id@0 as t2_id] +06)----------AggregateExec: mode=FinalPartitioned, gby=[t2_id@0 as t2_id], aggr=[sum((t2.t2_int * Float64(1)))] 07)------------CoalesceBatchesExec: target_batch_size=2 08)--------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 -09)----------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int * Float64(1))] +09)----------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum((t2.t2_int * Float64(1)))] 10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 11)--------------------DataSourceExec: partitions=1, 
partition_sizes=[1] 12)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -535,7 +535,7 @@ logical_plan 01)Projection: t1.t1_id, t1.t1_name 02)--Filter: EXISTS () 03)----Subquery: -04)------Projection: sum(outer_ref(t1.t1_int) + t2.t2_id) +04)------Projection: sum((outer_ref(t1.t1_int) + t2.t2_id)) 05)--------Aggregate: groupBy=[[]], aggr=[[sum(CAST(outer_ref(t1.t1_int) + t2.t2_id AS Int64))]] 06)----------Filter: outer_ref(t1.t1_name) = t2.t2_name 07)------------TableScan: t2 @@ -550,7 +550,7 @@ logical_plan 02)--Filter: EXISTS () 03)----Subquery: 04)------Projection: count(Int64(1)) AS count(*) -05)--------Filter: sum(outer_ref(t1.t1_int) + t2.t2_id) > Int64(0) +05)--------Filter: sum((outer_ref(t1.t1_int) + t2.t2_id)) > Int64(0) 06)----------Aggregate: groupBy=[[]], aggr=[[count(Int64(1)), sum(CAST(outer_ref(t1.t1_int) + t2.t2_id AS Int64))]] 07)------------Filter: outer_ref(t1.t1_name) = t2.t2_name 08)--------------TableScan: t2 @@ -1127,7 +1127,7 @@ where t1.t1_id = 11 or t1.t1_id + 12 not in (select t2.t2_id + 1 from t2 where t logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int 02)--Filter: t1.t1_id = Int32(11) OR NOT __correlated_sq_1.mark -03)----LeftMark Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > Int32(0) +03)----LeftMark Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.(t2.t2_id + Int64(1)) Filter: t1.t1_int > Int32(0) 04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] 05)------SubqueryAlias: __correlated_sq_1 06)--------Projection: CAST(t2.t2_id AS Int64) + Int64(1) @@ -1316,7 +1316,7 @@ query TT explain select a/2, a/2 + 1 from t ---- logical_plan -01)Projection: __common_expr_1 AS t.a / Int64(2), __common_expr_1 AS t.a / Int64(2) + Int64(1) +01)Projection: __common_expr_1 AS (t.a / Int64(2)), __common_expr_1 AS (t.a / Int64(2)) + Int64(1) 02)--Projection: t.a / Int64(2) AS __common_expr_1 03)----TableScan: t projection=[a] @@ -1327,7 
+1327,7 @@ query TT explain select a/2, a/2 + 1 from t ---- logical_plan -01)Projection: __common_expr_1 AS t.a / Int64(2), __common_expr_1 AS t.a / Int64(2) + Int64(1) +01)Projection: __common_expr_1 AS (t.a / Int64(2)), __common_expr_1 AS (t.a / Int64(2)) + Int64(1) 02)--Projection: t.a / Int64(2) AS __common_expr_1 03)----TableScan: t projection=[a] diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index 2c6079bc7039..eee4c6e46477 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -208,12 +208,12 @@ EXPLAIN SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 ---- logical_plan 01)Union -02)--Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a +02)--Projection: CAST((Float64(0) + x.a) AS Float64) AS (Float64(0) + x.a) 03)----Aggregate: groupBy=[[CAST(Float64(0) + CAST(x.a AS Float64) AS Int32)]], aggr=[[]] 04)------SubqueryAlias: x 05)--------Projection: Int64(1) AS a 06)----------EmptyRelation -07)--Projection: Float64(2.1) + x.a AS Float64(0) + x.a +07)--Projection: (Float64(2.1) + x.a) AS (Float64(0) + x.a) 08)----Aggregate: groupBy=[[Float64(2.1) + CAST(x.a AS Float64)]], aggr=[[]] 09)------SubqueryAlias: x 10)--------Projection: Int64(1) AS a diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index c68385c49b48..d8934a39d570 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1719,9 +1719,9 @@ EXPLAIN SELECT c3, logical_plan 01)Projection: aggregate_test_100.c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + 
aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum2 02)--Limit: skip=0, fetch=5 -03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [__common_expr_1 AS aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [__common_expr_1 AS (aggregate_test_100.c3 + aggregate_test_100.c4) ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------Projection: __common_expr_1, aggregate_test_100.c3, aggregate_test_100.c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [__common_expr_1 AS aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +05)--------WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [__common_expr_1 AS (aggregate_test_100.c3 + aggregate_test_100.c4) 
DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 06)----------Projection: aggregate_test_100.c3 + aggregate_test_100.c4 AS __common_expr_1, aggregate_test_100.c2, aggregate_test_100.c3, aggregate_test_100.c9 07)------------TableScan: aggregate_test_100 projection=[c2, c3, c4, c9] physical_plan From 6d85f17144c89efbc82355fcaa18d55978a6c5e0 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 4 Jun 2025 08:41:04 +0200 Subject: [PATCH 11/12] Adjust example --- .../examples/date_time_functions.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion-examples/examples/date_time_functions.rs b/datafusion-examples/examples/date_time_functions.rs index dbe9970439df..e15296ca8637 100644 --- a/datafusion-examples/examples/date_time_functions.rs +++ b/datafusion-examples/examples/date_time_functions.rs @@ -122,15 +122,15 @@ async fn query_make_date() -> Result<()> { let df = ctx.sql("select make_date(y + 1, m, d) from t").await?; let expected = [ - "+-----------------------------------+", - "| make_date(t.y + Int64(1),t.m,t.d) |", - "+-----------------------------------+", - "| 2021-01-15 |", - "| 2022-02-16 |", - "| 2023-03-17 |", - "| 2024-04-18 |", - "| 2025-05-19 |", - "+-----------------------------------+", + "+-------------------------------------+", + "| make_date((t.y + Int64(1)),t.m,t.d) |", + "+-------------------------------------+", + "| 2021-01-15 |", + "| 2022-02-16 |", + "| 2023-03-17 |", + "| 2024-04-18 |", + "| 2025-05-19 |", + "+-------------------------------------+", ]; assert_batches_eq!(expected, &df.collect().await?); From eab812a9fc87cc975716b2fdefea032d7f388d60 Mon Sep 17 00:00:00 
2001 From: Hendrik Makait Date: Wed, 4 Jun 2025 09:38:12 +0200 Subject: [PATCH 12/12] Adjust TPC-H --- .../test_files/tpch/plans/q1.slt.part | 10 +++++----- .../test_files/tpch/plans/q10.slt.part | 10 +++++----- .../test_files/tpch/plans/q11.slt.part | 20 +++++++++---------- .../test_files/tpch/plans/q12.slt.part | 8 ++++---- .../test_files/tpch/plans/q14.slt.part | 10 +++++----- .../test_files/tpch/plans/q15.slt.part | 20 +++++++++---------- .../test_files/tpch/plans/q17.slt.part | 6 +++--- .../test_files/tpch/plans/q19.slt.part | 10 +++++----- .../test_files/tpch/plans/q20.slt.part | 6 +++--- .../test_files/tpch/plans/q3.slt.part | 10 +++++----- .../test_files/tpch/plans/q5.slt.part | 10 +++++----- .../test_files/tpch/plans/q6.slt.part | 8 ++++---- .../test_files/tpch/plans/q8.slt.part | 10 +++++----- 13 files changed, 69 insertions(+), 69 deletions(-) diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part index 4a6ad5eddfb7..a30016f3d8b2 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part @@ -41,19 +41,19 @@ explain select ---- logical_plan 01)Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST -02)--Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, avg(lineitem.l_quantity) AS avg_qty, avg(lineitem.l_extendedprice) AS avg_price, avg(lineitem.l_discount) AS avg_disc, count(Int64(1)) AS count(*) AS count_order -03)----Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * 
Int64(1) - lineitem.l_discount), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1))]] +02)--Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS sum_disc_price, sum(((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) * (Int64(1) + lineitem.l_tax))) AS sum_charge, avg(lineitem.l_quantity) AS avg_qty, avg(lineitem.l_extendedprice) AS avg_price, avg(lineitem.l_discount) AS avg_disc, count(Int64(1)) AS count(*) AS count_order +03)----Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) * (Int64(1) + lineitem.l_tax))), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1))]] 04)------Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus 05)--------Filter: lineitem.l_shipdate <= Date32("1998-09-02") 06)----------TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-02")] physical_plan 01)SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST] 02)--SortExec: expr=[l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS 
LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(Int64(1))@9 as count_order] -04)------AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1))] +03)----ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@4 as sum_disc_price, sum(((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) * (Int64(1) + lineitem.l_tax)))@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(Int64(1))@9 as count_order] +04)------AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) * (Int64(1) + lineitem.l_tax))), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), 
count(Int64(1))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1))] +07)------------AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))), sum(((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) * (Int64(1) + lineitem.l_tax))), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1))] 08)--------------ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_1, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] 09)----------------CoalesceBatchesExec: target_batch_size=8192 10)------------------FilterExec: l_shipdate@6 <= 1998-09-02, projection=[l_quantity@0, l_extendedprice@1, l_discount@2, l_tax@3, l_returnflag@4, l_linestatus@5] diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q10.slt.part index fee496f92055..4280f608bf94 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q10.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q10.slt.part @@ -52,8 +52,8 @@ limit 10; ---- logical_plan 01)Sort: revenue DESC NULLS FIRST, fetch=10 -02)--Projection: customer.c_custkey, 
customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment -03)----Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +02)--Projection: customer.c_custkey, customer.c_name, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment +03)----Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 04)------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name 05)--------Inner Join: customer.c_nationkey = nation.n_nationkey 06)----------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount @@ -71,11 +71,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [revenue@2 DESC], fetch=10 02)--SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true] -03)----ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] 
-04)------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +03)----ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] +04)------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +07)------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 08)--------------CoalesceBatchesExec: target_batch_size=8192 09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_nationkey@3, n_nationkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@7, l_discount@8, n_name@10] 10)------------------CoalesceBatchesExec: 
target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part index 1dba8c053720..9c0c48c78cdf 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part @@ -48,8 +48,8 @@ limit 10; ---- logical_plan 01)Sort: value DESC NULLS FIRST, fetch=10 -02)--Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value -03)----Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) +02)--Projection: partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)) AS value +03)----Inner Join: Filter: CAST(sum((partsupp.ps_supplycost * partsupp.ps_availqty)) AS Decimal128(38, 15)) > __scalar_sq_1.(sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * Float64(0.0001)) 04)------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] 05)--------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost 06)----------Inner Join: supplier.s_nationkey = nation.n_nationkey @@ -61,7 +61,7 @@ logical_plan 12)--------------Filter: nation.n_name = Utf8("GERMANY") 13)----------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] 14)------SubqueryAlias: __scalar_sq_1 -15)--------Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) +15)--------Projection: CAST(CAST(sum((partsupp.ps_supplycost * partsupp.ps_availqty)) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) 16)----------Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] 17)------------Projection: partsupp.ps_availqty, 
partsupp.ps_supplycost 18)--------------Inner Join: supplier.s_nationkey = nation.n_nationkey @@ -74,13 +74,13 @@ logical_plan 25)--------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] physical_plan 01)SortExec: TopK(fetch=10), expr=[value@1 DESC], preserve_partitioning=[false] -02)--ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] -03)----NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1, projection=[ps_partkey@0, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1] +02)--ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty))@1 as value] +03)----NestedLoopJoinExec: join_type=Inner, filter=CAST(sum((partsupp.ps_supplycost * partsupp.ps_availqty))@0 AS Decimal128(38, 15)) > (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * Float64(0.0001))@1, projection=[ps_partkey@0, sum((partsupp.ps_supplycost * partsupp.ps_availqty))@1] 04)------CoalescePartitionsExec -05)--------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +05)--------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum((partsupp.ps_supplycost * partsupp.ps_availqty))] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +08)--------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum((partsupp.ps_supplycost * partsupp.ps_availqty))] 09)----------------CoalesceBatchesExec: target_batch_size=8192 
10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[ps_partkey@0, ps_availqty@1, ps_supplycost@2] 11)--------------------CoalesceBatchesExec: target_batch_size=8192 @@ -100,10 +100,10 @@ physical_plan 25)--------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] 26)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 27)------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false -28)------ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] -29)--------AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +28)------ProjectionExec: expr=[CAST(CAST(sum((partsupp.ps_supplycost * partsupp.ps_availqty))@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as (sum((partsupp.ps_supplycost * partsupp.ps_availqty)) * Float64(0.0001))] +29)--------AggregateExec: mode=Final, gby=[], aggr=[sum((partsupp.ps_supplycost * partsupp.ps_availqty))] 30)----------CoalescePartitionsExec -31)------------AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +31)------------AggregateExec: mode=Partial, gby=[], aggr=[sum((partsupp.ps_supplycost * partsupp.ps_availqty))] 32)--------------CoalesceBatchesExec: target_batch_size=8192 33)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] 34)------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q12.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q12.slt.part index 
3757fc48dba0..2af13a5beeac 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q12.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q12.slt.part @@ -50,7 +50,7 @@ order by ---- logical_plan 01)Sort: lineitem.l_shipmode ASC NULLS LAST -02)--Projection: lineitem.l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count +02)--Projection: lineitem.l_shipmode, sum(CASE WHEN ((orders.o_orderpriority = Utf8("1-URGENT")) OR (orders.o_orderpriority = Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END) AS high_line_count, sum(CASE WHEN ((orders.o_orderpriority != Utf8("1-URGENT")) AND (orders.o_orderpriority != Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END) AS low_line_count 03)----Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]] 04)------Projection: lineitem.l_shipmode, orders.o_orderpriority 05)--------Inner Join: lineitem.l_orderkey = orders.o_orderkey @@ -61,11 +61,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] 02)--SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] -04)------AggregateExec: 
mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] +03)----ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN ((orders.o_orderpriority = Utf8("1-URGENT")) OR (orders.o_orderpriority = Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN ((orders.o_orderpriority != Utf8("1-URGENT")) AND (orders.o_orderpriority != Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] +04)------AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN ((orders.o_orderpriority = Utf8("1-URGENT")) OR (orders.o_orderpriority = Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN ((orders.o_orderpriority != Utf8("1-URGENT")) AND (orders.o_orderpriority != Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([l_shipmode@0], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] +07)------------AggregateExec: mode=Partial, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN ((orders.o_orderpriority = Utf8("1-URGENT")) OR (orders.o_orderpriority = Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN ((orders.o_orderpriority != Utf8("1-URGENT")) AND (orders.o_orderpriority != Utf8("2-HIGH"))) THEN Int64(1) ELSE Int64(0) END)] 08)--------------CoalesceBatchesExec: 
target_batch_size=8192 09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] 10)------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q14.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q14.slt.part index 1104af2bdc64..76836db21dd1 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q14.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q14.slt.part @@ -32,8 +32,8 @@ where and l_shipdate < date '1995-10-01'; ---- logical_plan -01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue -02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN (lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) ELSE Int64(0) END) AS Float64) / CAST(sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS Float64) AS promo_revenue +02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN (lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) ELSE Int64(0) END), sum(__common_expr_1) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 03)----Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) 
AS __common_expr_1, part.p_type 04)------Inner Join: lineitem.l_partkey = part.p_partkey 05)--------Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount @@ -41,10 +41,10 @@ logical_plan 07)------------TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-09-01"), lineitem.l_shipdate < Date32("1995-10-01")] 08)--------TableScan: part projection=[p_partkey, p_type] physical_plan -01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] -02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN (lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) ELSE Int64(0) END)@0 AS Float64) / CAST(sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@1 AS Float64) as promo_revenue] +02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN (lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) ELSE Int64(0) END), sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN (lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)) ELSE 
Int64(0) END), sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 05)--------ProjectionExec: expr=[l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as __common_expr_1, p_type@2 as p_type] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_type@4] diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q15.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q15.slt.part index 0636a033b25a..88a3324505ef 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q15.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q15.slt.part @@ -57,16 +57,16 @@ logical_plan 05)--------Inner Join: supplier.s_suppkey = revenue0.supplier_no 06)----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone], partial_filters=[Boolean(true)] 07)----------SubqueryAlias: revenue0 -08)------------Projection: lineitem.l_suppkey AS supplier_no, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue -09)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +08)------------Projection: lineitem.l_suppkey AS supplier_no, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS total_revenue +09)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 10)----------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount 11)------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01") 12)--------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, 
l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")] 13)------SubqueryAlias: __scalar_sq_1 14)--------Aggregate: groupBy=[[]], aggr=[[max(revenue0.total_revenue)]] 15)----------SubqueryAlias: revenue0 -16)------------Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue -17)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +16)------------Projection: sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS total_revenue +17)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 18)----------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount 19)------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01") 20)--------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")] @@ -78,11 +78,11 @@ physical_plan 05)--------AggregateExec: mode=Final, gby=[], aggr=[max(revenue0.total_revenue)] 06)----------CoalescePartitionsExec 07)------------AggregateExec: mode=Partial, gby=[], aggr=[max(revenue0.total_revenue)] -08)--------------ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] -09)----------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +08)--------------ProjectionExec: expr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@1 
as total_revenue] +09)----------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 10)------------------CoalesceBatchesExec: target_batch_size=8192 11)--------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4 -12)----------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +12)----------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 13)------------------------CoalesceBatchesExec: target_batch_size=8192 14)--------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2] 15)----------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], file_type=csv, has_header=false @@ -92,11 +92,11 @@ physical_plan 19)--------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 20)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 21)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_phone], file_type=csv, has_header=false -22)------------ProjectionExec: expr=[l_suppkey@0 as supplier_no, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as 
total_revenue] -23)--------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +22)------------ProjectionExec: expr=[l_suppkey@0 as supplier_no, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@1 as total_revenue] +23)--------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 24)----------------CoalesceBatchesExec: target_batch_size=8192 25)------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4 -26)--------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +26)--------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 27)----------------------CoalesceBatchesExec: target_batch_size=8192 28)------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2] 29)--------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part index 02553890bcf5..fe1a56af76e3 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part +++ 
b/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part @@ -39,7 +39,7 @@ logical_plan 01)Projection: CAST(sum(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly 02)--Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice)]] 03)----Projection: lineitem.l_extendedprice -04)------Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * avg(lineitem.l_quantity) +04)------Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.(Float64(0.2) * avg(lineitem.l_quantity)) 05)--------Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey 06)----------Inner Join: lineitem.l_partkey = part.p_partkey 07)------------TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] @@ -56,7 +56,7 @@ physical_plan 03)----CoalescePartitionsExec 04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] 05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] +06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < (Float64(0.2) * avg(lineitem.l_quantity))@1, projection=[l_extendedprice@1] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_quantity@1, l_extendedprice@2, p_partkey@3] 09)----------------CoalesceBatchesExec: target_batch_size=8192 @@ -68,7 +68,7 @@ physical_plan 15)----------------------FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX, projection=[p_partkey@0] 16)------------------------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 17)--------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_container], file_type=csv, has_header=false -18)------------ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] +18)------------ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as (Float64(0.2) * avg(lineitem.l_quantity)), l_partkey@0 as l_partkey] 19)--------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] 20)----------------CoalesceBatchesExec: target_batch_size=8192 21)------------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q19.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q19.slt.part index b0e5b2e904d0..b9aa7f33102b 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q19.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q19.slt.part @@ -54,8 +54,8 @@ where ); ---- logical_plan -01)Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue -02)--Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +01)Projection: sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS revenue +02)--Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 03)----Projection: lineitem.l_extendedprice, lineitem.l_discount 04)------Inner Join: lineitem.l_partkey = part.p_partkey Filter: part.p_brand = Utf8("Brand#12") AND 
part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15) 05)--------Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount @@ -64,10 +64,10 @@ logical_plan 08)--------Filter: (part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) 09)----------TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND 
part.p_size <= Int32(15)] physical_plan -01)ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] -02)--AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +01)ProjectionExec: expr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@0 as revenue] +02)--AggregateExec: mode=Final, gby=[], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], filter=p_brand@1 = Brand#12 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(100),15,2 AND l_quantity@0 <= Some(1100),15,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(1000),15,2 AND l_quantity@0 <= Some(2000),15,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(2000),15,2 AND l_quantity@0 <= Some(3000),15,2 AND p_size@2 <= 15, projection=[l_extendedprice@2, l_discount@3] 07)------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part 
b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part index 4844d5fae60b..0f365f73f83f 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part @@ -67,7 +67,7 @@ logical_plan 09)--------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("CANADA")] 10)------SubqueryAlias: __correlated_sq_2 11)--------Projection: partsupp.ps_suppkey -12)----------Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) +12)----------Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.(Float64(0.5) * sum(lineitem.l_quantity)) 13)------------LeftSemi Join: partsupp.ps_partkey = __correlated_sq_1.p_partkey 14)--------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] 15)--------------SubqueryAlias: __correlated_sq_1 @@ -102,7 +102,7 @@ physical_plan 19)--------CoalesceBatchesExec: target_batch_size=8192 20)----------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 21)------------CoalesceBatchesExec: target_batch_size=8192 -22)--------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] +22)--------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > (Float64(0.5) * sum(lineitem.l_quantity))@1, projection=[ps_suppkey@1] 23)----------------CoalesceBatchesExec: target_batch_size=8192 24)------------------RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), 
input_partitions=4 25)--------------------CoalesceBatchesExec: target_batch_size=8192 @@ -116,7 +116,7 @@ physical_plan 33)------------------------------FilterExec: p_name@1 LIKE forest%, projection=[p_partkey@0] 34)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 35)----------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], file_type=csv, has_header=false -36)----------------ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] +36)----------------ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as (Float64(0.5) * sum(lineitem.l_quantity)), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] 37)------------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] 38)--------------------CoalesceBatchesExec: target_batch_size=8192 39)----------------------RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q3.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q3.slt.part index 2ad496ef26fd..8d57b306739d 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q3.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q3.slt.part @@ -43,8 +43,8 @@ limit 10; ---- logical_plan 01)Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 -02)--Projection: lineitem.l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority -03)----Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) 
AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +02)--Projection: lineitem.l_orderkey, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS revenue, orders.o_orderdate, orders.o_shippriority +03)----Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 04)------Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount 05)--------Inner Join: orders.o_orderkey = lineitem.l_orderkey 06)----------Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority @@ -60,11 +60,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], fetch=10 02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] -04)------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +03)----ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] +04)------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, 
o_shippriority@2], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +07)------------AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 08)--------------CoalesceBatchesExec: target_batch_size=8192 09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] 10)------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q5.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q5.slt.part index f192f987b3ef..514e78b7e9a1 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q5.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q5.slt.part @@ -44,8 +44,8 @@ order by ---- logical_plan 01)Sort: revenue DESC NULLS FIRST -02)--Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue -03)----Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +02)--Projection: nation.n_name, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount))) AS revenue +03)----Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))]] 04)------Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name 05)--------Inner Join: nation.n_regionkey = region.r_regionkey 06)----------Projection: lineitem.l_extendedprice, 
lineitem.l_discount, nation.n_name, nation.n_regionkey @@ -69,11 +69,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [revenue@1 DESC] 02)--SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] -03)----ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] -04)------AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +03)----ProjectionExec: expr=[n_name@0 as n_name, sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))@1 as revenue] +04)------AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([n_name@0], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +07)------------AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum((lineitem.l_extendedprice * (Int64(1) - lineitem.l_discount)))] 08)--------------CoalesceBatchesExec: target_batch_size=8192 09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@3, r_regionkey@0)], projection=[l_extendedprice@0, l_discount@1, n_name@2] 10)------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q6.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q6.slt.part index b1e5d2869a8c..19929c7f6d4a 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q6.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q6.slt.part @@ -28,16 +28,16 @@ where and l_quantity < 24; ---- logical_plan -01)Projection: sum(lineitem.l_extendedprice * lineitem.l_discount) AS revenue +01)Projection: sum((lineitem.l_extendedprice * lineitem.l_discount)) AS revenue 
02)--Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * lineitem.l_discount)]] 03)----Projection: lineitem.l_extendedprice, lineitem.l_discount 04)------Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") AND lineitem.l_discount >= Decimal128(Some(5),15,2) AND lineitem.l_discount <= Decimal128(Some(7),15,2) AND lineitem.l_quantity < Decimal128(Some(2400),15,2) 05)--------TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01"), lineitem.l_discount >= Decimal128(Some(5),15,2), lineitem.l_discount <= Decimal128(Some(7),15,2), lineitem.l_quantity < Decimal128(Some(2400),15,2)] physical_plan -01)ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] -02)--AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] +01)ProjectionExec: expr=[sum((lineitem.l_extendedprice * lineitem.l_discount))@0 as revenue] +02)--AggregateExec: mode=Final, gby=[], aggr=[sum((lineitem.l_extendedprice * lineitem.l_discount))] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum((lineitem.l_extendedprice * lineitem.l_discount))] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(5),15,2 AND l_discount@2 <= Some(7),15,2 AND l_quantity@0 < Some(2400),15,2, projection=[l_extendedprice@1, l_discount@2] 07)------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q8.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q8.slt.part index 88ceffd62ad3..02bb021e114e 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q8.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q8.slt.part @@ -57,8 +57,8 @@ order by ---- logical_plan 01)Sort: all_nations.o_year ASC NULLS LAST -02)--Projection: all_nations.o_year, CAST(CAST(sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) AS Decimal128(12, 2)) / CAST(sum(all_nations.volume) AS Decimal128(12, 2)) AS Decimal128(15, 2)) AS mkt_share -03)----Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] +02)--Projection: all_nations.o_year, CAST(CAST(sum(CASE WHEN (all_nations.nation = Utf8("BRAZIL")) THEN all_nations.volume ELSE Int64(0) END) AS Decimal128(12, 2)) / CAST(sum(all_nations.volume) AS Decimal128(12, 2)) AS Decimal128(15, 2)) AS mkt_share +03)----Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN (all_nations.nation = Utf8("BRAZIL")) THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] 04)------SubqueryAlias: all_nations 05)--------Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation 
06)----------Inner Join: n1.n_regionkey = region.r_regionkey @@ -92,11 +92,11 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] 02)--SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----ProjectionExec: expr=[o_year@0 as o_year, CAST(CAST(sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 AS Decimal128(12, 2)) / CAST(sum(all_nations.volume)@2 AS Decimal128(12, 2)) AS Decimal128(15, 2)) as mkt_share] -04)------AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] +03)----ProjectionExec: expr=[o_year@0 as o_year, CAST(CAST(sum(CASE WHEN (all_nations.nation = Utf8("BRAZIL")) THEN all_nations.volume ELSE Int64(0) END)@1 AS Decimal128(12, 2)) / CAST(sum(all_nations.volume)@2 AS Decimal128(12, 2)) AS Decimal128(15, 2)) as mkt_share] +04)------AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN (all_nations.nation = Utf8("BRAZIL")) THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] 05)--------CoalesceBatchesExec: target_batch_size=8192 06)----------RepartitionExec: partitioning=Hash([o_year@0], 4), input_partitions=4 -07)------------AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] +07)------------AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN (all_nations.nation = Utf8("BRAZIL")) THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] 08)--------------ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] 09)----------------CoalesceBatchesExec: target_batch_size=8192 10)------------------HashJoinExec: mode=Partitioned, 
join_type=Inner, on=[(n_regionkey@3, r_regionkey@0)], projection=[l_extendedprice@0, l_discount@1, o_orderdate@2, n_name@4]