From dd69ebfb9386ff806dce062289439cb33202429a Mon Sep 17 00:00:00 2001 From: Yiqun Liu Date: Tue, 15 Jul 2025 23:27:48 +0800 Subject: [PATCH] * Display type for expression in explain stmt * Format window function type field of Sort function in explain(logical) stmt --- src/query/expression/src/utils/display.rs | 18 +++++ ...ulti_join_avg_case_expression_physical.txt | 40 +++++------ ...ulti_join_sum_case_expression_physical.txt | 40 +++++------ .../data/results/tpcds/Q01_physical.txt | 46 ++++++------- .../data/results/tpcds/Q03_physical.txt | 18 ++--- src/query/sql/src/executor/format.rs | 66 +++++++++++-------- .../planner/format/display_rel_operator.rs | 61 ++++++++++++++++- 7 files changed, 188 insertions(+), 101 deletions(-) diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index ab0b28f3874ca..221e74f0ff3f5 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -1080,6 +1080,24 @@ impl Expr { write_expr(self, 0) } + + pub fn typed_sql_display(&self) -> String { + #[recursive::recursive] + fn get_datatype(expr: &Expr) -> &DataType { + match expr { + Expr::Constant(Constant { data_type, .. }) => data_type, + Expr::ColumnRef(ColumnRef { data_type, .. }) => data_type, + Expr::Cast(Cast { expr, .. }) => get_datatype(expr.as_ref()), + Expr::FunctionCall(FunctionCall { return_type, .. }) => return_type, + Expr::LambdaFunctionCall(LambdaFunctionCall { return_type, .. }) => return_type, + } + } + + match self { + Expr::Cast(_) => format!("{} FROM {}", self.sql_display(), get_datatype(self)), + _ => format!("{}: {}", self.sql_display(), get_datatype(self)), + } + } } impl Display for Value { diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt index 63de3e7d22ec8..3d48ba9d402ce 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt @@ -3,11 +3,11 @@ Exchange ├── exchange type: Merge └── EvalScalar ├── output columns: [sell_mnt = 0 (#170)] - ├── expressions: [t.sell_mnt (#169) = 0] + ├── expressions: [t.sell_mnt (#169) = 0: Boolean NULL] ├── estimated rows: 7119376617326129446912.00 └── EvalScalar ├── output columns: [sell_mnt (#169)] - ├── expressions: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167) / CAST(if(CAST(count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168) = 0 AS Boolean NULL), 1, count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168)) AS UInt64 NULL) + 3] + ├── expressions: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167) / CAST(if(CAST(count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168) = 0 AS Boolean NULL), 1, count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168)) AS UInt64 NULL) + 3: Float64 NULL] ├── estimated rows: 7119376617326129446912.00 └── AggregateFinal ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] @@ -16,31 +16,31 @@ Exchange ├── estimated rows: 7119376617326129446912.00 └── Exchange ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] - ├── exchange type: Hash(0, 1, 2, 3) + ├── exchange type: Hash(0: String NULL, 1: String NULL, 2: String NULL, 3: String NULL) └── AggregatePartial ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum_arg_0), count()] ├── estimated rows: 7119376617326129446912.00 └── EvalScalar ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144), sum_arg_0 (#166)] - ├── expressions: [if(d.a1v (#154) = '603020', 1, 0)] + ├── expressions: [if(d.a1v (#154) = '603020', 1, 0): UInt8] ├── estimated rows: 7119376617326129446912.00 └── HashJoin ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154), c.a5m (#144)] ├── join type: INNER - ├── build keys: [c.a0m (#149)] - ├── probe keys: [a.a0m (#9)] + ├── build keys: [c.a0m (#149): String NULL] + ├── probe keys: [a.a0m (#9): String NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:3, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max + │ └── filter id:3, build key:c.a0m (#149): String NULL, probe key:a.a0m (#9): String NULL, filter type:inlist,min_max ├── estimated rows: 7119376617326129446912.00 ├── Exchange(Build) │ ├── output columns: [c.a5m (#144), a0m (#149)] │ ├── exchange type: Broadcast │ └── EvalScalar │ ├── output columns: [c.a5m (#144), a0m (#149)] - │ ├── expressions: [CAST(c.a0m (#74) AS String NULL)] + │ ├── expressions: [CAST(c.a0m (#74) AS String NULL) FROM String] │ ├── estimated rows: 63773.60 │ └── TableScan │ ├── table: default.default.a2x @@ -49,18 +49,18 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1')], limit: NONE] + │ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1'): Boolean], limit: NONE] │ └── estimated rows: 63773.60 └── HashJoin(Probe) ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154)] ├── join type: INNER - ├── build keys: [b.a0k (#48), b.a0n (#50)] - ├── probe keys: [a.a0k (#7), a.a0n (#10)] + ├── build keys: [b.a0k (#48): String NULL, b.a0n (#50): String NULL] + ├── probe keys: [a.a0k (#7): String NULL, a.a0n (#10): String NULL] ├── keys is null equal: [false, false] - ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] + ├── filters: [b.a2c (#52) <= a.a0d (#0): Boolean NULL, b.a2k (#61) > a.a0d (#0): Boolean NULL] ├── build join filters: - │ ├── filter id:1, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max - │ └── filter id:2, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max + │ ├── filter id:1, build key:b.a0k (#48): String NULL, probe key:a.a0k (#7): String NULL, filter type:inlist,min_max + │ └── filter id:2, build key:b.a0n (#50): String NULL, probe key:a.a0n (#10): String NULL, filter type:inlist,min_max ├── estimated rows: 111635169056257280.00 ├── Exchange(Build) │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] @@ -72,17 +72,17 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] + │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100'): Boolean], limit: NONE] │ └── estimated rows: 45493.85 └── HashJoin(Probe) ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154)] ├── join type: INNER - ├── build keys: [d.a5t (#151)] - ├── probe keys: [a.a0l (#8)] + ├── build keys: [d.a5t (#151): String NULL] + ├── probe keys: [a.a0l (#8): String NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:0, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max + │ └── filter id:0, build key:d.a5t (#151): String NULL, probe key:a.a0l (#8): String NULL, filter type:inlist,min_max ├── estimated rows: 2453851765646.43 ├── Exchange(Build) │ ├── output columns: [d.a5t (#151), d.a1v (#154)] @@ -94,7 +94,7 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] + │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1'): Boolean], limit: NONE] │ └── estimated rows: 806.60 └── TableScan(Probe) ├── table: default.default.a0c @@ -103,7 +103,7 @@ Exchange ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] + ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525'): Boolean], limit: NONE] ├── apply join filters: [#3, #1, #2, #0] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt index 4e8a8c0d72d69..a6c6e515fd2a2 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt @@ -3,7 +3,7 @@ Exchange ├── exchange type: Merge └── EvalScalar ├── output columns: [sell_mnt = 0 (#168)] - ├── expressions: [t.sell_mnt (#173) = 0] + ├── expressions: [t.sell_mnt (#173) = 0: Boolean NULL] ├── estimated rows: 376949189626631488.00 └── AggregateFinal ├── output columns: [_eager_final_sum (#173), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] @@ -12,7 +12,7 @@ Exchange ├── estimated rows: 376949189626631488.00 └── Exchange ├── output columns: [_eager_final_sum (#173), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] - ├── exchange type: Hash(0, 1, 2, 3) + ├── exchange type: Hash(0: String NULL, 1: String NULL, 2: String NULL, 3: String NULL) └── AggregatePartial ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END))] @@ -20,19 +20,19 @@ Exchange └── HashJoin ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── join type: INNER - ├── build keys: [c.a0m (#149)] - ├── probe keys: [a.a0m (#9)] + ├── build keys: [c.a0m (#149): String NULL] + ├── probe keys: [a.a0m (#9): String NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:3, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max + │ └── filter id:3, build key:c.a0m (#149): String NULL, probe key:a.a0m (#9): String NULL, filter type:inlist,min_max ├── estimated rows: 376949189626631488.00 ├── Exchange(Build) │ ├── output columns: [c.a5m (#144), a0m (#149)] │ ├── exchange type: Broadcast │ └── EvalScalar │ ├── output columns: [c.a5m (#144), a0m (#149)] - │ ├── expressions: [CAST(c.a0m (#74) AS String NULL)] + │ ├── expressions: [CAST(c.a0m (#74) AS String NULL) FROM String] │ ├── estimated rows: 63773.60 │ └── TableScan │ ├── table: default.default.a2x @@ -41,7 +41,7 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1')], limit: NONE] + │ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1'): Boolean], limit: NONE] │ └── estimated rows: 63773.60 └── AggregateFinal(Probe) ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9)] @@ -50,25 +50,25 @@ Exchange ├── estimated rows: 5910740331840.00 └── Exchange ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9)] - ├── exchange type: Hash(0, 1, 2) + ├── exchange type: Hash(0: String NULL, 1: String NULL, 2: String NULL) └── AggregatePartial ├── group by: [a0d, a0k, a0m] ├── aggregate functions: [sum(sum_arg_0)] ├── estimated rows: 5910740331840.00 └── EvalScalar ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), sum_arg_0 (#166)] - ├── expressions: [if(d.a1v (#154) = '603020', 1, 0)] + ├── expressions: [if(d.a1v (#154) = '603020', 1, 0): UInt8] ├── estimated rows: 111635169056257280.00 └── HashJoin ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154)] ├── join type: INNER - ├── build keys: [b.a0k (#48), b.a0n (#50)] - ├── probe keys: [a.a0k (#7), a.a0n (#10)] + ├── build keys: [b.a0k (#48): String NULL, b.a0n (#50): String NULL] + ├── probe keys: [a.a0k (#7): String NULL, a.a0n (#10): String NULL] ├── keys is null equal: [false, false] - ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] + ├── filters: [b.a2c (#52) <= a.a0d (#0): Boolean NULL, b.a2k (#61) > a.a0d (#0): Boolean NULL] ├── build join filters: - │ ├── filter id:1, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max - │ └── filter id:2, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max + │ ├── filter id:1, build key:b.a0k (#48): String NULL, probe key:a.a0k (#7): String NULL, filter type:inlist,min_max + │ └── filter id:2, build key:b.a0n (#50): String NULL, probe key:a.a0n (#10): String NULL, filter type:inlist,min_max ├── estimated rows: 111635169056257280.00 ├── Exchange(Build) │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] @@ -80,17 +80,17 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] + │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100'): Boolean], limit: NONE] │ └── estimated rows: 45493.85 └── HashJoin(Probe) ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154)] ├── join type: INNER - ├── build keys: [d.a5t (#151)] - ├── probe keys: [a.a0l (#8)] + ├── build keys: [d.a5t (#151): String NULL] + ├── probe keys: [a.a0l (#8): String NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:0, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max + │ └── filter id:0, build key:d.a5t (#151): String NULL, probe key:a.a0l (#8): String NULL, filter type:inlist,min_max ├── estimated rows: 2453851765646.43 ├── Exchange(Build) │ ├── output columns: [d.a5t (#151), d.a1v (#154)] @@ -102,7 +102,7 @@ Exchange │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] + │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1'): Boolean], limit: NONE] │ └── estimated rows: 806.60 └── TableScan(Probe) ├── table: default.default.a0c @@ -111,7 +111,7 @@ Exchange ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] + ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525'): Boolean], limit: NONE] ├── apply join filters: [#3, #1, #2, #0] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt index bf34e3e48aa1e..ee0eaa20e5922 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt @@ -17,12 +17,12 @@ Limit └── HashJoin ├── output columns: [customer.c_customer_id (#79)] ├── join type: INNER - ├── build keys: [ctr1.ctr_customer_sk (#3)] - ├── probe keys: [customer.c_customer_sk (#78)] + ├── build keys: [ctr1.ctr_customer_sk (#3): Int32 NULL] + ├── probe keys: [customer.c_customer_sk (#78): Int32 NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3), probe key:customer.c_customer_sk (#78), filter type:bloom,inlist,min_max + │ └── filter id:4, build key:ctr1.ctr_customer_sk (#3): Int32 NULL, probe key:customer.c_customer_sk (#78): Int32 NULL, filter type:bloom,inlist,min_max ├── estimated rows: 0.00 ├── Exchange(Build) │ ├── output columns: [store_returns.sr_customer_sk (#3)] @@ -30,12 +30,12 @@ Limit │ └── HashJoin │ ├── output columns: [store_returns.sr_customer_sk (#3)] │ ├── join type: INNER - │ ├── build keys: [sr_store_sk (#103)] - │ ├── probe keys: [sr_store_sk (#7)] + │ ├── build keys: [sr_store_sk (#103): Int32 NULL] + │ ├── probe keys: [sr_store_sk (#7): Int32 NULL] │ ├── keys is null equal: [false] - │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147)] + │ ├── filters: [ctr1.ctr_total_return (#48) > scalar_subquery_147 (#147): Boolean NULL] │ ├── build join filters: - │ │ └── filter id:3, build key:sr_store_sk (#103), probe key:sr_store_sk (#7), filter type:bloom,inlist,min_max + │ │ └── filter id:3, build key:sr_store_sk (#103): Int32 NULL, probe key:sr_store_sk (#7): Int32 NULL, filter type:bloom,inlist,min_max │ ├── estimated rows: 0.00 │ ├── Exchange(Build) │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] @@ -43,19 +43,19 @@ Limit │ │ └── HashJoin │ │ ├── output columns: [sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147), store_returns.sr_store_sk (#103)] │ │ ├── join type: INNER - │ │ ├── build keys: [sr_store_sk (#103)] - │ │ ├── probe keys: [store.s_store_sk (#49)] + │ │ ├── build keys: [sr_store_sk (#103): Int32 NULL] + │ │ ├── probe keys: [store.s_store_sk (#49): Int32 NULL] │ │ ├── keys is null equal: [false] │ │ ├── filters: [] │ │ ├── build join filters: - │ │ │ └── filter id:2, build key:sr_store_sk (#103), probe key:store.s_store_sk (#49), filter type:bloom,inlist,min_max + │ │ │ └── filter id:2, build key:sr_store_sk (#103): Int32 NULL, probe key:store.s_store_sk (#49): Int32 NULL, filter type:bloom,inlist,min_max │ │ ├── estimated rows: 0.00 │ │ ├── Exchange(Build) │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] │ │ │ ├── exchange type: Broadcast │ │ │ └── EvalScalar │ │ │ ├── output columns: [store_returns.sr_store_sk (#103), sum(ctr_total_return) / if(count(ctr_total_return) = 0, 1, count(ctr_total_return)) * 1.2 (#147)] - │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2] + │ │ │ ├── expressions: [sum(ctr_total_return) (#145) / CAST(if(CAST(count(ctr_total_return) (#146) = 0 AS Boolean NULL), 1, count(ctr_total_return) (#146)) AS UInt64 NULL) * 1.2: Decimal(26, 9) NULL] │ │ │ ├── estimated rows: 0.00 │ │ │ └── AggregateFinal │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] @@ -64,7 +64,7 @@ Limit │ │ │ ├── estimated rows: 0.00 │ │ │ └── Exchange │ │ │ ├── output columns: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0) + │ │ │ ├── exchange type: Hash(0: Int32 NULL) │ │ │ └── AggregatePartial │ │ │ ├── group by: [sr_store_sk] │ │ │ ├── aggregate functions: [sum(Sum(sr_return_amt)), count(Sum(sr_return_amt))] @@ -76,7 +76,7 @@ Limit │ │ │ ├── estimated rows: 0.00 │ │ │ └── Exchange │ │ │ ├── output columns: [Sum(sr_return_amt) (#144), store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] - │ │ │ ├── exchange type: Hash(0, 1) + │ │ │ ├── exchange type: Hash(0: Int32 NULL, 1: Int32 NULL) │ │ │ └── AggregatePartial │ │ │ ├── group by: [sr_customer_sk, sr_store_sk] │ │ │ ├── aggregate functions: [sum(sr_return_amt)] @@ -84,12 +84,12 @@ Limit │ │ │ └── HashJoin │ │ │ ├── output columns: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103), store_returns.sr_return_amt (#107)] │ │ │ ├── join type: INNER - │ │ │ ├── build keys: [date_dim.d_date_sk (#116)] - │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96)] + │ │ │ ├── build keys: [date_dim.d_date_sk (#116): Int32 NULL] + │ │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#96): Int32 NULL] │ │ │ ├── keys is null equal: [false] │ │ │ ├── filters: [] │ │ │ ├── build join filters: - │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116), probe key:store_returns.sr_returned_date_sk (#96), filter type:bloom,inlist,min_max + │ │ │ │ └── filter id:1, build key:date_dim.d_date_sk (#116): Int32 NULL, probe key:store_returns.sr_returned_date_sk (#96): Int32 NULL, filter type:bloom,inlist,min_max │ │ │ ├── estimated rows: 0.00 │ │ │ ├── Exchange(Build) │ │ │ │ ├── output columns: [date_dim.d_date_sk (#116)] @@ -101,7 +101,7 @@ Limit │ │ │ │ ├── read size: 0 │ │ │ │ ├── partitions total: 0 │ │ │ │ ├── partitions scanned: 0 - │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001)], limit: NONE] + │ │ │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#122) = 2001): Boolean], limit: NONE] │ │ │ │ └── estimated rows: 0.00 │ │ │ └── TableScan(Probe) │ │ │ ├── table: default.default.store_returns @@ -120,7 +120,7 @@ Limit │ │ ├── read size: 0 │ │ ├── partitions total: 0 │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN')], limit: NONE] + │ │ ├── push downs: [filters: [is_true(store.s_state (#73) = 'TN'): Boolean], limit: NONE] │ │ ├── apply join filters: [#2] │ │ └── estimated rows: 0.16 │ └── AggregateFinal(Probe) @@ -130,7 +130,7 @@ Limit │ ├── estimated rows: 841298963.13 │ └── Exchange │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] - │ ├── exchange type: Hash(0, 1) + │ ├── exchange type: Hash(0: Int32 NULL, 1: Int32 NULL) │ └── AggregatePartial │ ├── group by: [sr_customer_sk, sr_store_sk] │ ├── aggregate functions: [sum(sr_return_amt)] @@ -138,12 +138,12 @@ Limit │ └── HashJoin │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] │ ├── join type: INNER - │ ├── build keys: [date_dim.d_date_sk (#20)] - │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] + │ ├── build keys: [date_dim.d_date_sk (#20): Int32 NULL] + │ ├── probe keys: [store_returns.sr_returned_date_sk (#0): Int32 NULL] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: - │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max + │ │ └── filter id:0, build key:date_dim.d_date_sk (#20): Int32 NULL, probe key:store_returns.sr_returned_date_sk (#0): Int32 NULL, filter type:inlist,min_max │ ├── estimated rows: 841298963.13 │ ├── Exchange(Build) │ │ ├── output columns: [date_dim.d_date_sk (#20)] @@ -155,7 +155,7 @@ Limit │ │ ├── read size: 0 │ │ ├── partitions total: 0 │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001)], limit: NONE] + │ │ ├── push downs: [filters: [is_true(date_dim.d_year (#26) = 2001): Boolean], limit: NONE] │ │ └── estimated rows: 29.22 │ └── TableScan(Probe) │ ├── table: default.default.store_returns diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt index 9bc7008a03981..3470f08c75878 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt @@ -21,7 +21,7 @@ Limit ├── estimated rows: 143057683321996.78 └── Exchange ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58)] - ├── exchange type: Hash(0, 1, 2) + ├── exchange type: Hash(0: Int32 NULL, 1: String NULL, 2: Int32 NULL) └── AggregatePartial ├── group by: [d_year, i_brand, i_brand_id] ├── aggregate functions: [sum(ss_ext_sales_price)] @@ -29,12 +29,12 @@ Limit └── HashJoin ├── output columns: [store_sales.ss_ext_sales_price (#43), item.i_brand_id (#58), item.i_brand (#59), dt.d_year (#6)] ├── join type: INNER - ├── build keys: [dt.d_date_sk (#0)] - ├── probe keys: [store_sales.ss_sold_date_sk (#28)] + ├── build keys: [dt.d_date_sk (#0): Int32 NULL] + ├── probe keys: [store_sales.ss_sold_date_sk (#28): Int32 NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:1, build key:dt.d_date_sk (#0), probe key:store_sales.ss_sold_date_sk (#28), filter type:inlist,min_max + │ └── filter id:1, build key:dt.d_date_sk (#0): Int32 NULL, probe key:store_sales.ss_sold_date_sk (#28): Int32 NULL, filter type:inlist,min_max ├── estimated rows: 143057683321996.78 ├── Exchange(Build) │ ├── output columns: [dt.d_date_sk (#0), dt.d_year (#6)] @@ -46,17 +46,17 @@ Limit │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(date_dim.d_moy (#8) = 11)], limit: NONE] + │ ├── push downs: [filters: [is_true(date_dim.d_moy (#8) = 11): Boolean], limit: NONE] │ └── estimated rows: 6087.42 └── HashJoin(Probe) ├── output columns: [store_sales.ss_sold_date_sk (#28), store_sales.ss_ext_sales_price (#43), item.i_brand_id (#58), item.i_brand (#59)] ├── join type: INNER - ├── build keys: [item.i_item_sk (#51)] - ├── probe keys: [store_sales.ss_item_sk (#30)] + ├── build keys: [item.i_item_sk (#51): Int32 NULL] + ├── probe keys: [store_sales.ss_item_sk (#30): Int32 NULL] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: - │ └── filter id:0, build key:item.i_item_sk (#51), probe key:store_sales.ss_item_sk (#30), filter type:inlist,min_max + │ └── filter id:0, build key:item.i_item_sk (#51): Int32 NULL, probe key:store_sales.ss_item_sk (#30): Int32 NULL, filter type:inlist,min_max ├── estimated rows: 23500557158.40 ├── Exchange(Build) │ ├── output columns: [item.i_item_sk (#51), item.i_brand_id (#58), item.i_brand (#59)] @@ -68,7 +68,7 @@ Limit │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(item.i_manufact_id (#64) = 128)], limit: NONE] + │ ├── push downs: [filters: [is_true(item.i_manufact_id (#64) = 128): Boolean], limit: NONE] │ └── estimated rows: 81.60 └── TableScan(Probe) ├── table: default.default.store_sales diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index b825e831de176..e74b73c43efe8 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -236,7 +236,7 @@ pub fn format_partial_tree( let filter = plan .predicates .iter() - .map(|pred| pred.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|pred| pred.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(", "); let mut children = vec![FormatTreeNode::new(format!("filters: [{filter}]"))]; if let Some(info) = &plan.stat_info { @@ -312,7 +312,10 @@ pub fn format_partial_tree( let table_name = format!("{}.{}.{}", table.catalog(), table.database(), table.name()); let mut children = vec![FormatTreeNode::new(format!("table: {table_name}"))]; if let Some(filters) = &plan.filters { - let filter = filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display(); + let filter = filters + .filter + .as_expr(&BUILTIN_FUNCTIONS) + .typed_sql_display(); children.push(FormatTreeNode::new(format!("filters: [{filter}]"))); } append_output_rows_info(&mut children, profs, plan.plan_id); @@ -453,7 +456,7 @@ fn to_format_tree( children.push(FormatTreeNode::new(format!( "branch {}: {}", i, - predicate.as_expr(&BUILTIN_FUNCTIONS).sql_display() + predicate.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display() ))); } else { children.push(FormatTreeNode::new(format!("branch {}: None", i))); @@ -479,7 +482,7 @@ fn to_format_tree( eval_scalar .remote_exprs .iter() - .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(", ") ))); } else { @@ -581,7 +584,12 @@ fn format_mutation_source( let filters = plan .filters .as_ref() - .map(|filters| filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|filters| { + filters + .filter + .as_expr(&BUILTIN_FUNCTIONS) + .typed_sql_display() + }) .unwrap_or_default(); let mut children = vec![ FormatTreeNode::new(format!("table: {table_name}")), @@ -657,7 +665,7 @@ fn format_merge_into( |predicate| { format!( "condition: {}", - predicate.as_expr(&BUILTIN_FUNCTIONS).sql_display() + predicate.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display() ) }, ); @@ -675,7 +683,7 @@ fn format_merge_into( format!( "{} = {}", target_schema.field(*field_idx).name(), - expr.as_expr(&BUILTIN_FUNCTIONS).sql_display() + expr.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display() ) }) .join(","); @@ -694,7 +702,7 @@ fn format_merge_into( |predicate| { format!( "condition: {}", - predicate.as_expr(&BUILTIN_FUNCTIONS).sql_display() + predicate.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display() ) }, ); @@ -707,7 +715,7 @@ fn format_merge_into( let values_format = evaluator .2 .iter() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(","); let unmatched_format = format!( "insert into ({}) values({})", @@ -807,10 +815,12 @@ fn table_scan_to_format_tree( .push_downs .as_ref() .and_then(|extras| { - extras - .filters - .as_ref() - .map(|filters| filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + extras.filters.as_ref().map(|filters| { + filters + .filter + .as_expr(&BUILTIN_FUNCTIONS) + .typed_sql_display() + }) }) .unwrap_or_default(); @@ -887,12 +897,12 @@ fn table_scan_to_format_tree( let agg_sel = agg_index .selection .iter() - .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(", "); let agg_filter = agg_index .filter .as_ref() - .map(|f| f.as_expr(&BUILTIN_FUNCTIONS).sql_display()); + .map(|f| f.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()); let text = if let Some(f) = agg_filter { format!("rewritten query: [selection: [{agg_sel}], filter: {f}]") } else { @@ -951,7 +961,7 @@ fn expression_scan_to_format_tree( for (i, value) in plan.values.iter().enumerate() { let column = value .iter() - .map(|val| val.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|val| val.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(", "); children.push(FormatTreeNode::new(format!("column {}: [{}]", i, column))); } @@ -1001,7 +1011,7 @@ fn filter_to_format_tree( let filter = plan .predicates .iter() - .map(|pred| pred.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|pred| pred.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .join(", "); let mut children = vec![ FormatTreeNode::new(format!( @@ -1038,7 +1048,7 @@ fn eval_scalar_to_format_tree( let scalars = plan .exprs .iter() - .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", "); let mut children = vec![ @@ -1466,11 +1476,11 @@ fn range_join_to_format_tree( let left = condition .left_expr .as_expr(&BUILTIN_FUNCTIONS) - .sql_display(); + .typed_sql_display(); let right = condition .right_expr .as_expr(&BUILTIN_FUNCTIONS) - .sql_display(); + .typed_sql_display(); format!("{left} {:?} {right}", condition.operator) }) .collect::>() @@ -1478,7 +1488,7 @@ fn range_join_to_format_tree( let other_conditions = plan .other_conditions .iter() - .map(|filter| filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|filter| filter.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", "); @@ -1533,20 +1543,20 @@ fn hash_join_to_format_tree( let build_keys = plan .build_keys .iter() - .map(|scalar| scalar.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|scalar| scalar.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", "); let probe_keys = plan .probe_keys .iter() - .map(|scalar| scalar.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|scalar| scalar.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", "); let is_null_equal = plan.is_null_equal.iter().map(|b| format!("{b}")).join(", "); let filters = plan .non_equi_conditions .iter() - .map(|filter| filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|filter| filter.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", "); @@ -1561,8 +1571,8 @@ fn hash_join_to_format_tree( let mut s = format!( "filter id:{}, build key:{}, probe key:{}, filter type:", rf.id, - rf.build_key.as_expr(&BUILTIN_FUNCTIONS).sql_display(), - rf.probe_key.as_expr(&BUILTIN_FUNCTIONS).sql_display(), + rf.build_key.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display(), + rf.probe_key.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display(), ); if rf.enable_bloom_runtime_filter { s += "bloom,"; @@ -1646,7 +1656,7 @@ fn exchange_to_format_tree( "Hash({})", plan.keys .iter() - .map(|key| { key.as_expr(&BUILTIN_FUNCTIONS).sql_display() }) + .map(|key| { key.as_expr(&BUILTIN_FUNCTIONS).typed_sql_display() }) .collect::>() .join(", ") ), @@ -1876,7 +1886,7 @@ fn project_set_to_format_tree( "set returning functions: {}", plan.srf_exprs .iter() - .map(|(expr, _)| expr.clone().as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|(expr, _)| expr.clone().as_expr(&BUILTIN_FUNCTIONS).typed_sql_display()) .collect::>() .join(", ") ))]); diff --git a/src/query/sql/src/planner/format/display_rel_operator.rs b/src/query/sql/src/planner/format/display_rel_operator.rs index 9835bbedf0cdd..fe788002a4667 100644 --- a/src/query/sql/src/planner/format/display_rel_operator.rs +++ b/src/query/sql/src/planner/format/display_rel_operator.rs @@ -36,6 +36,7 @@ use crate::plans::Sort; use crate::plans::Udf; use crate::plans::UnionAll; use crate::plans::Window; +use crate::plans::WindowFuncType; impl OperatorHumanizer for DefaultOperatorHumanizer { fn humanize_operator(&self, id_humanizer: &I, op: &RelOperator) -> FormatTreeNode { @@ -146,6 +147,61 @@ fn format_scalar_item(id_humanizer: &I, item: &ScalarItem) -> St ) } +fn format_window_function_type( + id_humanizer: &I, + func_type: &WindowFuncType, +) -> String { + match func_type { + WindowFuncType::Aggregate(agg) => { + let params = agg + .params + .iter() + .map(|arg| arg.to_string()) + .collect::>() + .join(", "); + let args = agg + .args + .iter() + .map(|item| format_scalar(id_humanizer, item)) + .collect::>() + .join(", "); + format!("{}(param: {}, args: {})", agg.func_name, params, args) + } + WindowFuncType::RowNumber => "row_number".to_string(), + WindowFuncType::Rank => "rank".to_string(), + WindowFuncType::DenseRank => "dense_rank".to_string(), + WindowFuncType::PercentRank => "percent_rank".to_string(), + WindowFuncType::LagLead(ll) => { + let func_name = if ll.is_lag { + "lag".to_string() + } else { + "lead".to_string() + }; + let arg = format_scalar(id_humanizer, ll.arg.as_ref()); + let default = ll.default.as_ref().map_or("NULL".to_string(), |item| { + format_scalar(id_humanizer, item.as_ref()) + }); + format!( + "{}(args: {}, offset: {}, default: {})", + func_name, arg, ll.offset, default + ) + } + WindowFuncType::NthValue(nth) => { + let func_name = nth.n.map_or("last_value".to_string(), |n| { + if n == 1 { + "first_value".to_string() + } else { + format!("{}th_value", n) + } + }); + let arg = format_scalar(id_humanizer, nth.arg.as_ref()); + format!("{}({})", func_name, arg) + } + WindowFuncType::Ntile(ntile) => format!("{} tiles", ntile.n), + WindowFuncType::CumeDist => "cume_dist".to_string(), + } +} + fn scan_to_format_tree(id_humanizer: &I, op: &Scan) -> FormatTreeNode { FormatTreeNode::with_children("Scan".to_string(), vec![ FormatTreeNode::new(format!( @@ -352,7 +408,10 @@ fn sort_to_format_tree(id_humanizer: &I, op: &Sort) -> FormatTre "window top: {}", window.top.map_or("NONE".to_string(), |n| n.to_string()) )), - FormatTreeNode::new(format!("window function: {:?}", window.func)), + FormatTreeNode::new(format!( + "window function: {}", + format_window_function_type(id_humanizer, &window.func) + )), ], None => vec![ FormatTreeNode::new(format!("sort keys: [{}]", scalars)),