Skip to content

Commit 4f7fbc4

Browse files
authored
fix(query): fix union all project error (#18323)
* fix(query): fix union all project error * fix(query): fix union all project error * update
1 parent 1c4a25a commit 4f7fbc4

File tree

2 files changed

+203
-68
lines changed

2 files changed

+203
-68
lines changed

src/query/sql/src/executor/physical_plans/physical_union_all.rs

Lines changed: 73 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -62,41 +62,47 @@ impl PhysicalPlanBuilder {
6262
let lazy_columns = metadata.lazy_columns();
6363
required.extend(lazy_columns);
6464

65+
// Use left's output columns as the offset indices
6566
// if the union has a CTE, the output columns are not filtered
6667
// otherwise, if the output columns of the union do not contain the columns used by the plan in the union, the expression will fail to obtain data.
67-
let (left_required, right_required) = if !union_all.cte_scan_names.is_empty() {
68-
let left: ColumnSet = union_all
69-
.left_outputs
70-
.iter()
71-
.map(|(index, _)| *index)
72-
.collect();
73-
let right: ColumnSet = union_all
74-
.right_outputs
75-
.iter()
76-
.map(|(index, _)| *index)
77-
.collect();
78-
79-
(left, right)
80-
} else {
81-
let indices: Vec<usize> = (0..union_all.left_outputs.len())
82-
.filter(|index| required.contains(&union_all.output_indexes[*index]))
83-
.collect();
84-
if indices.is_empty() {
85-
(
86-
ColumnSet::from([union_all.left_outputs[0].0]),
87-
ColumnSet::from([union_all.right_outputs[0].0]),
88-
)
68+
let (offset_indices, left_required, right_required) =
69+
if !union_all.cte_scan_names.is_empty() {
70+
let left: ColumnSet = union_all
71+
.left_outputs
72+
.iter()
73+
.map(|(index, _)| *index)
74+
.collect();
75+
let right: ColumnSet = union_all
76+
.right_outputs
77+
.iter()
78+
.map(|(index, _)| *index)
79+
.collect();
80+
81+
let offset_indices: Vec<usize> = (0..union_all.left_outputs.len()).collect();
82+
(offset_indices, left, right)
8983
} else {
90-
indices.iter().fold(
91-
(ColumnSet::default(), ColumnSet::default()),
92-
|(mut left, mut right), &index| {
93-
left.insert(union_all.left_outputs[index].0);
94-
right.insert(union_all.right_outputs[index].0);
95-
(left, right)
96-
},
97-
)
98-
}
99-
};
84+
let offset_indices: Vec<usize> = (0..union_all.left_outputs.len())
85+
.filter(|index| required.contains(&union_all.output_indexes[*index]))
86+
.collect();
87+
88+
if offset_indices.is_empty() {
89+
(
90+
vec![0],
91+
ColumnSet::from([union_all.left_outputs[0].0]),
92+
ColumnSet::from([union_all.right_outputs[0].0]),
93+
)
94+
} else {
95+
offset_indices.iter().fold(
96+
(vec![], ColumnSet::default(), ColumnSet::default()),
97+
|(mut offset_indices, mut left, mut right), &index| {
98+
left.insert(union_all.left_outputs[index].0);
99+
right.insert(union_all.right_outputs[index].0);
100+
offset_indices.push(index);
101+
(offset_indices, left, right)
102+
},
103+
)
104+
}
105+
};
100106

101107
// 2. Build physical plan.
102108
let left_plan = self.build(s_expr.child(0)?, left_required.clone()).await?;
@@ -105,28 +111,28 @@ impl PhysicalPlanBuilder {
105111
let left_schema = left_plan.output_schema()?;
106112
let right_schema = right_plan.output_schema()?;
107113

108-
let fields = union_all
109-
.left_outputs
110-
.iter()
111-
.enumerate()
112-
.filter(|(_, (index, _))| left_required.contains(index))
113-
.map(|(i, (index, expr))| {
114-
let data_type = if let Some(expr) = expr {
115-
expr.data_type()?
116-
} else {
117-
left_schema
118-
.field_with_name(&index.to_string())?
119-
.data_type()
120-
.clone()
121-
};
122-
let output_index = union_all.output_indexes[i];
123-
Ok(DataField::new(&output_index.to_string(), data_type))
124-
})
125-
.collect::<Result<Vec<_>>>()?;
126-
127-
let left_outputs = process_outputs(&union_all.left_outputs, &left_required, &left_schema)?;
114+
let left_outputs = process_outputs(&union_all.left_outputs, &offset_indices, &left_schema)?;
128115
let right_outputs =
129-
process_outputs(&union_all.right_outputs, &right_required, &right_schema)?;
116+
process_outputs(&union_all.right_outputs, &offset_indices, &right_schema)?;
117+
118+
let mut fields = Vec::with_capacity(offset_indices.len());
119+
for offset in offset_indices {
120+
let index = union_all.output_indexes[offset];
121+
let data_type = if let Some(scalar_expr) = &union_all.left_outputs[offset].1 {
122+
let expr = scalar_expr
123+
.type_check(left_schema.as_ref())?
124+
.project_column_ref(|idx| left_schema.index_of(&idx.to_string()).unwrap());
125+
expr.data_type().clone()
126+
} else {
127+
let col_index = union_all.left_outputs[offset].0;
128+
left_schema
129+
.field_with_name(&col_index.to_string())?
130+
.data_type()
131+
.clone()
132+
};
133+
134+
fields.push(DataField::new(&index.to_string(), data_type));
135+
}
130136

131137
Ok(PhysicalPlan::UnionAll(UnionAll {
132138
plan_id: 0,
@@ -144,21 +150,20 @@ impl PhysicalPlanBuilder {
144150

145151
fn process_outputs(
146152
outputs: &[(IndexType, Option<ScalarExpr>)],
147-
required: &ColumnSet,
153+
offset_indices: &[usize],
148154
schema: &DataSchema,
149155
) -> Result<Vec<(IndexType, Option<RemoteExpr>)>> {
150-
outputs
151-
.iter()
152-
.filter(|(index, _)| required.contains(index))
153-
.map(|(index, scalar_expr)| {
154-
if let Some(scalar_expr) = scalar_expr {
155-
let expr = scalar_expr
156-
.type_check(schema)?
157-
.project_column_ref(|idx| schema.index_of(&idx.to_string()).unwrap());
158-
Ok((*index, Some(expr.as_remote_expr())))
159-
} else {
160-
Ok((*index, None))
161-
}
162-
})
163-
.collect()
156+
let mut results = Vec::with_capacity(offset_indices.len());
157+
for index in offset_indices {
158+
let output = &outputs[*index];
159+
if let Some(scalar_expr) = &output.1 {
160+
let expr = scalar_expr
161+
.type_check(schema)?
162+
.project_column_ref(|idx| schema.index_of(&idx.to_string()).unwrap());
163+
results.push((output.0, Some(expr.as_remote_expr())));
164+
} else {
165+
results.push((output.0, None));
166+
}
167+
}
168+
Ok(results)
164169
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
statement ok
2+
create or replace database union_case;
3+
4+
statement ok
5+
use union_case;
6+
7+
statement ok
8+
CREATE OR REPLACE TABLE abc (
9+
id STRING,
10+
timestamp_col TIMESTAMP,
11+
category STRING,
12+
type_code STRING,
13+
type_name STRING,
14+
level_code STRING,
15+
class_code STRING,
16+
class_name STRING,
17+
sub_class_code STRING,
18+
region_code STRING,
19+
region_name STRING,
20+
sub_region_code STRING,
21+
sub_region_name STRING
22+
);
23+
24+
statement ok
25+
INSERT INTO abc VALUES
26+
('001', '2024-01-15 10:00:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1000', 'region1', '', ''),
27+
('002', '2024-02-20 14:30:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1000', 'region1', '', ''),
28+
('003', '2024-03-10 09:15:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1000', 'region1', '', ''),
29+
('004', '2024-04-05 16:45:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1000', 'region1', '', ''),
30+
('005', '2024-01-25 11:20:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', '', ''),
31+
('006', '2024-02-28 13:40:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', '', ''),
32+
('007', '2024-03-15 10:30:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', '', ''),
33+
('008', '2024-04-10 15:20:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', '', ''),
34+
('009', '2024-01-30 09:45:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', 'SR102', 'subregion1'),
35+
('010', '2024-02-15 14:10:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', 'SR103', 'subregion2'),
36+
('011', '2024-03-20 11:50:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', 'SR203', 'subregion3'),
37+
('012', '2024-04-15 16:30:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', 'SR205', 'subregion4'),
38+
('101', '2023-01-15 10:00:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1000', 'region1', '', ''),
39+
('102', '2023-02-20 14:30:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1000', 'region1', '', ''),
40+
('103', '2023-03-10 09:15:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1000', 'region1', '', ''),
41+
('105', '2023-01-25 11:20:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', '', ''),
42+
('106', '2023-02-28 13:40:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', '', ''),
43+
('107', '2023-03-15 10:30:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', '', ''),
44+
('109', '2023-01-30 09:45:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', 'SR102', 'subregion5'),
45+
('110', '2023-02-15 14:10:00', 'cat1', 'T001', 'type1', 'L1', 'C001', 'class1', 'SC001', 'R1100', 'region2', 'SR103', 'subregion6'),
46+
('111', '2023-03-20 11:50:00', 'cat2', 'T002', 'type2', 'L1', 'C002', 'class2', 'SC002', 'R1200', 'region3', 'SR203', 'subregion7');
47+
48+
49+
statement ok
50+
create or replace table test3 as SELECT 'CODE1' AS code_id, location AS location_name FROM (SELECT '' AS location, '' AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, '' AS current_name, '' AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, COUNT(DISTINCT id) AS count_val FROM abc GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, 3 AS count_val FROM abc) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name UNION ALL SELECT a.sub_region_name AS location, a.region_code AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, a.sub_region_name AS current_name, a.region_name AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name, COUNT(DISTINCT id) AS count_val FROM abc AS t1 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code, COUNT(DISTINCT id) AS count_val FROM abc AS t2 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name AND a.sub_region_code = b.sub_region_code) AS t3;
51+
52+
53+
statement ok
54+
insert into test3 SELECT 'CODE1' AS code_id, location AS location_name FROM (SELECT '' AS location, '' AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, '' AS current_name, '' AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, COUNT(DISTINCT id) AS count_val FROM abc GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, 3 AS count_val FROM abc) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name UNION ALL SELECT a.sub_region_name AS location, a.region_code AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, a.sub_region_name AS current_name, a.region_name AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name, COUNT(DISTINCT id) AS count_val FROM abc AS t1 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code, COUNT(DISTINCT id) AS count_val FROM abc AS t2 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name AND a.sub_region_code = b.sub_region_code) AS t3;
55+
56+
57+
query TT rowsort
58+
SELECT 'CODE1' AS code_id, location AS location_name FROM (SELECT '' AS location, '' AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, '' AS current_name, '' AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, COUNT(DISTINCT id) AS count_val FROM abc GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, 3 AS count_val FROM abc) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name UNION ALL SELECT a.sub_region_name AS location, a.region_code AS parent_region, a.count_val AS count_val, CASE WHEN b.count_val IS NULL THEN - 1 ELSE round(a.count_val / b.count_val - 1, 6) * 100 END AS percent_diff, 1 AS rank, a.sub_class_code, a.sub_region_name AS current_name, a.region_name AS parent_name FROM (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name, COUNT(DISTINCT id) AS count_val FROM abc AS t1 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_class_code, sub_region_code, sub_region_name, region_code, region_name) AS a LEFT OUTER JOIN (SELECT category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code, COUNT(DISTINCT id) AS count_val FROM abc AS t2 GROUP BY category, type_code, type_name, level_code, class_code, class_name, sub_region_code, sub_region_name, region_code) AS b ON a.category = b.category AND a.type_name = b.type_name AND a.level_code = b.level_code AND a.class_code = b.class_code AND a.class_name = b.class_name AND a.sub_region_code = b.sub_region_code) AS t3;
59+
----
60+
CODE1 (empty)
61+
CODE1 (empty)
62+
CODE1 (empty)
63+
CODE1 (empty)
64+
CODE1 (empty)
65+
CODE1 (empty)
66+
CODE1 (empty)
67+
CODE1 (empty)
68+
CODE1 (empty)
69+
CODE1 (empty)
70+
CODE1 (empty)
71+
CODE1 (empty)
72+
CODE1 (empty)
73+
CODE1 (empty)
74+
CODE1 (empty)
75+
CODE1 (empty)
76+
CODE1 (empty)
77+
CODE1 (empty)
78+
CODE1 (empty)
79+
CODE1 (empty)
80+
CODE1 (empty)
81+
CODE1 (empty)
82+
CODE1 (empty)
83+
CODE1 (empty)
84+
CODE1 (empty)
85+
CODE1 (empty)
86+
CODE1 (empty)
87+
CODE1 (empty)
88+
CODE1 (empty)
89+
CODE1 subregion1
90+
CODE1 subregion1
91+
CODE1 subregion2
92+
CODE1 subregion2
93+
CODE1 subregion3
94+
CODE1 subregion3
95+
CODE1 subregion4
96+
CODE1 subregion5
97+
CODE1 subregion5
98+
CODE1 subregion6
99+
CODE1 subregion6
100+
CODE1 subregion7
101+
CODE1 subregion7
102+
103+
104+
statement ok
105+
create or replace table test(a string, b string, c string);
106+
107+
statement ok
108+
insert into test values ('a', 'b', 'c'), ('d', 'e', 'f');
109+
110+
query TTT rowsort
111+
select '1' as a , b, c from test union all select a, a as b, a as c from test;
112+
----
113+
1 b c
114+
1 e f
115+
a a a
116+
d d d
117+
118+
query TTT rowsort
119+
select 'xx' as j, a, b from (select '1' as a , b, c from test union all select a, a as b, a as c from test);
120+
----
121+
xx 1 b
122+
xx 1 e
123+
xx a a
124+
xx d d
125+
126+
statement ok
127+
drop table abc;
128+
129+
statement ok
130+
drop table test3;

0 commit comments

Comments
 (0)