Skip to content

Commit 5d4007e

Browse files
committed
Move all filter preds to prewhere.
1 parent 7976d88 commit 5d4007e

File tree

5 files changed

+59
-95
lines changed

5 files changed

+59
-95
lines changed

src/query/service/src/sql/planner/optimizer/heuristic/prewhere_optimization.rs

Lines changed: 18 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -48,59 +48,50 @@ impl PrewhereOptimizer {
4848
}
4949
}
5050

51-
fn collect_columns(expr: &Scalar, columns: &mut ColumnSet) {
51+
fn collect_columns_impl(expr: &Scalar, columns: &mut ColumnSet) {
5252
match expr {
5353
Scalar::BoundColumnRef(column) => {
5454
columns.insert(column.column.index);
5555
}
5656
Scalar::AndExpr(and) => {
57-
Self::collect_columns(and.left.as_ref(), columns);
58-
Self::collect_columns(and.right.as_ref(), columns);
57+
Self::collect_columns_impl(and.left.as_ref(), columns);
58+
Self::collect_columns_impl(and.right.as_ref(), columns);
5959
}
6060
Scalar::OrExpr(or) => {
61-
Self::collect_columns(or.left.as_ref(), columns);
62-
Self::collect_columns(or.right.as_ref(), columns);
61+
Self::collect_columns_impl(or.left.as_ref(), columns);
62+
Self::collect_columns_impl(or.right.as_ref(), columns);
6363
}
6464
Scalar::ComparisonExpr(cmp) => {
65-
Self::collect_columns(cmp.left.as_ref(), columns);
66-
Self::collect_columns(cmp.right.as_ref(), columns);
65+
Self::collect_columns_impl(cmp.left.as_ref(), columns);
66+
Self::collect_columns_impl(cmp.right.as_ref(), columns);
6767
}
6868
Scalar::FunctionCall(func) => {
6969
for arg in func.arguments.iter() {
70-
Self::collect_columns(arg, columns);
70+
Self::collect_columns_impl(arg, columns);
7171
}
7272
}
7373
Scalar::CastExpr(cast) => {
74-
Self::collect_columns(cast.argument.as_ref(), columns);
74+
Self::collect_columns_impl(cast.argument.as_ref(), columns);
7575
}
7676
// 1. ConstantExpr is not collected.
77-
// 2. SubqueryExpr is not collected.
78-
// 3. AggregateFunction will not appear in where clause.
77+
// 2. SubqueryExpr and AggregateFunction will not appear in the Filter -> LogicalGet pattern.
7978
_ => {}
8079
}
8180
}
8281

8382
// analyze if the expression can be moved to prewhere
84-
fn analyze(expr: &Scalar, columns_to_scan: usize) -> (bool, ColumnSet) {
83+
fn collect_columns(expr: &Scalar) -> ColumnSet {
8584
let mut columns = ColumnSet::new();
86-
8785
// columns in subqueries are not considered
88-
Self::collect_columns(expr, &mut columns);
86+
Self::collect_columns_impl(expr, &mut columns);
8987

90-
// viable conditions:
91-
// 1. Condition depend on some column. Constant expressions are not moved.
92-
// 2. Do not move conditions involving all queried columns.
93-
// 3. Only current table columns are considered. (This condition is always true in current Pattern (Filter -> LogicalGet)).
94-
(
95-
!columns.is_empty() && columns.len() < columns_to_scan,
96-
columns,
97-
)
88+
columns
9889
}
9990

10091
pub fn prewhere_optimize(&self, s_expr: SExpr) -> Result<SExpr> {
10192
let rel_op = s_expr.plan();
10293
if s_expr.match_pattern(&self.pattern) {
103-
let mut filter: Filter = s_expr.plan().clone().try_into()?;
94+
let filter: Filter = s_expr.plan().clone().try_into()?;
10495
let mut get: LogicalGet = s_expr.child(0)?.plan().clone().try_into()?;
10596
let metadata = self.metadata.read().clone();
10697

@@ -112,19 +103,12 @@ impl PrewhereOptimizer {
112103

113104
let mut prewhere_columns = ColumnSet::new();
114105
let mut prewhere_pred = Vec::new();
115-
let mut remain_pred = Vec::new();
116-
117-
let columns_to_scan = get.columns.len();
118106

119107
// filter.predicates are already split by AND
120108
for pred in filter.predicates.iter() {
121-
let (viable, columns) = Self::analyze(pred, columns_to_scan);
122-
if viable {
123-
prewhere_pred.push(pred.clone());
124-
prewhere_columns.extend(&columns);
125-
} else {
126-
remain_pred.push(pred.clone());
127-
}
109+
let columns = Self::collect_columns(pred);
110+
prewhere_pred.push(pred.clone());
111+
prewhere_columns.extend(&columns);
128112
}
129113

130114
get.prewhere = if prewhere_pred.is_empty() {
@@ -137,15 +121,7 @@ impl PrewhereOptimizer {
137121
})
138122
};
139123

140-
if !remain_pred.is_empty() {
141-
filter.predicates = remain_pred;
142-
Ok(SExpr::create_unary(
143-
filter.into(),
144-
SExpr::create_leaf(get.into()),
145-
))
146-
} else {
147-
Ok(SExpr::create_leaf(get.into()))
148-
}
124+
Ok(SExpr::create_leaf(get.into()))
149125
} else {
150126
let children = s_expr
151127
.children()

tests/logictest/suites/mode/cluster/04_0002_explain_v2

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,13 @@ Exchange
1818
├── exchange type: Merge
1919
└── Project
2020
├── columns: [a (#0)]
21-
└── Filter
22-
├── filters: [>(t1.a (#0), 0)]
23-
└── TableScan
24-
├── table: default.default.t1
25-
├── read rows: 0
26-
├── read bytes: 0
27-
├── partitions total: 0
28-
├── partitions scanned: 0
29-
└── push downs: [filters: [(a > 0)], limit: NONE]
21+
└── TableScan
22+
├── table: default.default.t1
23+
├── read rows: 0
24+
├── read bytes: 0
25+
├── partitions total: 0
26+
├── partitions scanned: 0
27+
└── push downs: [filters: [(a > 0)], limit: NONE]
3028

3129
statement query T
3230
explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a and t2.a > 5 and t1.a > 1);

tests/logictest/suites/mode/standalone/explain/explain.test

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,13 @@ explain select t1.a from t1 where a > 0;
1616
----
1717
Project
1818
├── columns: [a (#0)]
19-
└── Filter
20-
├── filters: [>(t1.a (#0), 0)]
21-
└── TableScan
22-
├── table: default.default.t1
23-
├── read rows: 0
24-
├── read bytes: 0
25-
├── partitions total: 0
26-
├── partitions scanned: 0
27-
└── push downs: [filters: [(a > 0)], limit: NONE]
19+
└── TableScan
20+
├── table: default.default.t1
21+
├── read rows: 0
22+
├── read bytes: 0
23+
├── partitions total: 0
24+
├── partitions scanned: 0
25+
└── push downs: [filters: [(a > 0)], limit: NONE]
2826

2927
statement query T
3028
explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a and t2.a > 5 and t1.a > 1);

tests/logictest/suites/mode/standalone/explain/fold_count.test

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,13 @@ Project
4545
└── AggregatePartial
4646
├── group by: []
4747
├── aggregate functions: [count()]
48-
└── Filter
49-
├── filters: [>(t.number (#0), 10)]
50-
└── TableScan
51-
├── table: default.default.t
52-
├── read rows: 1000
53-
├── read bytes: 4028
54-
├── partitions total: 2
55-
├── partitions scanned: 1
56-
└── push downs: [filters: [(number > 10)], limit: NONE]
48+
└── TableScan
49+
├── table: default.default.t
50+
├── read rows: 1000
51+
├── read bytes: 4028
52+
├── partitions total: 2
53+
├── partitions scanned: 1
54+
└── push downs: [filters: [(number > 10)], limit: NONE]
5755

5856
statement ok
5957
drop table t;

tests/logictest/suites/mode/standalone/explain/where_optimizaiton.test renamed to tests/logictest/suites/mode/standalone/explain/prewhere_optimizaiton.test

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,37 @@ explain select a from t_where_optimizer where a = 1;
1010
----
1111
Project
1212
├── columns: [a (#0)]
13-
└── Filter
14-
├── filters: [=(t_where_optimizer.a (#0), 1)]
15-
└── TableScan
16-
├── table: default.default.t_where_optimizer
17-
├── read rows: 0
18-
├── read bytes: 0
19-
├── partitions total: 0
20-
├── partitions scanned: 0
21-
└── push downs: [filters: [(a = 1)], limit: NONE]
22-
23-
statement query T
24-
explain select * from t_where_optimizer where a = b;
25-
26-
----
27-
Filter
28-
├── filters: [=(t_where_optimizer.a (#0), t_where_optimizer.b (#1))]
2913
└── TableScan
3014
├── table: default.default.t_where_optimizer
3115
├── read rows: 0
3216
├── read bytes: 0
3317
├── partitions total: 0
3418
├── partitions scanned: 0
35-
└── push downs: [filters: [(a = b)], limit: NONE]
19+
└── push downs: [filters: [(a = 1)], limit: NONE]
20+
21+
statement query T
22+
explain select * from t_where_optimizer where a = b;
23+
24+
----
25+
TableScan
26+
├── table: default.default.t_where_optimizer
27+
├── read rows: 0
28+
├── read bytes: 0
29+
├── partitions total: 0
30+
├── partitions scanned: 0
31+
└── push downs: [filters: [(a = b)], limit: NONE]
3632

3733
statement query T
3834
explain select * from t_where_optimizer where a = 1 or b > 2;
3935

4036
----
41-
Filter
42-
├── filters: [or(=(t_where_optimizer.a (#0), 1), >(t_where_optimizer.b (#1), 2))]
43-
└── TableScan
44-
├── table: default.default.t_where_optimizer
45-
├── read rows: 0
46-
├── read bytes: 0
47-
├── partitions total: 0
48-
├── partitions scanned: 0
49-
└── push downs: [filters: [((a = 1) or (b > 2))], limit: NONE]
37+
TableScan
38+
├── table: default.default.t_where_optimizer
39+
├── read rows: 0
40+
├── read bytes: 0
41+
├── partitions total: 0
42+
├── partitions scanned: 0
43+
└── push downs: [filters: [((a = 1) or (b > 2))], limit: NONE]
5044

5145
statement query T
5246
explain select * from t_where_optimizer where a = 1 and b > 2;

0 commit comments

Comments
 (0)