Skip to content

Commit 4381967

Browse files
authored
feat: implement is_not_null selectivity based on null count in stats (#16730)
* feat: implement is_not_null selectivity based on null count in stats * fix test
1 parent 959e416 commit 4381967

File tree

8 files changed

+91
-24
lines changed

8 files changed

+91
-24
lines changed

src/query/sql/src/planner/optimizer/dynamic_sample/filter_selectivity_sample.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,13 @@ pub async fn filter_selectivity_sample(
9595
if let Some(number_scalar) = count.index(0) {
9696
// Compute and return selectivity
9797
let selectivity = number_scalar.to_f64().to_f64().unwrap() / sample_size;
98-
let mut statistics = child_rel_expr.derive_cardinality()?.statistics.clone();
99-
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
98+
let stat_info = child_rel_expr.derive_cardinality()?;
99+
let mut statistics = stat_info.statistics.clone();
100+
let mut sb = SelectivityEstimator::new(
101+
&mut statistics,
102+
stat_info.cardinality,
103+
HashSet::new(),
104+
);
100105
sb.update_other_statistic_by_selectivity(selectivity);
101106
let stat_info = Arc::new(StatInfo {
102107
cardinality: (selectivity * num_rows as f64).ceil(),

src/query/sql/src/planner/optimizer/property/selectivity.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,19 @@ const ANY_CHAR_SEL: f64 = 0.9; // not 1, since it won't match end-of-string
5252
const FULL_WILDCARD_SEL: f64 = 2.0;
5353

5454
pub struct SelectivityEstimator<'a> {
55+
pub cardinality: f64,
5556
pub input_stat: &'a mut Statistics,
5657
pub updated_column_indexes: HashSet<IndexType>,
5758
}
5859

5960
impl<'a> SelectivityEstimator<'a> {
60-
pub fn new(input_stat: &'a mut Statistics, updated_column_indexes: HashSet<IndexType>) -> Self {
61+
pub fn new(
62+
input_stat: &'a mut Statistics,
63+
cardinality: f64,
64+
updated_column_indexes: HashSet<IndexType>,
65+
) -> Self {
6166
Self {
67+
cardinality,
6268
input_stat,
6369
updated_column_indexes,
6470
}
@@ -102,6 +108,9 @@ impl<'a> SelectivityEstimator<'a> {
102108
if func.func_name.eq("like") {
103109
return self.compute_like_selectivity(func);
104110
}
111+
if func.func_name.eq("is_not_null") {
112+
return self.compute_is_not_null_selectivity(&func.arguments[0]);
113+
}
105114
if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) {
106115
return self.compute_selectivity_comparison_expr(
107116
op,
@@ -159,6 +168,29 @@ impl<'a> SelectivityEstimator<'a> {
159168
}
160169
}
161170

171+
fn compute_is_not_null_selectivity(&mut self, expr: &ScalarExpr) -> Result<f64> {
172+
match expr {
173+
ScalarExpr::BoundColumnRef(column_ref) => {
174+
let column_stat = if let Some(stat) = self
175+
.input_stat
176+
.column_stats
177+
.get_mut(&column_ref.column.index)
178+
{
179+
stat
180+
} else {
181+
return Ok(DEFAULT_SELECTIVITY);
182+
};
183+
if self.cardinality == 0.0 {
184+
return Ok(0.0);
185+
}
186+
let selectivity =
187+
(self.cardinality - column_stat.null_count as f64) / self.cardinality;
188+
Ok(selectivity)
189+
}
190+
_ => Ok(DEFAULT_SELECTIVITY),
191+
}
192+
}
193+
162194
fn compute_selectivity_comparison_expr(
163195
&mut self,
164196
mut op: ComparisonOp,

src/query/sql/src/planner/plans/filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ impl Operator for Filter {
8787
let (input_cardinality, mut statistics) =
8888
(stat_info.cardinality, stat_info.statistics.clone());
8989
// Derive cardinality
90-
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
90+
let mut sb = SelectivityEstimator::new(&mut statistics, input_cardinality, HashSet::new());
9191
let mut selectivity = MAX_SELECTIVITY;
9292
for pred in self.predicates.iter() {
9393
// Compute selectivity for each conjunction

src/query/sql/src/planner/plans/scan.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,11 @@ impl Operator for Scan {
271271
column_stats,
272272
};
273273
// Derive cardinality
274-
let mut sb = SelectivityEstimator::new(&mut statistics, HashSet::new());
274+
let mut sb = SelectivityEstimator::new(
275+
&mut statistics,
276+
precise_cardinality as f64,
277+
HashSet::new(),
278+
);
275279
let mut selectivity = MAX_SELECTIVITY;
276280
for pred in prewhere.predicates.iter() {
277281
// Compute selectivity for each conjunction

tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ HashJoin
112112
├── build keys: [t1.a (#1)]
113113
├── probe keys: [t.a (#0)]
114114
├── filters: []
115-
├── estimated rows: 2.00
115+
├── estimated rows: 10.00
116116
├── Filter(Build)
117117
│ ├── output columns: [t1.a (#1)]
118118
│ ├── filters: [is_not_null(t1.a (#1))]
119-
│ ├── estimated rows: 2.00
119+
│ ├── estimated rows: 10.00
120120
│ └── TableScan
121121
│ ├── table: default.eliminate_outer_join.t
122122
│ ├── output columns: [a (#1)]
@@ -130,7 +130,7 @@ HashJoin
130130
└── Filter(Probe)
131131
├── output columns: [t.a (#0)]
132132
├── filters: [is_not_null(t.a (#0))]
133-
├── estimated rows: 2.00
133+
├── estimated rows: 10.00
134134
└── TableScan
135135
├── table: default.eliminate_outer_join.t
136136
├── output columns: [a (#0)]
@@ -151,11 +151,11 @@ HashJoin
151151
├── build keys: [t1.a (#1)]
152152
├── probe keys: [t.a (#0)]
153153
├── filters: []
154-
├── estimated rows: 2.00
154+
├── estimated rows: 10.00
155155
├── Filter(Build)
156156
│ ├── output columns: [t1.a (#1)]
157157
│ ├── filters: [is_not_null(t1.a (#1))]
158-
│ ├── estimated rows: 2.00
158+
│ ├── estimated rows: 10.00
159159
│ └── TableScan
160160
│ ├── table: default.eliminate_outer_join.t
161161
│ ├── output columns: [a (#1)]
@@ -169,7 +169,7 @@ HashJoin
169169
└── Filter(Probe)
170170
├── output columns: [t.a (#0)]
171171
├── filters: [is_not_null(t.a (#0))]
172-
├── estimated rows: 2.00
172+
├── estimated rows: 10.00
173173
└── TableScan
174174
├── table: default.eliminate_outer_join.t
175175
├── output columns: [a (#0)]
@@ -190,11 +190,11 @@ HashJoin
190190
├── build keys: [t1.a (#1)]
191191
├── probe keys: [t.a (#0)]
192192
├── filters: []
193-
├── estimated rows: 2.00
193+
├── estimated rows: 10.00
194194
├── Filter(Build)
195195
│ ├── output columns: [t1.a (#1)]
196196
│ ├── filters: [is_not_null(t1.a (#1))]
197-
│ ├── estimated rows: 2.00
197+
│ ├── estimated rows: 10.00
198198
│ └── TableScan
199199
│ ├── table: default.eliminate_outer_join.t
200200
│ ├── output columns: [a (#1)]
@@ -208,7 +208,7 @@ HashJoin
208208
└── Filter(Probe)
209209
├── output columns: [t.a (#0)]
210210
├── filters: [is_not_null(t.a (#0))]
211-
├── estimated rows: 2.00
211+
├── estimated rows: 10.00
212212
└── TableScan
213213
├── table: default.eliminate_outer_join.t
214214
├── output columns: [a (#0)]
@@ -229,11 +229,11 @@ HashJoin
229229
├── build keys: [t.a (#0)]
230230
├── probe keys: [t1.a (#1)]
231231
├── filters: []
232-
├── estimated rows: 2.00
232+
├── estimated rows: 10.00
233233
├── Filter(Build)
234234
│ ├── output columns: [t.a (#0)]
235235
│ ├── filters: [is_not_null(t.a (#0))]
236-
│ ├── estimated rows: 2.00
236+
│ ├── estimated rows: 10.00
237237
│ └── TableScan
238238
│ ├── table: default.eliminate_outer_join.t
239239
│ ├── output columns: [a (#0)]
@@ -247,7 +247,7 @@ HashJoin
247247
└── Filter(Probe)
248248
├── output columns: [t1.a (#1)]
249249
├── filters: [is_not_null(t1.a (#1))]
250-
├── estimated rows: 2.00
250+
├── estimated rows: 10.00
251251
└── TableScan
252252
├── table: default.eliminate_outer_join.t
253253
├── output columns: [a (#1)]
@@ -268,11 +268,11 @@ HashJoin
268268
├── build keys: [t1.a (#1)]
269269
├── probe keys: [t.a (#0)]
270270
├── filters: []
271-
├── estimated rows: 2.00
271+
├── estimated rows: 10.00
272272
├── Filter(Build)
273273
│ ├── output columns: [t1.a (#1)]
274274
│ ├── filters: [is_not_null(t1.a (#1))]
275-
│ ├── estimated rows: 2.00
275+
│ ├── estimated rows: 10.00
276276
│ └── TableScan
277277
│ ├── table: default.eliminate_outer_join.t
278278
│ ├── output columns: [a (#1)]
@@ -286,7 +286,7 @@ HashJoin
286286
└── Filter(Probe)
287287
├── output columns: [t.a (#0)]
288288
├── filters: [is_not_null(t.a (#0))]
289-
├── estimated rows: 2.00
289+
├── estimated rows: 10.00
290290
└── TableScan
291291
├── table: default.eliminate_outer_join.t
292292
├── output columns: [a (#0)]

tests/sqllogictests/suites/mode/standalone/explain/nullable_prune.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ explain select * from t_nullable_prune where a is not null
3333
Filter
3434
├── output columns: [t_nullable_prune.a (#0)]
3535
├── filters: [is_not_null(t_nullable_prune.a (#0))]
36-
├── estimated rows: 1.20
36+
├── estimated rows: 3.00
3737
└── TableScan
3838
├── table: default.default.t_nullable_prune
3939
├── output columns: [a (#0)]
@@ -51,7 +51,7 @@ explain select * from t_nullable_prune where a is null
5151
Filter
5252
├── output columns: [t_nullable_prune.a (#0)]
5353
├── filters: [NOT is_not_null(t_nullable_prune.a (#0))]
54-
├── estimated rows: 4.80
54+
├── estimated rows: 3.00
5555
└── TableScan
5656
├── table: default.default.t_nullable_prune
5757
├── output columns: [a (#0)]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
statement ok
2+
CREATE OR REPLACE TABLE twocolumn (x INT NULL, y INT NULL);
3+
4+
statement ok
5+
INSERT INTO twocolumn(x, y) VALUES (44,51), (NULL,52), (42,53), (45,45);
6+
7+
query T
8+
explain select * from twocolumn where x is not NULL;
9+
----
10+
Filter
11+
├── output columns: [twocolumn.x (#0), twocolumn.y (#1)]
12+
├── filters: [is_not_null(twocolumn.x (#0))]
13+
├── estimated rows: 3.00
14+
└── TableScan
15+
├── table: default.default.twocolumn
16+
├── output columns: [x (#0), y (#1)]
17+
├── read rows: 4
18+
├── read size: < 1 KiB
19+
├── partitions total: 1
20+
├── partitions scanned: 1
21+
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
22+
├── push downs: [filters: [is_not_null(twocolumn.x (#0))], limit: NONE]
23+
└── estimated rows: 4.00
24+
25+
statement ok
26+
DROP TABLE twocolumn;

tests/sqllogictests/suites/mode/standalone/explain_native/nullable_prune.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ TableScan
3939
├── partitions scanned: 1
4040
├── pruning stats: [segments: <range pruning: 2 to 1>, blocks: <range pruning: 1 to 1>]
4141
├── push downs: [filters: [is_not_null(t_nullable_prune.a (#0))], limit: NONE]
42-
└── estimated rows: 1.20
42+
└── estimated rows: 3.00
4343

4444
query T
4545
explain select * from t_nullable_prune where a is null
@@ -53,7 +53,7 @@ TableScan
5353
├── partitions scanned: 1
5454
├── pruning stats: [segments: <range pruning: 2 to 1>, blocks: <range pruning: 1 to 1>]
5555
├── push downs: [filters: [NOT is_not_null(t_nullable_prune.a (#0))], limit: NONE]
56-
└── estimated rows: 4.80
56+
└── estimated rows: 3.00
5757

5858
statement ok
5959
DROP TABLE default.default.t_nullable_prune

0 commit comments

Comments
(0)