Skip to content

Commit 6517e89

Browse files
authored
feat: impl like selectivity computation by probability prediction (#14846)
1 parent 0ca00f9 commit 6517e89

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

src/query/sql/src/planner/optimizer/property/selectivity.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ pub const DEFAULT_SELECTIVITY: f64 = 1f64 / 5f64;
4747
pub const SMALL_SELECTIVITY: f64 = 1f64 / 2500f64;
4848
pub const MAX_SELECTIVITY: f64 = 1f64;
4949

50+
/// Some constants for like predicate selectivity estimation.
///
/// `compute_like_selectivity` starts from an estimate of 1.0, multiplies in
/// one of these factors per pattern character, and caps the product at 1.0.
///
/// Factor applied for each literal (or backslash-escaped) pattern character.
51+
const FIXED_CHAR_SEL: f64 = 0.5;
52+
// Factor applied for each `_` wildcard.
const ANY_CHAR_SEL: f64 = 0.9; // not 1, since it won't match end-of-string
53+
// Factor applied for each `%` wildcard. It is greater than 1, so every `%`
// raises the running estimate instead of narrowing it.
const FULL_WILDCARD_SEL: f64 = 2.0;
54+
5055
pub struct SelectivityEstimator<'a> {
5156
pub input_stat: &'a mut Statistics,
5257
pub updated_column_indexes: HashSet<IndexType>,
@@ -95,6 +100,9 @@ impl<'a> SelectivityEstimator<'a> {
95100
}
96101

97102
ScalarExpr::FunctionCall(func) => {
103+
if func.func_name.eq("like") {
104+
return self.compute_like_selectivity(func);
105+
}
98106
if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) {
99107
return self.compute_selectivity_comparison_expr(
100108
op,
@@ -111,6 +119,47 @@ impl<'a> SelectivityEstimator<'a> {
111119
})
112120
}
113121

122+
// The method uses probability predication to compute like selectivity.
123+
// The core idea is from postgresql.
124+
fn compute_like_selectivity(&mut self, func: &FunctionCall) -> Result<f64> {
125+
let right = &func.arguments[1];
126+
if let ScalarExpr::ConstantExpr(ConstantExpr {
127+
value: Scalar::String(patt),
128+
..
129+
}) = right
130+
{
131+
let mut sel = 1.0_f64;
132+
133+
// Skip any leading %; it's already factored into initial sel
134+
let mut chars = patt.chars().peekable();
135+
if matches!(chars.peek(), Some(&'%') | Some(&'_')) {
136+
chars.next(); // consume the leading %
137+
}
138+
139+
while let Some(c) = chars.next() {
140+
match c {
141+
'%' => sel *= FULL_WILDCARD_SEL,
142+
'_' => sel *= ANY_CHAR_SEL,
143+
'\\' => {
144+
if chars.peek().is_some() {
145+
chars.next();
146+
}
147+
sel *= FIXED_CHAR_SEL;
148+
}
149+
_ => sel *= FIXED_CHAR_SEL,
150+
}
151+
}
152+
153+
// Could get sel > 1 if multiple wildcards
154+
if sel > 1.0 {
155+
sel = 1.0;
156+
}
157+
Ok(sel)
158+
} else {
159+
Ok(DEFAULT_SELECTIVITY)
160+
}
161+
}
162+
114163
fn compute_selectivity_comparison_expr(
115164
&mut self,
116165
op: ComparisonOp,

tests/sqllogictests/suites/tpch/queries.test

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,17 +1562,17 @@ HashJoin: INNER
15621562
│ │ └── Probe
15631563
│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000)
15641564
│ └── Probe
1565-
│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000)
1565+
│ └── HashJoin: INNER
1566+
│ ├── Build
1567+
│ │ └── HashJoin: INNER
1568+
│ │ ├── Build
1569+
│ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000)
1570+
│ │ └── Probe
1571+
│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572)
1572+
│ └── Probe
1573+
│ └── Scan: default.tpch_test.orders (#4) (read rows: 150000)
15661574
└── Probe
1567-
└── HashJoin: INNER
1568-
├── Build
1569-
│ └── HashJoin: INNER
1570-
│ ├── Build
1571-
│ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000)
1572-
│ └── Probe
1573-
│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572)
1574-
└── Probe
1575-
└── Scan: default.tpch_test.orders (#4) (read rows: 150000)
1575+
└── Scan: default.tpch_test.partsupp (#3) (read rows: 80000)
15761576

15771577
# Q10
15781578
query I

0 commit comments

Comments
(0)