@@ -47,6 +47,11 @@ pub const DEFAULT_SELECTIVITY: f64 = 1f64 / 5f64;
47
47
pub const SMALL_SELECTIVITY : f64 = 1f64 / 2500f64 ;
48
48
pub const MAX_SELECTIVITY : f64 = 1f64 ;
49
49
50
// Per-element factors used by the LIKE-predicate selectivity estimate.
// The estimate starts at 1.0 and is multiplied by one factor per pattern element.

/// Factor applied for each literal (or backslash-escaped) pattern character.
const FIXED_CHAR_SEL: f64 = 0.5;
/// Factor applied for each counted `_` wildcard; not 1, since it won't match end-of-string.
const ANY_CHAR_SEL: f64 = 0.9;
/// Factor applied for each counted `%` wildcard.
///
/// It is greater than 1 and equals `1 / FIXED_CHAR_SEL`, so one `%` offsets
/// one literal character; the final estimate is clamped to 1.0 by the caller.
const FULL_WILDCARD_SEL: f64 = 2.0;
54
+
50
55
pub struct SelectivityEstimator < ' a > {
51
56
pub input_stat : & ' a mut Statistics ,
52
57
pub updated_column_indexes : HashSet < IndexType > ,
@@ -95,6 +100,9 @@ impl<'a> SelectivityEstimator<'a> {
95
100
}
96
101
97
102
ScalarExpr :: FunctionCall ( func) => {
103
+ if func. func_name . eq ( "like" ) {
104
+ return self . compute_like_selectivity ( func) ;
105
+ }
98
106
if let Some ( op) = ComparisonOp :: try_from_func_name ( & func. func_name ) {
99
107
return self . compute_selectivity_comparison_expr (
100
108
op,
@@ -111,6 +119,47 @@ impl<'a> SelectivityEstimator<'a> {
111
119
} )
112
120
}
113
121
122
+ // The method uses probability predication to compute like selectivity.
123
+ // The core idea is from postgresql.
124
+ fn compute_like_selectivity ( & mut self , func : & FunctionCall ) -> Result < f64 > {
125
+ let right = & func. arguments [ 1 ] ;
126
+ if let ScalarExpr :: ConstantExpr ( ConstantExpr {
127
+ value : Scalar :: String ( patt) ,
128
+ ..
129
+ } ) = right
130
+ {
131
+ let mut sel = 1.0_f64 ;
132
+
133
+ // Skip any leading %; it's already factored into initial sel
134
+ let mut chars = patt. chars ( ) . peekable ( ) ;
135
+ if matches ! ( chars. peek( ) , Some ( & '%' ) | Some ( & '_' ) ) {
136
+ chars. next ( ) ; // consume the leading %
137
+ }
138
+
139
+ while let Some ( c) = chars. next ( ) {
140
+ match c {
141
+ '%' => sel *= FULL_WILDCARD_SEL ,
142
+ '_' => sel *= ANY_CHAR_SEL ,
143
+ '\\' => {
144
+ if chars. peek ( ) . is_some ( ) {
145
+ chars. next ( ) ;
146
+ }
147
+ sel *= FIXED_CHAR_SEL ;
148
+ }
149
+ _ => sel *= FIXED_CHAR_SEL ,
150
+ }
151
+ }
152
+
153
+ // Could get sel > 1 if multiple wildcards
154
+ if sel > 1.0 {
155
+ sel = 1.0 ;
156
+ }
157
+ Ok ( sel)
158
+ } else {
159
+ Ok ( DEFAULT_SELECTIVITY )
160
+ }
161
+ }
162
+
114
163
fn compute_selectivity_comparison_expr (
115
164
& mut self ,
116
165
op : ComparisonOp ,
0 commit comments