|
| 1 | +// Copyright 2021 Datafuse Labs |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use databend_common_expression::types::DataType; |
| 16 | +use databend_common_expression::types::NumberScalar; |
| 17 | +use databend_common_expression::RemoteExpr; |
| 18 | +use databend_common_expression::Scalar; |
| 19 | +use iceberg::expr::Predicate; |
| 20 | +use iceberg::expr::Reference; |
| 21 | +use iceberg::spec::Datum; |
| 22 | + |
/// Stateful translator from Databend filter expressions to Iceberg predicates.
///
/// `uncertain` starts as `false` (via `Default`) and is switched to `true` by
/// `build` whenever it meets an expression it cannot translate.
#[derive(Clone, Copy, Debug, Default)]
pub struct PredicateBuilder {
    /// Set once any visited sub-expression could not be translated.
    uncertain: bool,
}
| 27 | + |
| 28 | +impl PredicateBuilder { |
| 29 | + pub fn build(&mut self, expr: &RemoteExpr<String>) -> Predicate { |
| 30 | + match expr { |
| 31 | + RemoteExpr::Constant { |
| 32 | + span: _, |
| 33 | + scalar, |
| 34 | + data_type, |
| 35 | + } if data_type.remove_nullable() == DataType::Boolean => { |
| 36 | + let value = scalar.as_boolean(); |
| 37 | + let is_true = value.copied().unwrap_or(false); |
| 38 | + if is_true { |
| 39 | + Predicate::AlwaysTrue |
| 40 | + } else { |
| 41 | + Predicate::AlwaysFalse |
| 42 | + } |
| 43 | + } |
| 44 | + |
| 45 | + // is_true |
| 46 | + RemoteExpr::FunctionCall { |
| 47 | + span: _, |
| 48 | + id, |
| 49 | + generics: _, |
| 50 | + args, |
| 51 | + return_type: _, |
| 52 | + } if args.len() == 1 && id.name().as_ref() == "is_true" => { |
| 53 | + let predicate = self.build(&args[0]); |
| 54 | + if self.uncertain { |
| 55 | + return Predicate::AlwaysTrue; |
| 56 | + } |
| 57 | + match predicate { |
| 58 | + Predicate::AlwaysTrue => Predicate::AlwaysTrue, |
| 59 | + Predicate::AlwaysFalse => Predicate::AlwaysFalse, |
| 60 | + _ => predicate, |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + // unary |
| 65 | + RemoteExpr::FunctionCall { |
| 66 | + span: _, |
| 67 | + id, |
| 68 | + generics: _, |
| 69 | + args, |
| 70 | + return_type: _, |
| 71 | + } if args.len() == 1 && matches!(args[0], RemoteExpr::ColumnRef { .. }) => { |
| 72 | + let (_, name, _, _) = args[0].as_column_ref().unwrap(); |
| 73 | + let r = Reference::new(name); |
| 74 | + if let Some(op) = build_unary(r, id.name().as_ref()) { |
| 75 | + return op; |
| 76 | + } |
| 77 | + self.uncertain = true; |
| 78 | + Predicate::AlwaysTrue |
| 79 | + } |
| 80 | + |
| 81 | + // not |
| 82 | + RemoteExpr::FunctionCall { |
| 83 | + span: _, |
| 84 | + id, |
| 85 | + generics: _, |
| 86 | + args, |
| 87 | + return_type: _, |
| 88 | + } if args.len() == 1 && id.name().as_ref() == "not" => { |
| 89 | + let predicate = self.build(&args[0]); |
| 90 | + if self.uncertain { |
| 91 | + return Predicate::AlwaysTrue; |
| 92 | + } |
| 93 | + match predicate { |
| 94 | + Predicate::AlwaysTrue => Predicate::AlwaysFalse, |
| 95 | + Predicate::AlwaysFalse => Predicate::AlwaysTrue, |
| 96 | + _ => predicate.negate(), |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + // binary {a op datum} |
| 101 | + RemoteExpr::FunctionCall { |
| 102 | + span: _, |
| 103 | + id, |
| 104 | + generics: _, |
| 105 | + args, |
| 106 | + return_type: _, |
| 107 | + } if args.len() == 2 && ["and", "and_filters", "or"].contains(&id.name().as_ref()) => { |
| 108 | + let left = self.build(&args[0]); |
| 109 | + let right = self.build(&args[1]); |
| 110 | + if self.uncertain { |
| 111 | + return Predicate::AlwaysTrue; |
| 112 | + } |
| 113 | + match id.name().as_ref() { |
| 114 | + "and" | "and_filters" => left.and(right), |
| 115 | + "or" => left.or(right), |
| 116 | + _ => unreachable!(), |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + // binary {a op datum} |
| 121 | + RemoteExpr::FunctionCall { |
| 122 | + span: _, |
| 123 | + id, |
| 124 | + generics: _, |
| 125 | + args, |
| 126 | + return_type: _, |
| 127 | + } if args.len() == 2 |
| 128 | + && matches!(args[0], RemoteExpr::ColumnRef { .. }) |
| 129 | + && matches!(args[1], RemoteExpr::Constant { .. }) => |
| 130 | + { |
| 131 | + let val = args[1].as_constant().unwrap(); |
| 132 | + let val = scalar_to_datatum(val.1); |
| 133 | + if let Some(datum) = val { |
| 134 | + let (_, name, _, _) = args[0].as_column_ref().unwrap(); |
| 135 | + let r = Reference::new(name); |
| 136 | + let p = build_binary(r, id.name().as_ref(), datum); |
| 137 | + if let Some(op) = p { |
| 138 | + return op; |
| 139 | + } |
| 140 | + } |
| 141 | + self.uncertain = true; |
| 142 | + Predicate::AlwaysTrue |
| 143 | + } |
| 144 | + |
| 145 | + // binary {datum op a} |
| 146 | + RemoteExpr::FunctionCall { |
| 147 | + span: _, |
| 148 | + id, |
| 149 | + generics: _, |
| 150 | + args, |
| 151 | + return_type: _, |
| 152 | + } if args.len() == 2 |
| 153 | + && matches!(args[1], RemoteExpr::ColumnRef { .. }) |
| 154 | + && matches!(args[0], RemoteExpr::Constant { .. }) => |
| 155 | + { |
| 156 | + let val = args[0].as_constant().unwrap(); |
| 157 | + let val = scalar_to_datatum(val.1); |
| 158 | + if let Some(datum) = val { |
| 159 | + let (_, name, _, _) = args[1].as_column_ref().unwrap(); |
| 160 | + let r = Reference::new(name); |
| 161 | + let p = build_reverse_binary(r, id.name().as_ref(), datum); |
| 162 | + if let Some(op) = p { |
| 163 | + return op; |
| 164 | + } |
| 165 | + } |
| 166 | + self.uncertain = true; |
| 167 | + Predicate::AlwaysTrue |
| 168 | + } |
| 169 | + |
| 170 | + _ => { |
| 171 | + self.uncertain = true; |
| 172 | + Predicate::AlwaysTrue |
| 173 | + } |
| 174 | + } |
| 175 | + } |
| 176 | +} |
| 177 | + |
| 178 | +fn build_unary(r: Reference, op: &str) -> Option<Predicate> { |
| 179 | + let op = match op { |
| 180 | + "is_null" => r.is_null(), |
| 181 | + "is_not_null" => r.is_not_null(), |
| 182 | + _ => return None, |
| 183 | + }; |
| 184 | + Some(op) |
| 185 | +} |
| 186 | + |
| 187 | +// a op datum |
| 188 | +fn build_binary(r: Reference, op: &str, datum: Datum) -> Option<Predicate> { |
| 189 | + let op = match op { |
| 190 | + "lt" | "<" => r.less_than(datum), |
| 191 | + "le" | "<=" => r.less_than_or_equal_to(datum), |
| 192 | + "gt" | ">" => r.greater_than(datum), |
| 193 | + "ge" | ">=" => r.greater_than_or_equal_to(datum), |
| 194 | + "eq" | "=" => r.equal_to(datum), |
| 195 | + "ne" | "!=" => r.not_equal_to(datum), |
| 196 | + _ => return None, |
| 197 | + }; |
| 198 | + Some(op) |
| 199 | +} |
| 200 | + |
| 201 | +// datum op a to a op_v datum |
| 202 | +fn build_reverse_binary(r: Reference, op: &str, datum: Datum) -> Option<Predicate> { |
| 203 | + let op = match op { |
| 204 | + "lt" | "<" => r.greater_than(datum), |
| 205 | + "le" | "<=" => r.greater_than_or_equal_to(datum), |
| 206 | + "gt" | ">" => r.less_than(datum), |
| 207 | + "ge" | ">=" => r.less_than_or_equal_to(datum), |
| 208 | + "eq" | "=" => r.equal_to(datum), |
| 209 | + "ne" | "!=" => r.not_equal_to(datum), |
| 210 | + _ => return None, |
| 211 | + }; |
| 212 | + Some(op) |
| 213 | +} |
| 214 | + |
| 215 | +fn scalar_to_datatum(scalar: &Scalar) -> Option<Datum> { |
| 216 | + let val = match scalar { |
| 217 | + Scalar::Number(n) => match n { |
| 218 | + NumberScalar::Int8(i) => Datum::int(*i as i32), |
| 219 | + NumberScalar::Int16(i) => Datum::int(*i as i32), |
| 220 | + NumberScalar::Int32(i) => Datum::int(*i), |
| 221 | + NumberScalar::Int64(i) => Datum::long(*i), |
| 222 | + NumberScalar::UInt8(i) => Datum::int(*i as i32), |
| 223 | + NumberScalar::UInt16(i) => Datum::int(*i as i32), |
| 224 | + NumberScalar::UInt32(i) if *i <= i32::MAX as u32 => Datum::int(*i as i32), |
| 225 | + NumberScalar::UInt64(i) if *i <= i64::MAX as u64 => Datum::long(*i as i64), /* Potential loss of precision */ |
| 226 | + NumberScalar::Float32(f) => Datum::float(*f), |
| 227 | + NumberScalar::Float64(f) => Datum::double(*f), |
| 228 | + _ => return None, |
| 229 | + }, |
| 230 | + Scalar::Timestamp(ts) => Datum::timestamp_micros(*ts), |
| 231 | + Scalar::Date(d) => Datum::date(*d), |
| 232 | + Scalar::Boolean(b) => Datum::bool(*b), |
| 233 | + Scalar::Binary(b) => Datum::binary(b.clone()), |
| 234 | + Scalar::String(s) => Datum::string(s), |
| 235 | + _ => return None, |
| 236 | + }; |
| 237 | + Some(val) |
| 238 | +} |
0 commit comments