Skip to content

Commit e89232e

Browse files
committed
feat(query): add and_filters function
1 parent 35c44d1 commit e89232e

File tree

8 files changed

+217
-31
lines changed

8 files changed

+217
-31
lines changed

src/query/expression/src/register.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3993,7 +3993,7 @@ fn erase_calc_domain_generic_1_arg<I1: ArgType, O: ArgType>(
39933993
func: impl Fn(&I1::Domain) -> Option<O::Domain>,
39943994
) -> impl Fn(&[Domain], &GenericMap) -> Option<Domain> {
39953995
move |args, _generics| {
3996-
let arg1 = I1::try_downcast_domain(&args[0]).unwrap();
3996+
let arg1 = I1::try_downcast_domain(&args[0])?;
39973997
func(&arg1).map(O::upcast_domain)
39983998
}
39993999
}
@@ -4002,8 +4002,8 @@ fn erase_calc_domain_generic_2_arg<I1: ArgType, I2: ArgType, O: ArgType>(
40024002
func: impl Fn(&I1::Domain, &I2::Domain) -> Option<O::Domain>,
40034003
) -> impl Fn(&[Domain], &GenericMap) -> Option<Domain> {
40044004
move |args, _generics| {
4005-
let arg1 = I1::try_downcast_domain(&args[0]).unwrap();
4006-
let arg2 = I2::try_downcast_domain(&args[1]).unwrap();
4005+
let arg1 = I1::try_downcast_domain(&args[0])?;
4006+
let arg2 = I2::try_downcast_domain(&args[1])?;
40074007
func(&arg1, &arg2).map(O::upcast_domain)
40084008
}
40094009
}
@@ -4012,9 +4012,9 @@ fn erase_calc_domain_generic_3_arg<I1: ArgType, I2: ArgType, I3: ArgType, O: Arg
40124012
func: impl Fn(&I1::Domain, &I2::Domain, &I3::Domain) -> Option<O::Domain>,
40134013
) -> impl Fn(&[Domain], &GenericMap) -> Option<Domain> {
40144014
move |args, _generics| {
4015-
let arg1 = I1::try_downcast_domain(&args[0]).unwrap();
4016-
let arg2 = I2::try_downcast_domain(&args[1]).unwrap();
4017-
let arg3 = I3::try_downcast_domain(&args[2]).unwrap();
4015+
let arg1 = I1::try_downcast_domain(&args[0])?;
4016+
let arg2 = I2::try_downcast_domain(&args[1])?;
4017+
let arg3 = I3::try_downcast_domain(&args[2])?;
40184018
func(&arg1, &arg2, &arg3).map(O::upcast_domain)
40194019
}
40204020
}
@@ -4029,9 +4029,9 @@ fn erase_calc_domain_generic_4_arg<
40294029
func: impl Fn(&I1::Domain, &I2::Domain, &I3::Domain, &I4::Domain) -> Option<O::Domain>,
40304030
) -> impl Fn(&[Domain], &GenericMap) -> Option<Domain> {
40314031
move |args, _generics| {
4032-
let arg1 = I1::try_downcast_domain(&args[0]).unwrap();
4033-
let arg2 = I2::try_downcast_domain(&args[1]).unwrap();
4034-
let arg3 = I3::try_downcast_domain(&args[2]).unwrap();
4032+
let arg1 = I1::try_downcast_domain(&args[0])?;
4033+
let arg2 = I2::try_downcast_domain(&args[1])?;
4034+
let arg3 = I3::try_downcast_domain(&args[2])?;
40354035
let arg4 = I4::try_downcast_domain(&args[3]).unwrap();
40364036
func(&arg1, &arg2, &arg3, &arg4).map(O::upcast_domain)
40374037
}
@@ -4048,9 +4048,9 @@ fn erase_calc_domain_generic_5_arg<
40484048
func: impl Fn(&I1::Domain, &I2::Domain, &I3::Domain, &I4::Domain, &I5::Domain) -> Option<O::Domain>,
40494049
) -> impl Fn(&[Domain], &GenericMap) -> Option<Domain> {
40504050
move |args, _generics| {
4051-
let arg1 = I1::try_downcast_domain(&args[0]).unwrap();
4052-
let arg2 = I2::try_downcast_domain(&args[1]).unwrap();
4053-
let arg3 = I3::try_downcast_domain(&args[2]).unwrap();
4051+
let arg1 = I1::try_downcast_domain(&args[0])?;
4052+
let arg2 = I2::try_downcast_domain(&args[1])?;
4053+
let arg3 = I3::try_downcast_domain(&args[2])?;
40544054
let arg4 = I4::try_downcast_domain(&args[3]).unwrap();
40554055
let arg5 = I5::try_downcast_domain(&args[4]).unwrap();
40564056
func(&arg1, &arg2, &arg3, &arg4, &arg5).map(O::upcast_domain)

src/query/functions-v2/src/scalars/boolean.rs

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
use common_expression::types::boolean::BooleanDomain;
1616
use common_expression::types::BooleanType;
17+
use common_expression::types::NullableType;
18+
use common_expression::vectorize_2_arg;
1719
use common_expression::FunctionProperty;
1820
use common_expression::FunctionRegistry;
1921

@@ -29,7 +31,58 @@ pub fn register(registry: &mut FunctionRegistry) {
2931
},
3032
|val| !val,
3133
);
34+
35+
// special function to combine the filter efficiently
3236
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
37+
"and_filters",
38+
FunctionProperty::default(),
39+
|lhs, rhs| {
40+
Some(BooleanDomain {
41+
has_false: lhs.has_false || rhs.has_false,
42+
has_true: lhs.has_true && rhs.has_true,
43+
})
44+
},
45+
|lhs, rhs| lhs & rhs,
46+
);
47+
48+
// https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
49+
// Three-valued (Kleene) AND over nullable booleans:
//   FALSE AND x    = FALSE   (even when x is NULL)
//   TRUE  AND NULL = NULL
//   NULL  AND NULL = NULL
// The domain calculator returns `None`, i.e. no domain is derived for this overload.
registry.register_2_arg_core::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>, _, _>(
    "and",
    FunctionProperty::default(),
    |_, _| {
        None
    },
    vectorize_2_arg::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>>(|lhs, rhs| {
        let lhs_v = lhs.is_some();
        let rhs_v = rhs.is_some();
        // The result is known when both operands are known, or when either operand is a
        // known FALSE (which decides the conjunction on its own). A known TRUE does NOT
        // decide an AND, so it must not make the result valid by itself.
        let valid = (lhs_v & rhs_v) | (lhs_v & !lhs.unwrap_or_default()) | (rhs_v & !rhs.unwrap_or_default());
        if valid {
            // A NULL operand defaults to `false` here; that is only reachable when the
            // other side is a known FALSE, so the value is `false` either way.
            Some(lhs.unwrap_or_default() & rhs.unwrap_or_default())
        } else {
            None
        }
    }),
);
66+
67+
registry.register_2_arg_core::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>, _, _>(
68+
"or",
69+
FunctionProperty::default(),
70+
|_, _| {
71+
None
72+
},
73+
vectorize_2_arg::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>>(|lhs, rhs| {
74+
let lhs_v = lhs.is_some();
75+
let rhs_v = rhs.is_some();
76+
let valid = (lhs_v & rhs_v) | (lhs.unwrap_or_default() | rhs.unwrap_or_default());
77+
if valid {
78+
Some(lhs.unwrap_or_default() | rhs.unwrap_or_default())
79+
} else {
80+
None
81+
}
82+
}),
83+
);
84+
85+
registry.register_2_arg_core::<BooleanType, BooleanType, BooleanType, _, _>(
3386
"and",
3487
FunctionProperty::default(),
3588
|lhs, rhs| {
@@ -38,9 +91,10 @@ pub fn register(registry: &mut FunctionRegistry) {
3891
has_true: lhs.has_true && rhs.has_true,
3992
})
4093
},
41-
|lhs, rhs| lhs && rhs,
94+
vectorize_2_arg::<BooleanType, BooleanType, BooleanType>(|lhs, rhs| lhs & rhs),
4295
);
43-
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
96+
97+
registry.register_2_arg_core::<BooleanType, BooleanType, BooleanType, _, _>(
4498
"or",
4599
FunctionProperty::default(),
46100
|lhs, rhs| {
@@ -49,8 +103,9 @@ pub fn register(registry: &mut FunctionRegistry) {
49103
has_true: lhs.has_true || rhs.has_true,
50104
})
51105
},
52-
|lhs, rhs| lhs || rhs,
106+
vectorize_2_arg::<BooleanType, BooleanType, BooleanType>(|lhs, rhs| lhs | rhs),
53107
);
108+
54109
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
55110
"xor",
56111
FunctionProperty::default(),

src/query/functions-v2/tests/it/scalars/boolean.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ fn test_boolean() {
3434

3535
fn test_and(file: &mut impl Write) {
3636
run_ast(file, "true AND false", &[]);
37-
run_ast(file, "null AND false", &[]);
37+
run_ast(file, "true AND null", &[]);
38+
run_ast(file, "true AND true", &[]);
39+
run_ast(file, "false AND false", &[]);
40+
run_ast(file, "false AND null", &[]);
41+
run_ast(file, "false AND true", &[]);
3842
}
3943

4044
fn test_not(file: &mut impl Write) {

src/query/functions-v2/tests/it/scalars/testdata/boolean.txt

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,57 @@
11
ast : true AND false
22
raw expr : and(true, false)
3-
checked expr : and<Boolean, Boolean>(true, false)
3+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(true AS Boolean NULL), CAST(false AS Boolean NULL))
44
optimized expr : false
5-
output type : Boolean
6-
output domain : {FALSE}
5+
output type : Boolean NULL
6+
output domain : Unknown
7+
output : false
8+
9+
10+
ast : true AND null
11+
raw expr : and(true, NULL)
12+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(true AS Boolean NULL), CAST(NULL AS Boolean NULL))
13+
optimized expr : false
14+
output type : Boolean NULL
15+
output domain : Unknown
716
output : false
817

918

10-
ast : null AND false
11-
raw expr : and(NULL, false)
12-
checked expr : and<NULL, Boolean NULL>(NULL, CAST(false AS Boolean NULL))
19+
ast : true AND true
20+
raw expr : and(true, true)
21+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(true AS Boolean NULL), CAST(true AS Boolean NULL))
22+
optimized expr : true
23+
output type : Boolean NULL
24+
output domain : Unknown
25+
output : true
26+
27+
28+
ast : false AND false
29+
raw expr : and(false, false)
30+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(false AS Boolean NULL), CAST(false AS Boolean NULL))
31+
optimized expr : false
32+
output type : Boolean NULL
33+
output domain : Unknown
34+
output : false
35+
36+
37+
ast : false AND null
38+
raw expr : and(false, NULL)
39+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(false AS Boolean NULL), CAST(NULL AS Boolean NULL))
1340
optimized expr : NULL
14-
output type : NULL
15-
output domain : {NULL}
41+
output type : Boolean NULL
42+
output domain : Unknown
1643
output : NULL
1744

1845

46+
ast : false AND true
47+
raw expr : and(false, true)
48+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(false AS Boolean NULL), CAST(true AS Boolean NULL))
49+
optimized expr : false
50+
output type : Boolean NULL
51+
output domain : Unknown
52+
output : false
53+
54+
1955
ast : NOT a
2056
raw expr : not(ColumnRef(0)::NULL)
2157
checked expr : not<NULL>(ColumnRef(0))
@@ -93,19 +129,19 @@ evaluation (internal):
93129

94130
ast : true OR false
95131
raw expr : or(true, false)
96-
checked expr : or<Boolean, Boolean>(true, false)
132+
checked expr : or<Boolean NULL, Boolean NULL>(CAST(true AS Boolean NULL), CAST(false AS Boolean NULL))
97133
optimized expr : true
98-
output type : Boolean
99-
output domain : {TRUE}
134+
output type : Boolean NULL
135+
output domain : Unknown
100136
output : true
101137

102138

103139
ast : null OR false
104140
raw expr : or(NULL, false)
105-
checked expr : or<NULL, Boolean NULL>(NULL, CAST(false AS Boolean NULL))
141+
checked expr : or<Boolean NULL, Boolean NULL>(CAST(NULL AS Boolean NULL), CAST(false AS Boolean NULL))
106142
optimized expr : NULL
107-
output type : NULL
108-
output domain : {NULL}
143+
output type : Boolean NULL
144+
output domain : Unknown
109145
output : NULL
110146

111147

src/query/functions/src/scalars/logics/and.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use core::fmt;
16+
17+
use common_arrow::arrow::bitmap::MutableBitmap;
1518
use common_datavalues::prelude::*;
19+
use common_exception::ErrorCode;
1620
use common_exception::Result;
1721

1822
use super::logic::LogicExpression;
@@ -30,6 +34,14 @@ impl_logic_expression!(LogicAndExpression, &, |lhs: bool, rhs: bool, lhs_v: bool
3034
(lhs & rhs, (lhs_v & rhs_v) | (!lhs & lhs_v) | (!rhs & rhs_v))
3135
});
3236

37+
/// Logical functions AND, OR, XOR and NOT support three-valued (or ternary) logic
38+
/// https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
39+
///
40+
/// Functions XOR and NOT rely on "default implementation for NULLs":
41+
/// - if any of the arguments is of Nullable type, the return value type is Nullable
42+
/// - if any of the arguments is NULL, the return value is NULL
43+
///
44+
/// Functions AND and OR provide their own special implementations for ternary logic
3345
#[derive(Clone)]
3446
pub struct LogicAndFunction;
3547

@@ -47,3 +59,79 @@ impl LogicAndFunction {
4759
)
4860
}
4961
}
62+
63+
#[derive(Clone)]
64+
pub struct LogicAndFiltersFunction;
65+
66+
impl Function for LogicAndFiltersFunction {
67+
fn name(&self) -> &str {
68+
"and_filters"
69+
}
70+
71+
fn return_type(&self) -> DataTypeImpl {
72+
bool::to_data_type()
73+
}
74+
75+
fn eval(
76+
&self,
77+
_func_ctx: FunctionContext,
78+
columns: &ColumnsWithField,
79+
input_rows: usize,
80+
) -> Result<ColumnRef> {
81+
if columns.len() == 1 {
82+
return Ok(columns[1].column().clone());
83+
}
84+
85+
let mut validity = None;
86+
for c in columns.iter() {
87+
let c = c.column();
88+
if c.is_const() {
89+
let v = c.get_bool(0)?;
90+
if !v {
91+
let validity = MutableBitmap::from_len_zeroed(input_rows).into();
92+
return Ok(BooleanColumn::from_arrow_data(validity).arc());
93+
}
94+
} else {
95+
let bools: &BooleanColumn = Series::check_get(c)?;
96+
match validity.as_mut() {
97+
Some(v) => {
98+
*v = &*v & bools.values();
99+
}
100+
None => validity = Some(bools.values().clone()),
101+
}
102+
}
103+
}
104+
let validity = validity.unwrap_or(MutableBitmap::from_len_set(input_rows).into());
105+
Ok(BooleanColumn::from_arrow_data(validity).arc())
106+
}
107+
}
108+
109+
impl LogicAndFiltersFunction {
110+
pub fn try_create(_display_name: &str, args: &[&DataTypeImpl]) -> Result<Box<dyn Function>> {
111+
if args
112+
.iter()
113+
.any(|arg| !matches!(arg, DataTypeImpl::Boolean(_)))
114+
{
115+
return Err(ErrorCode::IllegalDataType(format!(
116+
"Illegal type {:?} of argument of function and_filters, expect to be all boolean types",
117+
args
118+
)));
119+
}
120+
121+
Ok(Box::new(Self))
122+
}
123+
124+
pub fn desc() -> FunctionDescription {
125+
FunctionDescription::creator(Box::new(Self::try_create)).features(
126+
FunctionFeatures::default()
127+
.deterministic()
128+
.variadic_arguments(1, std::usize::MAX),
129+
)
130+
}
131+
}
132+
133+
impl fmt::Display for LogicAndFiltersFunction {
134+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
135+
write!(f, "and_filters")
136+
}
137+
}

src/query/functions/src/scalars/logics/logic.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use common_datavalues::prelude::*;
1818
use common_exception::Result;
1919

2020
use super::xor::LogicXorFunction;
21+
use super::LogicAndFiltersFunction;
2122
use super::LogicAndFunction;
2223
use super::LogicNotFunction;
2324
use super::LogicOrFunction;
@@ -31,6 +32,7 @@ pub struct LogicFunction;
3132
impl LogicFunction {
3233
pub fn register(factory: &mut FunctionFactory) {
3334
factory.register("and", LogicAndFunction::desc());
35+
factory.register("and_filters", LogicAndFiltersFunction::desc());
3436
factory.register("or", LogicOrFunction::desc());
3537
factory.register("not", LogicNotFunction::desc());
3638
factory.register("xor", LogicXorFunction::desc());

src/query/functions/src/scalars/logics/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod xor;
2020
#[macro_use]
2121
mod macros;
2222

23+
pub use and::LogicAndFiltersFunction;
2324
pub use and::LogicAndFunction;
2425
pub use logic::LogicFunction;
2526
pub use not::LogicNotFunction;

src/query/service/src/sql/executor/pipeline_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ impl PipelineBuilder {
252252
let data_types = vec![&left_type, &right_type];
253253
let func = FunctionFactory::instance().get("and", &data_types)?;
254254
predicate = PhysicalScalar::Function {
255-
name: "and".to_string(),
255+
name: "and_filters".to_string(),
256256
args: vec![
257257
(predicate.clone(), predicate.data_type()),
258258
(pred.clone(), pred.data_type()),

0 commit comments

Comments
 (0)