Skip to content

Commit 0175e20

Browse files
authored
Merge pull request #7712 from sundy-li/Kleene-logic-and
feat(query): add and_filters function
2 parents 985d6f8 + a60877e commit 0175e20

File tree

8 files changed

+285
-15
lines changed

8 files changed

+285
-15
lines changed

src/query/expression/src/types/nullable.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ impl<T: ValueType> ValueType for NullableType<T> {
6969
has_null: *has_null,
7070
value: Some(Box::new(T::try_downcast_domain(value)?)),
7171
}),
72+
Domain::Nullable(NullableDomain {
73+
has_null,
74+
value: None,
75+
}) => Some(NullableDomain {
76+
has_null: *has_null,
77+
value: None,
78+
}),
7279
_ => None,
7380
}
7481
}

src/query/functions-v2/src/scalars/boolean.rs

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,14 @@
1313
// limitations under the License.
1414

1515
use common_expression::types::boolean::BooleanDomain;
16+
use common_expression::types::nullable::NullableDomain;
1617
use common_expression::types::BooleanType;
18+
use common_expression::types::NullableType;
19+
use common_expression::vectorize_2_arg;
1720
use common_expression::FunctionProperty;
1821
use common_expression::FunctionRegistry;
22+
use common_expression::Value;
23+
use common_expression::ValueRef;
1924

2025
pub fn register(registry: &mut FunctionRegistry) {
2126
registry.register_1_arg::<BooleanType, BooleanType, _, _>(
@@ -29,7 +34,37 @@ pub fn register(registry: &mut FunctionRegistry) {
2934
},
3035
|val| !val,
3136
);
32-
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
37+
38+
// special function to combine the filter efficiently
39+
registry.register_passthrough_nullable_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
40+
"and_filters",
41+
FunctionProperty::default(),
42+
|lhs, rhs| {
43+
Some(BooleanDomain {
44+
has_false: lhs.has_false || rhs.has_false,
45+
has_true: lhs.has_true && rhs.has_true,
46+
})
47+
},
48+
|lhs, rhs, _| match (lhs, rhs) {
49+
(ValueRef::Scalar(flag), other) => {
50+
if flag {
51+
Ok(other.to_owned())
52+
} else {
53+
Ok(Value::Scalar(false))
54+
}
55+
}
56+
(other, ValueRef::Scalar(flag)) => {
57+
if flag {
58+
Ok(other.to_owned())
59+
} else {
60+
Ok(Value::Scalar(false))
61+
}
62+
}
63+
(ValueRef::Column(a), ValueRef::Column(b)) => Ok(Value::Column(&a & &b)),
64+
},
65+
);
66+
67+
registry.register_2_arg_core::<BooleanType, BooleanType, BooleanType, _, _>(
3368
"and",
3469
FunctionProperty::default(),
3570
|lhs, rhs| {
@@ -38,9 +73,26 @@ pub fn register(registry: &mut FunctionRegistry) {
3873
has_true: lhs.has_true && rhs.has_true,
3974
})
4075
},
41-
|lhs, rhs| lhs && rhs,
76+
|lhs, rhs, _| match (lhs, rhs) {
77+
(ValueRef::Scalar(flag), other) => {
78+
if flag {
79+
Ok(other.to_owned())
80+
} else {
81+
Ok(Value::Scalar(false))
82+
}
83+
}
84+
(other, ValueRef::Scalar(flag)) => {
85+
if flag {
86+
Ok(other.to_owned())
87+
} else {
88+
Ok(Value::Scalar(false))
89+
}
90+
}
91+
(ValueRef::Column(a), ValueRef::Column(b)) => Ok(Value::Column(&a & &b)),
92+
},
4293
);
43-
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
94+
95+
registry.register_2_arg_core::<BooleanType, BooleanType, BooleanType, _, _>(
4496
"or",
4597
FunctionProperty::default(),
4698
|lhs, rhs| {
@@ -49,8 +101,88 @@ pub fn register(registry: &mut FunctionRegistry) {
49101
has_true: lhs.has_true || rhs.has_true,
50102
})
51103
},
52-
|lhs, rhs| lhs || rhs,
104+
|lhs, rhs, _| match (lhs, rhs) {
105+
(ValueRef::Scalar(flag), other) => {
106+
if flag {
107+
Ok(Value::Scalar(true))
108+
} else {
109+
Ok(other.to_owned())
110+
}
111+
}
112+
(other, ValueRef::Scalar(flag)) => {
113+
if flag {
114+
Ok(Value::Scalar(true))
115+
} else {
116+
Ok(other.to_owned())
117+
}
118+
}
119+
(ValueRef::Column(a), ValueRef::Column(b)) => Ok(Value::Column(&a | &b)),
120+
},
121+
);
122+
123+
// https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
124+
registry.register_2_arg_core::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>, _, _>(
125+
"and",
126+
FunctionProperty::default(),
127+
|lhs, rhs| {
128+
if !lhs.has_null && !rhs.has_null {
129+
let bools = match (&lhs.value, &rhs.value) {
130+
(Some(a), Some(b)) => Some(Box::new(BooleanDomain {
131+
has_false: a.has_false || b.has_false,
132+
has_true: a.has_true && b.has_true,
133+
})),
134+
_ => return None,
135+
};
136+
return Some(NullableDomain::<BooleanType> {
137+
has_null: false,
138+
value: bools,
139+
});
140+
}
141+
None
142+
},
143+
// value = lhs & rhs, valid = (lhs_v & rhs_v) | (!lhs & lhs_v) | (!rhs & rhs_v)
144+
vectorize_2_arg::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>>(|lhs, rhs| {
145+
let lhs_v = lhs.is_some();
146+
let rhs_v = rhs.is_some();
147+
let valid = (lhs_v & rhs_v) | (lhs == Some(false)) | (rhs == Some(false));
148+
if valid {
149+
Some(lhs.unwrap_or_default() & rhs.unwrap_or_default())
150+
} else {
151+
None
152+
}
153+
}),
53154
);
155+
156+
registry.register_2_arg_core::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>, _, _>(
157+
"or",
158+
FunctionProperty::default(),
159+
|lhs, rhs| {
160+
if !lhs.has_null && !rhs.has_null {
161+
let bools = match (&lhs.value, &rhs.value) {
162+
(Some(a), Some(b)) => Some(Box::new(BooleanDomain {
163+
has_false: a.has_false && b.has_false,
164+
has_true: a.has_true || b.has_true,
165+
})),
166+
_ => return None,
167+
};
168+
return Some(NullableDomain::<BooleanType> {
169+
has_null: false,
170+
value: bools,
171+
});
172+
}
173+
None
174+
},
175+
// value = lhs | rhs, valid = (lhs_v & rhs_v) | (lhs | rhs)
176+
vectorize_2_arg::<NullableType<BooleanType>, NullableType<BooleanType>, NullableType<BooleanType>>(|lhs, rhs| {
177+
let valid = (lhs.is_some() & rhs.is_some()) | (lhs.unwrap_or_default() | rhs.unwrap_or_default());
178+
if valid {
179+
Some(lhs.unwrap_or_default() | rhs.unwrap_or_default())
180+
} else {
181+
None
182+
}
183+
}),
184+
);
185+
54186
registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
55187
"xor",
56188
FunctionProperty::default(),

src/query/functions-v2/tests/it/scalars/boolean.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ fn test_boolean() {
3434

3535
fn test_and(file: &mut impl Write) {
3636
run_ast(file, "true AND false", &[]);
37-
run_ast(file, "null AND false", &[]);
37+
run_ast(file, "true AND null", &[]);
38+
run_ast(file, "true AND true", &[]);
39+
run_ast(file, "false AND false", &[]);
40+
run_ast(file, "false AND null", &[]);
41+
run_ast(file, "false AND true", &[]);
3842
}
3943

4044
fn test_not(file: &mut impl Write) {

src/query/functions-v2/tests/it/scalars/testdata/boolean.txt

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,51 @@ output domain : {FALSE}
77
output : false
88

99

10-
ast : null AND false
11-
raw expr : and(NULL, false)
12-
checked expr : and<NULL, Boolean NULL>(NULL, CAST(false AS Boolean NULL))
10+
ast : true AND null
11+
raw expr : and(true, NULL)
12+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(true AS Boolean NULL), CAST(NULL AS Boolean NULL))
1313
optimized expr : NULL
14-
output type : NULL
15-
output domain : {NULL}
14+
output type : Boolean NULL
15+
output domain : Unknown
1616
output : NULL
1717

1818

19+
ast : true AND true
20+
raw expr : and(true, true)
21+
checked expr : and<Boolean, Boolean>(true, true)
22+
optimized expr : true
23+
output type : Boolean
24+
output domain : {TRUE}
25+
output : true
26+
27+
28+
ast : false AND false
29+
raw expr : and(false, false)
30+
checked expr : and<Boolean, Boolean>(false, false)
31+
optimized expr : false
32+
output type : Boolean
33+
output domain : {FALSE}
34+
output : false
35+
36+
37+
ast : false AND null
38+
raw expr : and(false, NULL)
39+
checked expr : and<Boolean NULL, Boolean NULL>(CAST(false AS Boolean NULL), CAST(NULL AS Boolean NULL))
40+
optimized expr : false
41+
output type : Boolean NULL
42+
output domain : Unknown
43+
output : false
44+
45+
46+
ast : false AND true
47+
raw expr : and(false, true)
48+
checked expr : and<Boolean, Boolean>(false, true)
49+
optimized expr : false
50+
output type : Boolean
51+
output domain : {FALSE}
52+
output : false
53+
54+
1955
ast : NOT a
2056
raw expr : not(ColumnRef(0)::NULL)
2157
checked expr : not<NULL>(ColumnRef(0))
@@ -102,10 +138,10 @@ output : true
102138

103139
ast : null OR false
104140
raw expr : or(NULL, false)
105-
checked expr : or<NULL, Boolean NULL>(NULL, CAST(false AS Boolean NULL))
141+
checked expr : or<Boolean NULL, Boolean NULL>(CAST(NULL AS Boolean NULL), CAST(false AS Boolean NULL))
106142
optimized expr : NULL
107-
output type : NULL
108-
output domain : {NULL}
143+
output type : Boolean NULL
144+
output domain : Unknown
109145
output : NULL
110146

111147

src/query/functions/src/scalars/logics/and.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use core::fmt;
16+
17+
use common_arrow::arrow::bitmap::MutableBitmap;
1518
use common_datavalues::prelude::*;
19+
use common_exception::ErrorCode;
1620
use common_exception::Result;
1721

1822
use super::logic::LogicExpression;
@@ -30,6 +34,14 @@ impl_logic_expression!(LogicAndExpression, &, |lhs: bool, rhs: bool, lhs_v: bool
3034
(lhs & rhs, (lhs_v & rhs_v) | (!lhs & lhs_v) | (!rhs & rhs_v))
3135
});
3236

37+
/// Logical functions AND, OR, XOR and NOT support three-valued (or ternary) logic
38+
/// https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
39+
///
40+
/// Functions XOR and NOT rely on "default implementation for NULLs":
41+
/// - if any of the arguments is of Nullable type, the return value type is Nullable
42+
/// - if any of the arguments is NULL, the return value is NULL
43+
///
44+
/// Functions AND and OR provide their own special implementations for ternary logic
3345
#[derive(Clone)]
3446
pub struct LogicAndFunction;
3547

@@ -47,3 +59,79 @@ impl LogicAndFunction {
4759
)
4860
}
4961
}
62+
63+
/// Unit type that implements the `and_filters` scalar function (see its
/// `Function` impl for evaluation and `try_create` for argument checking).
#[derive(Clone)]
64+
pub struct LogicAndFiltersFunction;
65+
66+
impl Function for LogicAndFiltersFunction {
67+
fn name(&self) -> &str {
68+
"and_filters"
69+
}
70+
71+
fn return_type(&self) -> DataTypeImpl {
72+
bool::to_data_type()
73+
}
74+
75+
fn eval(
76+
&self,
77+
_func_ctx: FunctionContext,
78+
columns: &ColumnsWithField,
79+
input_rows: usize,
80+
) -> Result<ColumnRef> {
81+
if columns.len() == 1 {
82+
return Ok(columns[1].column().clone());
83+
}
84+
85+
let mut validity = None;
86+
for c in columns.iter() {
87+
let c = c.column();
88+
if c.is_const() {
89+
let v = c.get_bool(0)?;
90+
if !v {
91+
let validity = MutableBitmap::from_len_zeroed(input_rows).into();
92+
return Ok(BooleanColumn::from_arrow_data(validity).arc());
93+
}
94+
} else {
95+
let bools: &BooleanColumn = Series::check_get(c)?;
96+
match validity.as_mut() {
97+
Some(v) => {
98+
*v = &*v & bools.values();
99+
}
100+
None => validity = Some(bools.values().clone()),
101+
}
102+
}
103+
}
104+
let validity = validity.unwrap_or_else(|| MutableBitmap::from_len_set(input_rows).into());
105+
Ok(BooleanColumn::from_arrow_data(validity).arc())
106+
}
107+
}
108+
109+
impl LogicAndFiltersFunction {
110+
pub fn try_create(_display_name: &str, args: &[&DataTypeImpl]) -> Result<Box<dyn Function>> {
111+
if args
112+
.iter()
113+
.any(|arg| !matches!(arg, DataTypeImpl::Boolean(_)))
114+
{
115+
return Err(ErrorCode::IllegalDataType(format!(
116+
"Illegal type {:?} of argument of function and_filters, expect to be all boolean types",
117+
args
118+
)));
119+
}
120+
121+
Ok(Box::new(Self))
122+
}
123+
124+
pub fn desc() -> FunctionDescription {
125+
FunctionDescription::creator(Box::new(Self::try_create)).features(
126+
FunctionFeatures::default()
127+
.deterministic()
128+
.variadic_arguments(1, std::usize::MAX),
129+
)
130+
}
131+
}
132+
133+
impl fmt::Display for LogicAndFiltersFunction {
134+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
135+
write!(f, "and_filters")
136+
}
137+
}

src/query/functions/src/scalars/logics/logic.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use common_datavalues::prelude::*;
1818
use common_exception::Result;
1919

2020
use super::xor::LogicXorFunction;
21+
use super::LogicAndFiltersFunction;
2122
use super::LogicAndFunction;
2223
use super::LogicNotFunction;
2324
use super::LogicOrFunction;
@@ -31,6 +32,7 @@ pub struct LogicFunction;
3132
impl LogicFunction {
3233
pub fn register(factory: &mut FunctionFactory) {
3334
factory.register("and", LogicAndFunction::desc());
35+
factory.register("and_filters", LogicAndFiltersFunction::desc());
3436
factory.register("or", LogicOrFunction::desc());
3537
factory.register("not", LogicNotFunction::desc());
3638
factory.register("xor", LogicXorFunction::desc());

0 commit comments

Comments
 (0)