Skip to content

Commit 794efab

Browse files
committed
qgm: add RejectedNulls derived attribute
1 parent cad3ddb commit 794efab

File tree

2 files changed

+281
-0
lines changed

2 files changed

+281
-0
lines changed

src/sql/src/query_model/attribute/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
1212
pub mod core;
1313
pub mod propagated_nulls;
14+
pub mod rejected_nulls;
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
// Copyright Materialize, Inc. and contributors. All rights reserved.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the LICENSE file.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0.
9+
10+
//! Defines the [`RejectedNulls`] attribute.
11+
//!
12+
//! The attribute value is a set of column references associated with
13+
//! each `QueryBox`. If any of the references is `NULL`, there is at
14+
//! least one predicate in that box that will be evaluated to `NULL`
15+
//! or `FALSE` (that is, a row with that column will be filtered
16+
//! away). For boxes without predicates, the attribute value is
17+
//! always the empty set.
18+
//!
19+
//! Besides "predicate p rejects nulls in a set of columns C", in the
20+
//! literature this property is also stated as "predicate p is strong
21+
//! with respect to C".
22+
23+
use super::propagated_nulls::propagated_nulls;
24+
use crate::query_model::attribute::core::{Attribute, AttributeKey};
25+
use crate::query_model::model::{
26+
BoxId, BoxScalarExpr, BoxType, ColumnReference, Model, QuantifierType,
27+
};
28+
use std::collections::HashSet;
29+
30+
/// Key type of the derived attribute that records, for each query box,
/// the set of column references in which the predicates of that box
/// reject nulls.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct RejectedNulls;
32+
33+
impl AttributeKey for RejectedNulls {
34+
type Value = HashSet<ColumnReference>;
35+
}
36+
37+
impl Attribute for RejectedNulls {
38+
fn attr_id(&self) -> &'static str {
39+
"RejectedNulls"
40+
}
41+
42+
fn requires(&self) -> Vec<Box<dyn Attribute>> {
43+
vec![]
44+
}
45+
46+
fn derive(&self, model: &mut Model, box_id: BoxId) {
47+
let mut r#box = model.get_mut_box(box_id);
48+
let mut value = HashSet::new();
49+
50+
match r#box.box_type {
51+
BoxType::Select(ref select) => {
52+
for p in select.predicates.iter() {
53+
rejected_nulls(p, &mut value);
54+
}
55+
}
56+
BoxType::OuterJoin(ref outerjoin) => {
57+
for p in outerjoin.predicates.iter() {
58+
rejected_nulls(p, &mut value);
59+
}
60+
// By definition, preserved sides in outer joins don't filter
61+
// anything, so columns from the corresponding sides cannot
62+
// reject nulls and need to be removed from the result value.
63+
for q in r#box
64+
.input_quantifiers()
65+
.filter(|q| q.quantifier_type == QuantifierType::PreservedForeach)
66+
{
67+
value.retain(|c| c.quantifier_id != q.id);
68+
}
69+
}
70+
_ => (),
71+
}
72+
73+
r#box.attributes.set::<RejectedNulls>(value);
74+
}
75+
}
76+
77+
/// Returns all columns that *must* be non-NULL for the boolean `expr`
78+
/// to be `NULL` or `FALSE`.
79+
///
80+
/// An expression `expr` rejects nulls in a set of column references
81+
/// `C` if it evaluates to either `FALSE` or `NULL` whenever some
82+
/// `c` in `C` is null.
83+
///
84+
/// An expression `expr` propagates nulls in a set of column references
85+
/// `C` if it evaluates to `NULL` whenever some `c` in `C` is null.
86+
///
87+
/// Consequently, results returned by [`propagated_nulls`] must be
88+
/// included in [`rejected_nulls`].
89+
///
90+
/// Unfortuantely, boolean functions such as "and" and "or" are not
91+
/// propagating nulls in their inputs, but we still need to handle
92+
/// them here, as they are used quite frequently in predicates.
93+
/// The procedure for doing this is derived below.
94+
///
95+
/// Observe the truth values for the following terms:
96+
///
97+
/// For `AND(A, B)`:
98+
///
99+
/// | | F | N | T |
100+
/// | |:-:|:-:|:-:|
101+
/// | F | F | F | F |
102+
/// | N | F | N | N |
103+
/// | T | F | N | T |
104+
///
105+
/// For `OR(A, B)`:
106+
///
107+
/// | | F | N | T |
108+
/// | |:-:|:-:|:-:|
109+
/// | F | F | N | T |
110+
/// | N | N | N | T |
111+
/// | T | T | T | T |
112+
///
113+
/// For `NOT(AND(A, B))`:
114+
///
115+
/// | | F | N | T |
116+
/// | |:-:|:-:|:-:|
117+
/// | F | T | T | T |
118+
/// | N | T | N | N |
119+
/// | T | T | N | F |
120+
///
121+
/// For `NOT(OR(A, B))`:
122+
///
123+
/// | | F | N | T |
124+
/// | |:-:|:-:|:-:|
125+
/// | F | T | N | F |
126+
/// | N | N | N | F |
127+
/// | T | F | F | F |
128+
///
129+
/// Based on the above truth tables, we can establish the following
130+
/// statements are always true:
131+
/// 1. If either `A` or `B` rejects nulls in `C`,
132+
/// then `AND(A, B)` rejects nulls in `C`.
133+
/// 2. If both `A` and `B` reject nulls in `C`,
134+
/// then `OR(A, B)` rejects nulls in `C`.
135+
/// 3. If both `A` and `B` propagate nulls in `C`,
136+
/// then `NOT(AND(A, B))` rejects nulls in `C`.
137+
/// 4. If either `A` or `B` propagates nulls in `C`,
138+
/// then `NOT(OR(A, B))` rejects nulls in `C`.
139+
///
140+
/// Based on the above statements, the algorithm implemented by
141+
/// this function can be described by the following pseudo-code:
142+
///
143+
/// ```text
144+
/// def rejected_nulls(expr: Expr, sign: bool = true) -> Set[Expr]:
145+
/// match expr:
146+
/// case NOT(ISNULL(c)):
147+
/// { c }
148+
/// case NOT(expr):
149+
/// rejected_nulls(expr, !sign)
150+
/// case AND(lhs, rhs):
151+
/// if sign > 0:
152+
/// rejected_nulls(lhs, sign) ∪ rejected_nulls(rhs, sign)
153+
/// else:
154+
/// propagated_nulls(lhs) ∩ propagated_nulls(rhs)
155+
/// case OR(lhs, rhs):
156+
/// if sign > 0:
157+
/// rejected_nulls(lhs, sign) ∩ rejected_nulls(rhs, sign)
158+
/// else:
159+
/// propagated_nulls(lhs) ∪ propagated_nulls(rhs)
160+
/// case expr:
161+
/// propagated_nulls(expr)
162+
/// ```
163+
pub(crate) fn rejected_nulls(expr: &BoxScalarExpr, set: &mut HashSet<ColumnReference>) {
164+
/// Define an inner function needed in order to pass around the `sign`.
165+
fn rejected_nulls(expr: &BoxScalarExpr, sign: bool) -> HashSet<ColumnReference> {
166+
ore::stack::maybe_grow(|| {
167+
if let Some(c) = case_not_isnull(expr) {
168+
HashSet::from([c.clone()])
169+
} else if let Some(expr) = case_not(expr) {
170+
rejected_nulls(expr, !sign)
171+
} else if let Some((lhs, rhs)) = case_and(expr) {
172+
if sign {
173+
union(rejected_nulls(lhs, sign), rejected_nulls(rhs, sign))
174+
} else {
175+
intersect(propagated_nulls(lhs), propagated_nulls(rhs))
176+
}
177+
} else if let Some((lhs, rhs)) = case_or(expr) {
178+
if sign {
179+
intersect(rejected_nulls(lhs, sign), rejected_nulls(rhs, sign))
180+
} else {
181+
union(propagated_nulls(lhs), propagated_nulls(rhs))
182+
}
183+
} else {
184+
propagated_nulls(expr)
185+
}
186+
})
187+
}
188+
189+
set.extend(rejected_nulls(expr, true))
190+
}
191+
192+
/// Computes the union of two sets, consuming both sides
/// and mutating and returning `lhs`.
///
/// Elements are moved, never cloned, so `T` only needs to be hashable
/// and comparable for equality.
fn union<T>(mut lhs: HashSet<T>, rhs: HashSet<T>) -> HashSet<T>
where
    T: Eq + std::hash::Hash,
{
    lhs.extend(rhs);
    lhs
}
201+
202+
/// Computes the intersection of two sets, consuming both sides
/// and mutating and returning `lhs`.
///
/// Elements are only compared and dropped, never cloned, so `T` only
/// needs to be hashable and comparable for equality.
fn intersect<T>(mut lhs: HashSet<T>, rhs: HashSet<T>) -> HashSet<T>
where
    T: Eq + std::hash::Hash,
{
    // Keep only the elements of `lhs` that are also present in `rhs`.
    lhs.retain(|item| rhs.contains(item));
    lhs
}
211+
212+
/// Active pattern match for `NOT(ISNULL(c))` fragments.
213+
fn case_not_isnull(expr: &BoxScalarExpr) -> Option<&ColumnReference> {
214+
use BoxScalarExpr::*;
215+
216+
if let CallUnary {
217+
func: expr::UnaryFunc::Not(expr::func::Not),
218+
expr,
219+
} = expr
220+
{
221+
if let CallUnary {
222+
func: expr::UnaryFunc::IsNull(expr::func::IsNull),
223+
expr,
224+
} = &**expr
225+
{
226+
if let ColumnReference(c) = &**expr {
227+
return Some(c);
228+
}
229+
}
230+
}
231+
232+
None
233+
}
234+
235+
/// Active pattern match for `NOT(expr)` fragments.
236+
fn case_not(expr: &BoxScalarExpr) -> Option<&BoxScalarExpr> {
237+
use BoxScalarExpr::*;
238+
239+
if let CallUnary {
240+
func: expr::UnaryFunc::Not(expr::func::Not),
241+
expr,
242+
} = expr
243+
{
244+
return Some(expr);
245+
}
246+
247+
None
248+
}
249+
250+
/// Active pattern match for `NOT(expr)` fragments.
251+
fn case_or(expr: &BoxScalarExpr) -> Option<(&BoxScalarExpr, &BoxScalarExpr)> {
252+
use BoxScalarExpr::*;
253+
254+
if let CallBinary {
255+
func: expr::BinaryFunc::Or,
256+
expr1,
257+
expr2,
258+
} = expr
259+
{
260+
return Some((expr1, expr2));
261+
}
262+
263+
None
264+
}
265+
266+
/// Active pattern match for `NOT(expr)` fragments.
267+
fn case_and(expr: &BoxScalarExpr) -> Option<(&BoxScalarExpr, &BoxScalarExpr)> {
268+
use BoxScalarExpr::*;
269+
270+
if let CallBinary {
271+
func: expr::BinaryFunc::And,
272+
expr1,
273+
expr2,
274+
} = expr
275+
{
276+
return Some((expr1, expr2));
277+
}
278+
279+
None
280+
}

0 commit comments

Comments
 (0)