Skip to content

Commit 811fd1d

Browse files
feat: Add expression builder and display. (#169)
* feat: Add expression builder and display. * Fix comments * Fix doc test * Fix name of op * Fix comments * Fix timestamp
1 parent a20cda8 commit 811fd1d

File tree

4 files changed

+1021
-44
lines changed

4 files changed

+1021
-44
lines changed

crates/iceberg/src/expr/mod.rs

Lines changed: 118 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,129 @@
1818
//! This module contains expressions.
1919
2020
mod term;
21+
22+
use std::fmt::{Display, Formatter};
23+
2124
pub use term::*;
2225
mod predicate;
2326
pub use predicate::*;
2427

2528
/// Predicate operators used in expressions.
29+
///
30+
/// The discriminant of this enum is used for determining the type of the operator, see
31+
/// [`PredicateOperator::is_unary`], [`PredicateOperator::is_binary`], [`PredicateOperator::is_set`]
2632
#[allow(missing_docs)]
33+
#[derive(Debug, Clone, Copy)]
34+
#[repr(u16)]
2735
pub enum PredicateOperator {
28-
IsNull,
29-
NotNull,
30-
IsNan,
31-
NotNan,
32-
LessThan,
33-
LessThanOrEq,
34-
GreaterThan,
35-
GreaterThanOrEq,
36-
Eq,
37-
NotEq,
38-
In,
39-
NotIn,
40-
StartsWith,
41-
NotStartsWith,
36+
// Unary operators
37+
IsNull = 101,
38+
NotNull = 102,
39+
IsNan = 103,
40+
NotNan = 104,
41+
42+
// Binary operators
43+
LessThan = 201,
44+
LessThanOrEq = 202,
45+
GreaterThan = 203,
46+
GreaterThanOrEq = 204,
47+
Eq = 205,
48+
NotEq = 206,
49+
StartsWith = 207,
50+
NotStartsWith = 208,
51+
52+
// Set operators
53+
In = 301,
54+
NotIn = 302,
55+
}
56+
57+
impl Display for PredicateOperator {
58+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
59+
match self {
60+
PredicateOperator::IsNull => write!(f, "IS NULL"),
61+
PredicateOperator::NotNull => write!(f, "IS NOT NULL"),
62+
PredicateOperator::IsNan => write!(f, "IS NAN"),
63+
PredicateOperator::NotNan => write!(f, "IS NOT NAN"),
64+
PredicateOperator::LessThan => write!(f, "<"),
65+
PredicateOperator::LessThanOrEq => write!(f, "<="),
66+
PredicateOperator::GreaterThan => write!(f, ">"),
67+
PredicateOperator::GreaterThanOrEq => write!(f, ">="),
68+
PredicateOperator::Eq => write!(f, "="),
69+
PredicateOperator::NotEq => write!(f, "!="),
70+
PredicateOperator::In => write!(f, "IN"),
71+
PredicateOperator::NotIn => write!(f, "NOT IN"),
72+
PredicateOperator::StartsWith => write!(f, "STARTS WITH"),
73+
PredicateOperator::NotStartsWith => write!(f, "NOT STARTS WITH"),
74+
}
75+
}
76+
}
77+
78+
impl PredicateOperator {
79+
/// Check if this operator is unary operator.
80+
///
81+
/// # Example
82+
///
83+
/// ```rust
84+
/// use iceberg::expr::PredicateOperator;
85+
/// assert!(PredicateOperator::IsNull.is_unary());
86+
/// ```
87+
pub fn is_unary(self) -> bool {
88+
(self as u16) < (PredicateOperator::LessThan as u16)
89+
}
90+
91+
/// Check if this operator is binary operator.
92+
///
93+
/// # Example
94+
///
95+
/// ```rust
96+
/// use iceberg::expr::PredicateOperator;
97+
/// assert!(PredicateOperator::LessThan.is_binary());
98+
/// ```
99+
pub fn is_binary(self) -> bool {
100+
((self as u16) > (PredicateOperator::NotNan as u16))
101+
&& ((self as u16) < (PredicateOperator::In as u16))
102+
}
103+
104+
/// Check if this operator is set operator.
105+
///
106+
/// # Example
107+
///
108+
/// ```rust
109+
/// use iceberg::expr::PredicateOperator;
110+
/// assert!(PredicateOperator::In.is_set());
111+
/// ```
112+
pub fn is_set(self) -> bool {
113+
(self as u16) > (PredicateOperator::NotStartsWith as u16)
114+
}
115+
}
116+
117+
#[cfg(test)]
mod tests {
    use crate::expr::PredicateOperator;

    /// Every operator expected to be classified as unary.
    const UNARY: [PredicateOperator; 4] = [
        PredicateOperator::IsNull,
        PredicateOperator::NotNull,
        PredicateOperator::IsNan,
        PredicateOperator::NotNan,
    ];

    /// Every operator expected to be classified as binary.
    const BINARY: [PredicateOperator; 8] = [
        PredicateOperator::LessThan,
        PredicateOperator::LessThanOrEq,
        PredicateOperator::GreaterThan,
        PredicateOperator::GreaterThanOrEq,
        PredicateOperator::Eq,
        PredicateOperator::NotEq,
        PredicateOperator::StartsWith,
        PredicateOperator::NotStartsWith,
    ];

    /// Every operator expected to be classified as a set operator.
    const SET: [PredicateOperator; 2] = [PredicateOperator::In, PredicateOperator::NotIn];

    #[test]
    fn test_unary() {
        for op in UNARY.iter() {
            assert!(op.is_unary(), "{op:?} should be unary");
        }
        // Negative cases guard against a classifier that accepts everything.
        for op in BINARY.iter().chain(SET.iter()) {
            assert!(!op.is_unary(), "{op:?} should not be unary");
        }
    }

    #[test]
    fn test_binary() {
        for op in BINARY.iter() {
            assert!(op.is_binary(), "{op:?} should be binary");
        }
        for op in UNARY.iter().chain(SET.iter()) {
            assert!(!op.is_binary(), "{op:?} should not be binary");
        }
    }

    #[test]
    fn test_set() {
        for op in SET.iter() {
            assert!(op.is_set(), "{op:?} should be a set operator");
        }
        for op in UNARY.iter().chain(BINARY.iter()) {
            assert!(!op.is_set(), "{op:?} should not be a set operator");
        }
    }
}

crates/iceberg/src/expr/predicate.rs

Lines changed: 194 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,40 @@
1919
//! Predicate expressions are used to filter data, and evaluate to a boolean value. For example,
2020
//! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is greater than `10`,
2121
22-
use crate::expr::{BoundReference, PredicateOperator, UnboundReference};
23-
use crate::spec::Literal;
22+
use crate::expr::{BoundReference, PredicateOperator, Reference};
23+
use crate::spec::Datum;
2424
use std::collections::HashSet;
25+
use std::fmt::{Debug, Display, Formatter};
26+
use std::ops::Not;
2527

2628
/// Logical expression, such as `AND`, `OR`, `NOT`.
///
/// `T` is the operand type and `N` is the number of operands held.
pub struct LogicalExpression<T, const N: usize> {
    /// Boxed operands, stored in the order they were passed to the constructor.
    inputs: [Box<T>; N],
}

impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LogicalExpression")
            .field("inputs", &self.inputs)
            .finish()
    }
}

impl<T, const N: usize> LogicalExpression<T, N> {
    /// Creates a logical expression that owns the given operands.
    fn new(inputs: [Box<T>; N]) -> Self {
        Self { inputs }
    }

    /// Return inputs of this logical expression.
    ///
    /// The references are returned in the same order as the stored operands.
    pub fn inputs(&self) -> [&T; N] {
        // Build each element from its own slot. Seeding the array with
        // `self.inputs[0]` would panic when `N == 0`.
        std::array::from_fn(|i| &*self.inputs[i])
    }
}
55+
3156
/// Unary predicate, for example, `a IS NULL`.
3257
pub struct UnaryExpression<T> {
3358
/// Operator of this predicate, must be single operand operator.
@@ -36,14 +61,59 @@ pub struct UnaryExpression<T> {
3661
term: T,
3762
}
3863

64+
impl<T: Debug> Debug for UnaryExpression<T> {
65+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
66+
f.debug_struct("UnaryExpression")
67+
.field("op", &self.op)
68+
.field("term", &self.term)
69+
.finish()
70+
}
71+
}
72+
73+
impl<T: Display> Display for UnaryExpression<T> {
74+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75+
write!(f, "{} {}", self.term, self.op)
76+
}
77+
}
78+
79+
impl<T> UnaryExpression<T> {
80+
pub(crate) fn new(op: PredicateOperator, term: T) -> Self {
81+
debug_assert!(op.is_unary());
82+
Self { op, term }
83+
}
84+
}
85+
3986
/// Binary predicate, for example, `a > 10`.
4087
pub struct BinaryExpression<T> {
4188
/// Operator of this predicate, must be binary operator, such as `=`, `>`, `<`, etc.
4289
op: PredicateOperator,
4390
/// Term of this predicate, for example, `a` in `a > 10`.
4491
term: T,
4592
/// Literal of this predicate, for example, `10` in `a > 10`.
46-
literal: Literal,
93+
literal: Datum,
94+
}
95+
96+
impl<T: Debug> Debug for BinaryExpression<T> {
97+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
98+
f.debug_struct("BinaryExpression")
99+
.field("op", &self.op)
100+
.field("term", &self.term)
101+
.field("literal", &self.literal)
102+
.finish()
103+
}
104+
}
105+
106+
impl<T> BinaryExpression<T> {
107+
pub(crate) fn new(op: PredicateOperator, term: T, literal: Datum) -> Self {
108+
debug_assert!(op.is_binary());
109+
Self { op, term, literal }
110+
}
111+
}
112+
113+
impl<T: Display> Display for BinaryExpression<T> {
114+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
115+
write!(f, "{} {} {}", self.term, self.op, self.literal)
116+
}
47117
}
48118

49119
/// Set predicates, for example, `a in (1, 2, 3)`.
@@ -53,26 +123,139 @@ pub struct SetExpression<T> {
53123
/// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
54124
term: T,
55125
/// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 3)`.
56-
literals: HashSet<Literal>,
126+
literals: HashSet<Datum>,
127+
}
128+
129+
impl<T: Debug> Debug for SetExpression<T> {
130+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
131+
f.debug_struct("SetExpression")
132+
.field("op", &self.op)
133+
.field("term", &self.term)
134+
.field("literal", &self.literals)
135+
.finish()
136+
}
57137
}
58138

59139
/// Unbound predicate expression before binding to a schema.
60-
pub enum UnboundPredicate {
140+
#[derive(Debug)]
141+
pub enum Predicate {
61142
/// And predicate, for example, `a > 10 AND b < 20`.
62-
And(LogicalExpression<UnboundPredicate, 2>),
143+
And(LogicalExpression<Predicate, 2>),
63144
/// Or predicate, for example, `a > 10 OR b < 20`.
64-
Or(LogicalExpression<UnboundPredicate, 2>),
145+
Or(LogicalExpression<Predicate, 2>),
65146
/// Not predicate, for example, `NOT (a > 10)`.
66-
Not(LogicalExpression<UnboundPredicate, 1>),
147+
Not(LogicalExpression<Predicate, 1>),
67148
/// Unary expression, for example, `a IS NULL`.
68-
Unary(UnaryExpression<UnboundReference>),
149+
Unary(UnaryExpression<Reference>),
69150
/// Binary expression, for example, `a > 10`.
70-
Binary(BinaryExpression<UnboundReference>),
151+
Binary(BinaryExpression<Reference>),
71152
/// Set predicates, for example, `a in (1, 2, 3)`.
72-
Set(SetExpression<UnboundReference>),
153+
Set(SetExpression<Reference>),
154+
}
155+
156+
impl Display for Predicate {
157+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
158+
match self {
159+
Predicate::And(expr) => {
160+
write!(f, "({}) AND ({})", expr.inputs()[0], expr.inputs()[1])
161+
}
162+
Predicate::Or(expr) => {
163+
write!(f, "({}) OR ({})", expr.inputs()[0], expr.inputs()[1])
164+
}
165+
Predicate::Not(expr) => {
166+
write!(f, "NOT ({})", expr.inputs()[0])
167+
}
168+
Predicate::Unary(expr) => {
169+
write!(f, "{}", expr.term)
170+
}
171+
Predicate::Binary(expr) => {
172+
write!(f, "{} {} {}", expr.term, expr.op, expr.literal)
173+
}
174+
Predicate::Set(expr) => {
175+
write!(
176+
f,
177+
"{} {} ({})",
178+
expr.term,
179+
expr.op,
180+
expr.literals
181+
.iter()
182+
.map(|l| format!("{:?}", l))
183+
.collect::<Vec<String>>()
184+
.join(", ")
185+
)
186+
}
187+
}
188+
}
189+
}
190+
191+
impl Predicate {
192+
/// Combines two predicates with `AND`.
193+
///
194+
/// # Example
195+
///
196+
/// ```rust
197+
/// use std::ops::Bound::Unbounded;
198+
/// use iceberg::expr::BoundPredicate::Unary;
199+
/// use iceberg::expr::Reference;
200+
/// use iceberg::spec::Datum;
201+
/// let expr1 = Reference::new("a").less_than(Datum::long(10));
202+
///
203+
/// let expr2 = Reference::new("b").less_than(Datum::long(20));
204+
///
205+
/// let expr = expr1.and(expr2);
206+
///
207+
/// assert_eq!(&format!("{expr}"), "(a < 10) AND (b < 20)");
208+
/// ```
209+
pub fn and(self, other: Predicate) -> Predicate {
210+
Predicate::And(LogicalExpression::new([Box::new(self), Box::new(other)]))
211+
}
212+
213+
/// Combines two predicates with `OR`.
214+
///
215+
/// # Example
216+
///
217+
/// ```rust
218+
/// use std::ops::Bound::Unbounded;
219+
/// use iceberg::expr::BoundPredicate::Unary;
220+
/// use iceberg::expr::Reference;
221+
/// use iceberg::spec::Datum;
222+
/// let expr1 = Reference::new("a").less_than(Datum::long(10));
223+
///
224+
/// let expr2 = Reference::new("b").less_than(Datum::long(20));
225+
///
226+
/// let expr = expr1.or(expr2);
227+
///
228+
/// assert_eq!(&format!("{expr}"), "(a < 10) OR (b < 20)");
229+
/// ```
230+
pub fn or(self, other: Predicate) -> Predicate {
231+
Predicate::Or(LogicalExpression::new([Box::new(self), Box::new(other)]))
232+
}
233+
}
234+
235+
impl Not for Predicate {
236+
type Output = Predicate;
237+
238+
/// Create a predicate which is the reverse of this predicate. For example: `NOT (a > 10)`
239+
/// # Example
240+
///
241+
///```rust
242+
///use std::ops::Bound::Unbounded;
243+
///use iceberg::expr::BoundPredicate::Unary;
244+
///use iceberg::expr::Reference;
245+
///use iceberg::spec::Datum;
246+
///let expr1 = Reference::new("a").less_than(Datum::long(10));
247+
///
248+
///let expr = !expr1;
249+
///
250+
///assert_eq!(&format!("{expr}"), "NOT (a < 10)");
251+
///```
252+
fn not(self) -> Self::Output {
253+
Predicate::Not(LogicalExpression::new([Box::new(self)]))
254+
}
73255
}
74256

75257
/// Bound predicate expression after binding to a schema.
258+
#[derive(Debug)]
76259
pub enum BoundPredicate {
77260
/// An expression always evaluates to true.
78261
AlwaysTrue,

0 commit comments

Comments
 (0)