Skip to content

Commit

Permalink
feat: add UnboundPredicate::negate()
Browse files Browse the repository at this point in the history
Issue: apache#150
  • Loading branch information
sdd committed Mar 4, 2024
1 parent 811fd1d commit d363bb8
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 6 deletions.
35 changes: 34 additions & 1 deletion crates/iceberg/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub use predicate::*;
/// The discriminant of this enum is used for determining the type of the operator, see
/// [`PredicateOperator::is_unary`], [`PredicateOperator::is_binary`], [`PredicateOperator::is_set`]
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u16)]
pub enum PredicateOperator {
// Unary operators
Expand Down Expand Up @@ -112,6 +112,39 @@ impl PredicateOperator {
pub fn is_set(self) -> bool {
    // Operator kinds are told apart by discriminant: every variant whose
    // discriminant is greater than `NotStartsWith`'s is a set operator.
    let discriminant = self as u16;
    let last_non_set = PredicateOperator::NotStartsWith as u16;
    discriminant > last_non_set
}

/// Returns the predicate that is the inverse of self
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::IsNull.negate() == PredicateOperator::NotNull);
/// assert!(PredicateOperator::IsNan.negate() == PredicateOperator::NotNan);
/// assert!(PredicateOperator::LessThan.negate() == PredicateOperator::GreaterThanOrEq);
/// assert!(PredicateOperator::GreaterThan.negate() == PredicateOperator::LessThanOrEq);
/// assert!(PredicateOperator::Eq.negate() == PredicateOperator::NotEq);
/// assert!(PredicateOperator::In.negate() == PredicateOperator::NotIn);
/// assert!(PredicateOperator::StartsWith.negate() == PredicateOperator::NotStartsWith);
/// ```
pub fn negate(self) -> PredicateOperator {
match self {
PredicateOperator::IsNull => PredicateOperator::NotNull,
PredicateOperator::NotNull => PredicateOperator::IsNull,
PredicateOperator::IsNan => PredicateOperator::NotNan,
PredicateOperator::NotNan => PredicateOperator::IsNan,
PredicateOperator::LessThan => PredicateOperator::GreaterThanOrEq,
PredicateOperator::LessThanOrEq => PredicateOperator::GreaterThan,
PredicateOperator::GreaterThan => PredicateOperator::LessThanOrEq,
PredicateOperator::GreaterThanOrEq => PredicateOperator::LessThan,
PredicateOperator::Eq => PredicateOperator::NotEq,
PredicateOperator::NotEq => PredicateOperator::Eq,
PredicateOperator::In => PredicateOperator::NotIn,
PredicateOperator::NotIn => PredicateOperator::In,
PredicateOperator::StartsWith => PredicateOperator::NotStartsWith,
PredicateOperator::NotStartsWith => PredicateOperator::StartsWith,
}
}
}

#[cfg(test)]
Expand Down
158 changes: 157 additions & 1 deletion crates/iceberg/src/expr/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
use crate::expr::{BoundReference, PredicateOperator, Reference};
use crate::spec::Datum;
use itertools::Itertools;
use std::collections::HashSet;
use std::fmt::{Debug, Display, Formatter};
use std::ops::Not;

/// Logical expression, such as `AND`, `OR`, `NOT`.
///
/// `N` is the number of operands the expression holds: `Predicate::And`
/// uses `N = 2`, while `Predicate::negate` unpacks a `NOT` as a single input.
#[derive(PartialEq)]
pub struct LogicalExpression<T, const N: usize> {
    /// Operands of this expression, each individually boxed.
    inputs: [Box<T>; N],
}
Expand Down Expand Up @@ -54,6 +56,7 @@ impl<T, const N: usize> LogicalExpression<T, N> {
}

/// Unary predicate, for example, `a IS NULL`.
#[derive(PartialEq)]
pub struct UnaryExpression<T> {
/// Operator of this predicate, must be single operand operator.
op: PredicateOperator,
Expand Down Expand Up @@ -84,6 +87,7 @@ impl<T> UnaryExpression<T> {
}

/// Binary predicate, for example, `a > 10`.
#[derive(PartialEq)]
pub struct BinaryExpression<T> {
/// Operator of this predicate, must be binary operator, such as `=`, `>`, `<`, etc.
op: PredicateOperator,
Expand Down Expand Up @@ -117,6 +121,7 @@ impl<T: Display> Display for BinaryExpression<T> {
}

/// Set predicates, for example, `a in (1, 2, 3)`.
#[derive(PartialEq)]
pub struct SetExpression<T> {
/// Operator of this predicate, must be set operator, such as `IN`, `NOT IN`, etc.
op: PredicateOperator,
Expand All @@ -136,8 +141,26 @@ impl<T: Debug> Debug for SetExpression<T> {
}
}

impl<T: Debug> SetExpression<T> {
    /// Creates a set predicate that applies `op` to `term` against `literals`.
    ///
    /// `op` must be a set operator (see [`PredicateOperator::is_set`]), as the
    /// `op` field requires. The invariant is checked in debug builds only, so
    /// release builds pay nothing for it.
    pub(crate) fn new(op: PredicateOperator, term: T, literals: HashSet<Datum>) -> Self {
        debug_assert!(
            op.is_set(),
            "SetExpression requires a set operator, got {op:?}"
        );
        Self { op, term, literals }
    }
}

impl<T: Display + Debug> Display for SetExpression<T> {
    /// Writes the predicate as `<term> <op> (<literal>, <literal>, ...)`.
    ///
    /// Literals are emitted in the iteration order of the underlying
    /// `HashSet`, separated by `", "`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let rendered_literals = self.literals.iter().join(", ");
        write!(f, "{} {} ({})", self.term, self.op, rendered_literals)
    }
}

/// Unbound predicate expression before binding to a schema.
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum Predicate {
/// And predicate, for example, `a > 10 AND b < 20`.
And(LogicalExpression<Predicate, 2>),
Expand Down Expand Up @@ -230,6 +253,54 @@ impl Predicate {
pub fn or(self, other: Predicate) -> Predicate {
Predicate::Or(LogicalExpression::new([Box::new(self), Box::new(other)]))
}

/// Builds the logical complement of this predicate without wrapping it in a
/// `NOT` node. Used for `NOT` elimination.
///
/// * `AND` and `OR` are swapped and each operand is negated (De Morgan).
/// * `NOT x` becomes `x`.
/// * Unary, binary and set predicates keep their term and literal(s) and
///   switch to the inverse operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::Reference;
/// use iceberg::spec::Datum;
/// let expr1 = Reference::new("a").less_than(Datum::long(10));
/// let expr2 = Reference::new("b").less_than(Datum::long(5)).and(Reference::new("c").less_than(Datum::long(10)));
///
/// let result = expr1.negate();
/// assert_eq!(&format!("{result}"), "a >= 10");
///
/// let result = expr2.negate();
/// assert_eq!(&format!("{result}"), "(b >= 5) OR (c >= 10)");
/// ```
pub fn negate(self) -> Predicate {
    // Negates one boxed operand of a logical expression.
    fn negate_boxed(operand: Box<Predicate>) -> Box<Predicate> {
        Box::new((*operand).negate())
    }

    match self {
        Predicate::And(LogicalExpression { inputs }) => {
            Predicate::Or(LogicalExpression::new(inputs.map(negate_boxed)))
        }
        Predicate::Or(LogicalExpression { inputs }) => {
            Predicate::And(LogicalExpression::new(inputs.map(negate_boxed)))
        }
        // Negating a `NOT` simply unwraps its single operand.
        Predicate::Not(LogicalExpression { inputs: [inner] }) => *inner,
        Predicate::Unary(unary) => {
            let inverse_op = unary.op.negate();
            Predicate::Unary(UnaryExpression::new(inverse_op, unary.term))
        }
        Predicate::Binary(binary) => {
            let inverse_op = binary.op.negate();
            Predicate::Binary(BinaryExpression::new(
                inverse_op,
                binary.term,
                binary.literal,
            ))
        }
        Predicate::Set(set) => {
            let inverse_op = set.op.negate();
            Predicate::Set(SetExpression::new(inverse_op, set.term, set.literals))
        }
    }
}
}

impl Not for Predicate {
Expand Down Expand Up @@ -274,3 +345,88 @@ pub enum BoundPredicate {
/// Set predicates, for example, `a in (1, 2, 3)`.
Set(SetExpression<BoundReference>),
}

#[cfg(test)]
mod tests {
    use crate::expr::Reference;
    use crate::spec::Datum;
    use std::collections::HashSet;
    use std::ops::Not;

    /// Negating `b < 5 AND c < 10` yields `b >= 5 OR c >= 10`.
    #[test]
    fn test_predicate_negate_and() {
        let conjunction = Reference::new("b")
            .less_than(Datum::long(5))
            .and(Reference::new("c").less_than(Datum::long(10)));
        let negated_disjunction = Reference::new("b")
            .greater_than_or_equal_to(Datum::long(5))
            .or(Reference::new("c").greater_than_or_equal_to(Datum::long(10)));

        assert_eq!(conjunction.negate(), negated_disjunction);
    }

    /// Negating `b >= 5 OR c >= 10` yields `b < 5 AND c < 10`.
    #[test]
    fn test_predicate_negate_or() {
        let disjunction = Reference::new("b")
            .greater_than_or_equal_to(Datum::long(5))
            .or(Reference::new("c").greater_than_or_equal_to(Datum::long(10)));
        let negated_conjunction = Reference::new("b")
            .less_than(Datum::long(5))
            .and(Reference::new("c").less_than(Datum::long(10)));

        assert_eq!(disjunction.negate(), negated_conjunction);
    }

    /// Negating `NOT (b >= 5)` unwraps to `b >= 5`.
    #[test]
    fn test_predicate_negate_not() {
        let wrapped = Reference::new("b")
            .greater_than_or_equal_to(Datum::long(5))
            .not();
        let unwrapped = Reference::new("b").greater_than_or_equal_to(Datum::long(5));

        assert_eq!(wrapped.negate(), unwrapped);
    }

    /// Negating `b IS NOT NULL` yields `b IS NULL`.
    #[test]
    fn test_predicate_negate_unary() {
        let not_null = Reference::new("b").is_not_null();
        let null = Reference::new("b").is_null();

        assert_eq!(not_null.negate(), null);
    }

    /// Negating `a < 5` yields `a >= 5`.
    #[test]
    fn test_predicate_negate_binary() {
        let less_than = Reference::new("a").less_than(Datum::long(5));
        let at_least = Reference::new("a").greater_than_or_equal_to(Datum::long(5));

        assert_eq!(less_than.negate(), at_least);
    }

    /// Negating an `IN` predicate yields `NOT IN` over the same literals.
    #[test]
    fn test_predicate_negate_set() {
        let member = Reference::new("a").is_in(HashSet::from([Datum::long(5), Datum::long(6)]));
        let non_member =
            Reference::new("a").is_not_in(HashSet::from([Datum::long(5), Datum::long(6)]));

        assert_eq!(member.negate(), non_member);
    }
}
91 changes: 89 additions & 2 deletions crates/iceberg/src/expr/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,17 @@

//! Term definition.
use crate::expr::{BinaryExpression, Predicate, PredicateOperator};
use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression};
use crate::spec::{Datum, NestedField, NestedFieldRef};
use std::collections::HashSet;
use std::fmt::{Display, Formatter};

/// Unbound term before binding to a schema.
pub type Term = Reference;

/// A named reference in an unbound expression.
/// For example, `a` in `a > 10`.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub struct Reference {
    /// Name being referenced, e.g. `a` in `a > 10`.
    name: String,
}
Expand Down Expand Up @@ -63,6 +64,92 @@ impl Reference {
datum,
))
}

/// Creates a greater-than-or-equal-to expression. For example, `a >= 10`.
///
/// Consumes the reference and uses it as the term of the resulting binary
/// predicate, with `datum` as the literal it is compared against.
///
/// # Example
///
/// ```rust
///
/// use iceberg::expr::Reference;
/// use iceberg::spec::Datum;
/// let expr = Reference::new("a").greater_than_or_equal_to(Datum::long(10));
///
/// assert_eq!(&format!("{expr}"), "a >= 10");
/// ```
pub fn greater_than_or_equal_to(self, datum: Datum) -> Predicate {
    Predicate::Binary(BinaryExpression::new(
        PredicateOperator::GreaterThanOrEq,
        self,
        datum,
    ))
}

/// Builds an is-null predicate over this reference, e.g. `a IS NULL`.
///
/// # Example
///
/// ```rust
///
/// use iceberg::expr::Reference;
/// let expr = Reference::new("a").is_null();
///
/// assert_eq!(&format!("{expr}"), "a IS NULL");
/// ```
pub fn is_null(self) -> Predicate {
    let null_check = UnaryExpression::new(PredicateOperator::IsNull, self);
    Predicate::Unary(null_check)
}

/// Builds an is-not-null predicate over this reference, e.g. `a IS NOT NULL`.
///
/// # Example
///
/// ```rust
///
/// use iceberg::expr::Reference;
/// let expr = Reference::new("a").is_not_null();
///
/// assert_eq!(&format!("{expr}"), "a IS NOT NULL");
/// ```
pub fn is_not_null(self) -> Predicate {
    let not_null_check = UnaryExpression::new(PredicateOperator::NotNull, self);
    Predicate::Unary(not_null_check)
}

/// Creates an is-in expression. For example, `a IN (5, 6)`.
///
/// Consumes the reference and uses it as the term of a set predicate that
/// tests membership in `literals`.
///
/// # Example
///
/// ```rust
///
/// use std::collections::HashSet;
/// use iceberg::expr::Reference;
/// use iceberg::spec::Datum;
/// let expr = Reference::new("a").is_in(HashSet::from([Datum::long(5), Datum::long(6)]));
///
/// // The literals are held in a `HashSet`, so the order in which they are
/// // rendered is unspecified; compare predicates structurally instead of by
/// // their string form.
/// let opposite = Reference::new("a").is_not_in(HashSet::from([Datum::long(5), Datum::long(6)]));
/// assert_eq!(expr.negate(), opposite);
/// ```
pub fn is_in(self, literals: HashSet<Datum>) -> Predicate {
    Predicate::Set(SetExpression::new(PredicateOperator::In, self, literals))
}

/// Creates an is-not-in expression. For example, `a NOT IN (5, 6)`.
///
/// Consumes the reference and uses it as the term of a set predicate that
/// tests non-membership in `literals`.
///
/// # Example
///
/// ```rust
///
/// use std::collections::HashSet;
/// use iceberg::expr::Reference;
/// use iceberg::spec::Datum;
/// let expr = Reference::new("a").is_not_in(HashSet::from([Datum::long(5), Datum::long(6)]));
///
/// // The literals are held in a `HashSet`, so the order in which they are
/// // rendered is unspecified; compare predicates structurally instead of by
/// // their string form.
/// let negated_in = Reference::new("a")
///     .is_in(HashSet::from([Datum::long(5), Datum::long(6)]))
///     .negate();
/// assert_eq!(expr, negated_in);
/// ```
pub fn is_not_in(self, literals: HashSet<Datum>) -> Predicate {
    Predicate::Set(SetExpression::new(PredicateOperator::NotIn, self, literals))
}
}

impl Display for Reference {
Expand Down
2 changes: 1 addition & 1 deletion crates/iceberg/src/spec/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ impl From<MapType> for Type {
}

/// Primitive data types
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash)]
#[serde(rename_all = "lowercase", remote = "Self")]
pub enum PrimitiveType {
/// True or False
Expand Down
2 changes: 1 addition & 1 deletion crates/iceberg/src/spec/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub enum PrimitiveLiteral {
///
/// By default, we decouple the type and value of a literal, so we can use avoid the cost of storing extra type info
/// for each literal. But associate type with literal can be useful in some cases, for example, in unbound expression.
#[derive(Debug)]
#[derive(Debug, PartialEq, Hash, Eq)]
pub struct Datum {
r#type: PrimitiveType,
literal: PrimitiveLiteral,
Expand Down

0 comments on commit d363bb8

Please sign in to comment.