Skip to content

Commit

Permalink
apply reviews
Browse files Browse the repository at this point in the history
  • Loading branch information
haohuaijin committed Jan 22, 2024
1 parent f89a4d1 commit 560cb6c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 20 deletions.
45 changes: 45 additions & 0 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,51 @@ impl Expr {
Ok(Transformed::Yes(expr))
})
}

/// Returns true if some of this `exprs` subexpressions may not be evaluated
/// and thus any side effects (like divide by zero) may not be encountered
pub fn short_circuits(&self) -> bool {
match self {
Expr::ScalarFunction(ScalarFunction { func_def, .. }) => {
matches!(func_def, ScalarFunctionDefinition::BuiltIn(fun) if *fun == BuiltinScalarFunction::Coalesce)
}
Expr::BinaryExpr(BinaryExpr { op, .. }) => {
matches!(op, Operator::And | Operator::Or)
}
Expr::Case { .. } => true,
Expr::AggregateFunction(..)
| Expr::Alias(..)
| Expr::Between(..)
| Expr::Cast(..)
| Expr::Column(..)
| Expr::Exists(..)
| Expr::GetIndexedField(..)
| Expr::GroupingSet(..)
| Expr::InList(..)
| Expr::InSubquery(..)
| Expr::IsFalse(..)
| Expr::IsNotFalse(..)
| Expr::IsNotNull(..)
| Expr::IsNotTrue(..)
| Expr::IsNotUnknown(..)
| Expr::IsNull(..)
| Expr::IsTrue(..)
| Expr::IsUnknown(..)
| Expr::Like(..)
| Expr::ScalarSubquery(..)
| Expr::ScalarVariable(_, _)
| Expr::SimilarTo(..)
| Expr::Not(..)
| Expr::Negative(..)
| Expr::OuterReferenceColumn(_, _)
| Expr::TryCast(..)
| Expr::Wildcard { .. }
| Expr::WindowFunction(..)
| Expr::Literal(..)
| Expr::Sort(..)
| Expr::Placeholder(..) => false,
}
}
}

// modifies expr if it is a placeholder with datatype of right
Expand Down
23 changes: 3 additions & 20 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ use datafusion_expr::expr::Alias;
use datafusion_expr::logical_plan::{
Aggregate, Filter, LogicalPlan, Projection, Sort, Window,
};
use datafusion_expr::{
col, expr::ScalarFunction, BinaryExpr, BuiltinScalarFunction, Expr, ExprSchemable,
Operator, ScalarFunctionDefinition,
};
use datafusion_expr::{col, Expr, ExprSchemable};

/// A map from expression's identifier to tuple including
/// - the expression itself (cloned)
Expand Down Expand Up @@ -620,7 +617,7 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
fn pre_visit(&mut self, expr: &Expr) -> Result<VisitRecursion> {
// related to https://github.com/apache/arrow-datafusion/issues/8814
// If the expr contain volatile expression or is a short-circuit expression, skip it.
if is_short_circuit_expression(expr) || is_volatile_expression(expr)? {
if expr.short_circuits() || is_volatile_expression(expr)? {
return Ok(VisitRecursion::Skip);
}
self.visit_stack
Expand Down Expand Up @@ -658,20 +655,6 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
}
}

/// Check if the expression is short-circuit expression
fn is_short_circuit_expression(expr: &Expr) -> bool {
match expr {
Expr::ScalarFunction(ScalarFunction { func_def, .. }) => {
matches!(func_def, ScalarFunctionDefinition::BuiltIn(fun) if *fun == BuiltinScalarFunction::Coalesce)
}
Expr::BinaryExpr(BinaryExpr { op, .. }) => {
matches!(op, Operator::And | Operator::Or)
}
Expr::Case { .. } => true,
_ => false,
}
}

/// Go through an expression tree and generate identifier for every node in this tree.
fn expr_to_identifier(
expr: &Expr,
Expand Down Expand Up @@ -717,7 +700,7 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_> {
// The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to generate
// the `id_array`, which records the expr's identifier used to rewrite expr. So if we
// skip an expr in `ExprIdentifierVisitor`, we should skip it here, too.
if is_short_circuit_expression(expr) || is_volatile_expression(expr)? {
if expr.short_circuits() || is_volatile_expression(expr)? {
return Ok(RewriteRecursion::Stop);
}
if self.curr_index >= self.id_array.len()
Expand Down
14 changes: 14 additions & 0 deletions datafusion/sqllogictest/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,9 @@ FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B;
0 0
0 0

# Expressions that short circuit should not be refactored out as that may cause side effects (divide by zero)
# at plan time that would not actually happen during execution, so the follow three query should not be extract
# the common sub-expression
query TT
explain select coalesce(1, y/x), coalesce(2, y/x) from t;
----
Expand Down Expand Up @@ -1159,5 +1162,16 @@ physical_plan
ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x]
--MemoryExec: partitions=1, partition_sizes=[1]

# due to the reason describe in https://github.com/apache/arrow-datafusion/issues/8927,
# the following queries will fail
query error
select coalesce(1, y/x), coalesce(2, y/x) from t;

query error
SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;

query error
SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t;

statement ok
DROP TABLE t;

0 comments on commit 560cb6c

Please sign in to comment.