Skip to content

Commit

Permalink
remove the type coercion in the simplify_expressions rule (#3657)
Browse files Browse the repository at this point in the history
  • Loading branch information
liukun4515 authored Sep 29, 2022
1 parent 29b8bbd commit 0d1cd55
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 44 deletions.
52 changes: 16 additions & 36 deletions datafusion/optimizer/src/simplify_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
//! Simplify expressions optimizer rule
use crate::expr_simplifier::ExprSimplifiable;
use crate::type_coercion::TypeCoercionRewriter;
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
Expand All @@ -33,7 +32,6 @@ use datafusion_expr::{
BuiltinScalarFunction, ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
};
use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
use std::sync::Arc;

/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
Expand Down Expand Up @@ -377,9 +375,6 @@ pub struct ConstEvaluator<'a> {
execution_props: &'a ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
// Needed until we ensure type coercion is done before any optimizations
// https://github.com/apache/arrow-datafusion/issues/3557
type_coercion_helper: TypeCoercionRewriter,
}

impl<'a> ExprRewriter for ConstEvaluator<'a> {
Expand Down Expand Up @@ -434,14 +429,12 @@ impl<'a> ConstEvaluator<'a> {
// Need a single "input" row to produce a single output row
let col = new_null_array(&DataType::Null, 1);
let input_batch = RecordBatch::try_new(std::sync::Arc::new(schema), vec![col])?;
let type_coercion = TypeCoercionRewriter::new(Arc::new(input_schema.clone()));

Ok(Self {
can_evaluate: vec![],
execution_props,
input_schema,
input_batch,
type_coercion_helper: type_coercion,
})
}

Expand Down Expand Up @@ -510,15 +503,6 @@ impl<'a> ConstEvaluator<'a> {
return Ok(s);
}

// TODO: https://github.com/apache/arrow-datafusion/issues/3582
// TODO: https://github.com/apache/arrow-datafusion/issues/3556
// Do the type coercion in the simplify expression
// this is just a work around for removing the type coercion in the physical phase
// we need to support eval the result without the physical expr.
// If we don't do the type coercion, we will meet the
// https://github.com/apache/arrow-datafusion/issues/3556 when create the physical expr
// to try evaluate the result.
let expr = expr.rewrite(&mut self.type_coercion_helper)?;
let phys_expr = create_physical_expr(
&expr,
&self.input_schema,
Expand Down Expand Up @@ -1223,12 +1207,6 @@ mod tests {
assert_eq!(simplify(expr_eq), lit(true));
}

#[test]
fn test_simplify_with_type_coercion() {
let expr_plus = binary_expr(lit(1_i32), Operator::Plus, lit(1_i64));
assert_eq!(simplify(expr_plus), lit(2_i64));
}

#[test]
fn test_simplify_concat_ws_null_separator() {
fn build_concat_ws_expr(args: &[Expr]) -> Expr {
Expand Down Expand Up @@ -1351,13 +1329,13 @@ mod tests {
// now() --> ts
test_evaluate_with_start_time(now_expr(), lit_timestamp_nano(ts_nanos), &time);

// CAST(now() as int64) + 100 --> ts + 100
let expr = cast_to_int64_expr(now_expr()) + lit(100);
// CAST(now() as int64) + 100_i64 --> ts + 100_i64
let expr = cast_to_int64_expr(now_expr()) + lit(100_i64);
test_evaluate_with_start_time(expr, lit(ts_nanos + 100), &time);

// CAST(now() as int64) < cast(to_timestamp(...) as int64) + 50000 ---> true
// CAST(now() as int64) < cast(to_timestamp(...) as int64) + 50000_i64 ---> true
let expr = cast_to_int64_expr(now_expr())
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000));
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000i64));
test_evaluate_with_start_time(expr, lit(true), &time);
}

Expand Down Expand Up @@ -2208,19 +2186,21 @@ mod tests {
let ts_string = "2020-09-08T12:05:00+00:00";
let time = chrono::Utc.timestamp_nanos(1599566400000000000i64);

// cast(now() as int) < cast(to_timestamp(...) as int) + 5000000000
let plan = LogicalPlanBuilder::from(table_scan)
.filter(
cast_to_int64_expr(now_expr())
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000)),
)
.unwrap()
.build()
.unwrap();
// cast(now() as int) < cast(to_timestamp(...) as int) + 50000_i64
let plan =
LogicalPlanBuilder::from(table_scan)
.filter(
cast_to_int64_expr(now_expr())
.lt(cast_to_int64_expr(to_timestamp_expr(ts_string))
+ lit(50000_i64)),
)
.unwrap()
.build()
.unwrap();

// Note that constant folder runs and folds the entire
// expression down to a single constant (true)
let expected = "Filter: Boolean(true) AS now() < totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int32(50000)\
let expected = "Filter: Boolean(true) AS now() < totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int64(50000)\
\n TableScan: test";
let actual = get_optimized_plan_formatted(&plan, &time);

Expand Down
10 changes: 2 additions & 8 deletions datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,10 @@ fn optimize_internal(
from_plan(plan, &new_expr, &new_inputs)
}

pub(crate) struct TypeCoercionRewriter {
struct TypeCoercionRewriter {
pub(crate) schema: DFSchemaRef,
}

impl TypeCoercionRewriter {
pub(crate) fn new(schema: DFSchemaRef) -> TypeCoercionRewriter {
TypeCoercionRewriter { schema }
}
}

impl ExprRewriter for TypeCoercionRewriter {
fn pre_visit(&mut self, _expr: &Expr) -> Result<RewriteRecursion> {
Ok(RewriteRecursion::Continue)
Expand Down Expand Up @@ -796,7 +790,7 @@ mod test {
)
.unwrap(),
);
let mut rewriter = TypeCoercionRewriter::new(schema);
let mut rewriter = TypeCoercionRewriter { schema };
let expr = is_true(lit(12i32).eq(lit(13i64)));
let expected = is_true(
cast(lit(ScalarValue::Int32(Some(12))), DataType::Int64)
Expand Down

0 comments on commit 0d1cd55

Please sign in to comment.