diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index a1129ca65b72b..5381f28f64490 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -1986,54 +1986,52 @@ mod tests { DataType::Decimal128(9, 2), true, )])); - // s1 > 5 - let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2))); - let expr = logical2physical(&expr, &schema); - // If the data is written by spark, the physical data type is INT32 in the parquet - // So we use the INT32 type of statistic. - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_i32( - vec![Some(0), Some(4), None, Some(3)], // min - vec![Some(5), Some(6), Some(4), None], // max + + prune_with_expr( + // s1 > 5 + col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2))), + &schema, + // If the data is written by spark, the physical data type is INT32 in the parquet + // So we use the INT32 type of statistic. 
+ &TestStatistics::new().with( + "s1", + ContainerStats::new_i32( + vec![Some(0), Some(4), None, Some(3)], // min + vec![Some(5), Some(6), Some(4), None], // max + ), ), + &[false, true, false, true], ); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, false, true]; - assert_eq!(result, expected); - // with cast column to other type - let expr = cast(col("s1"), DataType::Decimal128(14, 3)) - .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))); - let expr = logical2physical(&expr, &schema); - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_i32( - vec![Some(0), Some(4), None, Some(3)], // min - vec![Some(5), Some(6), Some(4), None], // max + prune_with_expr( + // with cast column to other type + cast(col("s1"), DataType::Decimal128(14, 3)) + .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))), + &schema, + &TestStatistics::new().with( + "s1", + ContainerStats::new_i32( + vec![Some(0), Some(4), None, Some(3)], // min + vec![Some(5), Some(6), Some(4), None], // max + ), ), + &[false, true, false, true], ); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, false, true]; - assert_eq!(result, expected); - // with try cast column to other type - let expr = try_cast(col("s1"), DataType::Decimal128(14, 3)) - .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))); - let expr = logical2physical(&expr, &schema); - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_i32( - vec![Some(0), Some(4), None, Some(3)], // min - vec![Some(5), Some(6), Some(4), None], // max + prune_with_expr( + // with try cast column to other type + try_cast(col("s1"), DataType::Decimal128(14, 3)) + .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))), + &schema, + &TestStatistics::new().with( + "s1", + ContainerStats::new_i32( + vec![Some(0), Some(4), None, 
Some(3)], // min + vec![Some(5), Some(6), Some(4), None], // max + ), ), + &[false, true, false, true], ); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, false, true]; - assert_eq!(result, expected); // decimal(18,2) let schema = Arc::new(Schema::new(vec![Field::new( @@ -2041,22 +2039,21 @@ mod tests { DataType::Decimal128(18, 2), true, )])); - // s1 > 5 - let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 18, 2))); - let expr = logical2physical(&expr, &schema); - // If the data is written by spark, the physical data type is INT64 in the parquet - // So we use the INT32 type of statistic. - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_i64( - vec![Some(0), Some(4), None, Some(3)], // min - vec![Some(5), Some(6), Some(4), None], // max + prune_with_expr( + // s1 > 5 + col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 18, 2))), + &schema, + // If the data is written by spark, the physical data type is INT64 in the parquet + // So we use the INT64 type of statistic. 
+ &TestStatistics::new().with( + "s1", + ContainerStats::new_i64( + vec![Some(0), Some(4), None, Some(3)], // min + vec![Some(5), Some(6), Some(4), None], // max + ), ), + &[false, true, false, true], ); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, false, true]; - assert_eq!(result, expected); // decimal(23,2) let schema = Arc::new(Schema::new(vec![Field::new( @@ -2064,22 +2061,22 @@ mod tests { DataType::Decimal128(23, 2), true, )])); - // s1 > 5 - let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 23, 2))); - let expr = logical2physical(&expr, &schema); - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_decimal128( - vec![Some(0), Some(400), None, Some(300)], // min - vec![Some(500), Some(600), Some(400), None], // max - 23, - 2, + + prune_with_expr( + // s1 > 5 + col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 23, 2))), + &schema, + &TestStatistics::new().with( + "s1", + ContainerStats::new_decimal128( + vec![Some(0), Some(400), None, Some(300)], // min + vec![Some(500), Some(600), Some(400), None], // max + 23, + 2, + ), ), + &[false, true, false, true], ); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, false, true]; - assert_eq!(result, expected); } #[test] @@ -2089,10 +2086,6 @@ mod tests { Field::new("s2", DataType::Int32, true), ])); - // Prune using s2 > 5 - let expr = col("s2").gt(lit(5)); - let expr = logical2physical(&expr, &schema); - let statistics = TestStatistics::new().with( "s2", ContainerStats::new_i32( @@ -2100,53 +2093,50 @@ mod tests { vec![Some(5), Some(6), None, None], // max ), ); + prune_with_expr( + // Prune using s2 > 5 + col("s2").gt(lit(5)), + &schema, + &statistics, + // s2 [0, 5] ==> no rows should pass + // s2 [4, 6] ==> some rows could pass + // No stats for s2 ==> some rows could pass + // s2 [3, None] 
(null max) ==> some rows could pass + &[false, true, true, true], + ); - // s2 [0, 5] ==> no rows should pass - // s2 [4, 6] ==> some rows could pass - // No stats for s2 ==> some rows could pass - // s2 [3, None] (null max) ==> some rows could pass - - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, true, true]; - assert_eq!(result, expected); - - // filter with cast - let expr = cast(col("s2"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(5)))); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![false, true, true, true]; - assert_eq!(result, expected); + prune_with_expr( + // filter with cast + cast(col("s2"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(5)))), + &schema, + &statistics, + &[false, true, true, true], + ); } #[test] fn prune_not_eq_data() { let schema = Arc::new(Schema::new(vec![Field::new("s1", DataType::Utf8, true)])); - // Prune using s2 != 'M' - let expr = col("s1").not_eq(lit("M")); - let expr = logical2physical(&expr, &schema); - - let statistics = TestStatistics::new().with( - "s1", - ContainerStats::new_utf8( - vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min - vec![Some("Z"), Some("L"), Some("Z"), Some("M"), None, None], // max + prune_with_expr( + // Prune using s1 != 'M' + col("s1").not_eq(lit("M")), + &schema, + &TestStatistics::new().with( + "s1", + ContainerStats::new_utf8( + vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min + vec![Some("Z"), Some("L"), Some("Z"), Some("M"), None, None], // max + ), ), + // s1 [A, Z] ==> might have values that pass predicate + // s1 [A, L] ==> all rows pass the predicate + // s1 [N, Z] ==> all rows pass the predicate + // s1 [M, M] ==> all rows do not pass the predicate + // No stats for s1 ==> some rows could pass + // s1 [A, 
None] (null max) ==> some rows could pass + &[true, true, true, false, true, true], ); - - // s1 [A, Z] ==> might have values that pass predicate - // s1 [A, L] ==> all rows pass the predicate - // s1 [N, Z] ==> all rows pass the predicate - // s1 [M, M] ==> all rows do not pass the predicate - // No stats for s2 ==> some rows could pass - // s2 [3, None] (null max) ==> some rows could pass - - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - let expected = vec![true, true, true, false, true, true]; - assert_eq!(result, expected); } /// Creates setup for boolean chunk pruning @@ -2185,69 +2175,75 @@ mod tests { fn prune_bool_const_expr() { let (schema, statistics, _, _) = bool_setup(); - // true - let expr = lit(true); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, vec![true, true, true, true, true]); + prune_with_expr( + // true + lit(true), + &schema, + &statistics, + &[true, true, true, true, true], + ); - // false - // constant literals that do NOT refer to any columns are currently not evaluated at all, hence the result is - // "all true" - let expr = lit(false); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, vec![true, true, true, true, true]); + prune_with_expr( + // false + // constant literals that do NOT refer to any columns are currently not evaluated at all, hence the result is + // "all true" + lit(false), + &schema, + &statistics, + &[true, true, true, true, true], + ); } #[test] fn prune_bool_column() { let (schema, statistics, expected_true, _) = bool_setup(); - // b1 - let expr = col("b1"); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = 
p.prune(&statistics).unwrap(); - assert_eq!(result, expected_true); + prune_with_expr( + // b1 + col("b1"), + &schema, + &statistics, + &expected_true, + ); } #[test] fn prune_bool_not_column() { let (schema, statistics, _, expected_false) = bool_setup(); - // !b1 - let expr = col("b1").not(); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_false); + prune_with_expr( + // !b1 + col("b1").not(), + &schema, + &statistics, + &expected_false, + ); } #[test] fn prune_bool_column_eq_true() { let (schema, statistics, expected_true, _) = bool_setup(); - // b1 = true - let expr = col("b1").eq(lit(true)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_true); + prune_with_expr( + // b1 = true + col("b1").eq(lit(true)), + &schema, + &statistics, + &expected_true, + ); } #[test] fn prune_bool_not_column_eq_true() { let (schema, statistics, _, expected_false) = bool_setup(); - // !b1 = true - let expr = col("b1").not().eq(lit(true)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_false); + prune_with_expr( + // !b1 = true + col("b1").not().eq(lit(true)), + &schema, + &statistics, + &expected_false, + ); } /// Creates a setup for chunk pruning, modeling a int32 column "i" @@ -2282,21 +2278,18 @@ mod tests { // i [-11, -1] ==> no rows can pass (not keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> unknown (must keep) - let expected_ret = vec![true, true, false, true, true]; + let expected_ret = &[true, true, false, true, true]; // i > 0 - let expr = col("i").gt(lit(0)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, 
schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr(col("i").gt(lit(0)), &schema, &statistics, expected_ret); // -i < 0 - let expr = Expr::Negative(Box::new(col("i"))).lt(lit(0)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + Expr::Negative(Box::new(col("i"))).lt(lit(0)), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2309,21 +2302,23 @@ mod tests { // i [-11, -1] ==> all rows must pass (must keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> no rows can pass (not keep) - let expected_ret = vec![true, false, true, true, false]; + let expected_ret = &[true, false, true, true, false]; - // i <= 0 - let expr = col("i").lt_eq(lit(0)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i <= 0 + col("i").lt_eq(lit(0)), + &schema, + &statistics, + expected_ret, + ); - // -i >= 0 - let expr = Expr::Negative(Box::new(col("i"))).gt_eq(lit(0)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // -i >= 0 + Expr::Negative(Box::new(col("i"))).gt_eq(lit(0)), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2336,37 +2331,39 @@ mod tests { // i [-11, -1] ==> no rows could pass in theory (conservatively keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> no rows can pass (conservatively keep) - let expected_ret = vec![true, true, true, true, true]; + let expected_ret = &[true, true, true, true, true]; - // cast(i as utf8) <= 0 - let expr = cast(col("i"), 
DataType::Utf8).lt_eq(lit("0")); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // cast(i as utf8) <= 0 + cast(col("i"), DataType::Utf8).lt_eq(lit("0")), + &schema, + &statistics, + expected_ret, + ); - // try_cast(i as utf8) <= 0 - let expr = try_cast(col("i"), DataType::Utf8).lt_eq(lit("0")); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // try_cast(i as utf8) <= 0 + try_cast(col("i"), DataType::Utf8).lt_eq(lit("0")), + &schema, + &statistics, + expected_ret, + ); - // cast(-i as utf8) >= 0 - let expr = - cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // cast(-i as utf8) >= 0 + cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")), + &schema, + &statistics, + expected_ret, + ); - // try_cast(-i as utf8) >= 0 - let expr = - try_cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // try_cast(-i as utf8) >= 0 + try_cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2379,14 +2376,15 @@ mod tests { // i [-11, -1] ==> no rows can pass (not keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> no rows can pass (not keep) - let expected_ret = vec![true, 
false, false, true, false]; + let expected_ret = &[true, false, false, true, false]; - // i = 0 - let expr = col("i").eq(lit(0)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i = 0 + col("i").eq(lit(0)), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2399,19 +2397,21 @@ mod tests { // i [-11, -1] ==> no rows can pass (not keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> no rows can pass (not keep) - let expected_ret = vec![true, false, false, true, false]; + let expected_ret = &[true, false, false, true, false]; - let expr = cast(col("i"), DataType::Int64).eq(lit(0i64)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + cast(col("i"), DataType::Int64).eq(lit(0i64)), + &schema, + &statistics, + expected_ret, + ); - let expr = try_cast(col("i"), DataType::Int64).eq(lit(0i64)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + try_cast(col("i"), DataType::Int64).eq(lit(0i64)), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2427,13 +2427,14 @@ mod tests { // i [-11, -1] ==> no rows can pass (could keep) // i [NULL, NULL] ==> unknown (keep) // i [1, NULL] ==> no rows can pass (could keep) - let expected_ret = vec![true, true, true, true, true]; + let expected_ret = &[true, true, true, true, true]; - let expr = cast(col("i"), DataType::Utf8).eq(lit("0")); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, 
expected_ret); + prune_with_expr( + cast(col("i"), DataType::Utf8).eq(lit("0")), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2446,21 +2447,23 @@ mod tests { // i [-11, -1] ==> no rows can pass (not keep) // i [NULL, NULL] ==> unknown (must keep) // i [1, NULL] ==> all rows must pass (must keep) - let expected_ret = vec![true, true, false, true, true]; + let expected_ret = &[true, true, false, true, true]; - // i > -1 - let expr = col("i").gt(lit(-1)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i > -1 + col("i").gt(lit(-1)), + &schema, + &statistics, + expected_ret, + ); - // -i < 1 - let expr = Expr::Negative(Box::new(col("i"))).lt(lit(1)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // -i < 1 + Expr::Negative(Box::new(col("i"))).lt(lit(1)), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2469,14 +2472,15 @@ mod tests { // Expression "i IS NULL" when there are no null statistics, // should all be kept - let expected_ret = vec![true, true, true, true, true]; + let expected_ret = &[true, true, true, true, true]; - // i IS NULL, no null statistics - let expr = col("i").is_null(); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i IS NULL, no null statistics + col("i").is_null(), + &schema, + &statistics, + expected_ret, + ); // provide null counts for each column let statistics = statistics.with_null_counts( @@ -2490,51 +2494,55 @@ mod tests { ], ); - let expected_ret = vec![false, true, true, true, false]; + let expected_ret = 
&[false, true, true, true, false]; - // i IS NULL, with actual null statistcs - let expr = col("i").is_null(); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i IS NULL, with actual null statistics + col("i").is_null(), + &schema, + &statistics, + expected_ret, + ); } #[test] fn prune_cast_column_scalar() { // The data type of column i is INT32 let (schema, statistics) = int32_setup(); - let expected_ret = vec![true, true, false, true, true]; + let expected_ret = &[true, true, false, true, true]; - // i > int64(0) - let expr = col("i").gt(cast(lit(ScalarValue::Int64(Some(0))), DataType::Int32)); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // i > int64(0) + col("i").gt(cast(lit(ScalarValue::Int64(Some(0))), DataType::Int32)), + &schema, + &statistics, + expected_ret, + ); - // cast(i as int64) > int64(0) - let expr = cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // cast(i as int64) > int64(0) + cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))), + &schema, + &statistics, + expected_ret, + ); - // try_cast(i as int64) > int64(0) - let expr = - try_cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema.clone()).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // try_cast(i as int64) > int64(0) + 
try_cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))), + &schema, + &statistics, + expected_ret, + ); - // `-cast(i as int64) < 0` convert to `cast(i as int64) > -0` - let expr = Expr::Negative(Box::new(cast(col("i"), DataType::Int64))) - .lt(lit(ScalarValue::Int64(Some(0)))); - let expr = logical2physical(&expr, &schema); - let p = PruningPredicate::try_new(expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap(); - assert_eq!(result, expected_ret); + prune_with_expr( + // `-cast(i as int64) < 0` convert to `cast(i as int64) > -0` + Expr::Negative(Box::new(cast(col("i"), DataType::Int64))) + .lt(lit(ScalarValue::Int64(Some(0)))), + &schema, + &statistics, + expected_ret, + ); } #[test] @@ -2685,7 +2693,7 @@ mod tests { &schema, &statistics, // rule out containers where we know foo is not present - vec![true, false, true, true, false, true, true, false, true], + &[true, false, true, true, false, true, true, false, true], ); // s1 = 'bar' @@ -2694,7 +2702,7 @@ mod tests { &schema, &statistics, // rule out containers where we know bar is not present - vec![true, true, true, false, false, false, true, true, true], + &[true, true, true, false, false, false, true, true, true], ); // s1 = 'baz' (unknown value) @@ -2703,7 +2711,7 @@ mod tests { &schema, &statistics, // can't rule out anything - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); // s1 = 'foo' AND s1 = 'bar' @@ -2713,7 +2721,7 @@ mod tests { &statistics, // this can't possibly be true (the column can't take on both values) // but we can certainly rule it out if the stats tell us that both values are not present - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); // s1 = 'foo' OR s1 = 'bar' @@ -2722,7 +2730,7 @@ mod tests { &schema, &statistics, // returns the check with foo and bar constants - vec![true, true, true, true, true, true, 
false, false, false], + &[true, true, true, true, true, true, false, false, false], ); // s1 != foo @@ -2731,7 +2739,7 @@ mod tests { &schema, &statistics, // rule out when we know for sure s1 does the value - vec![false, true, true, false, true, true, false, true, true], + &[false, true, true, false, true, true, false, true, true], ); // s1 != bar @@ -2740,7 +2748,7 @@ mod tests { &schema, &statistics, // rule out when we know for sure s1 does have the value - vec![false, false, false, true, true, true, true, true, true], + &[false, false, false, true, true, true, true, true, true], ); // s1 != foo AND s1 != bar @@ -2751,7 +2759,7 @@ mod tests { &schema, &statistics, // can rule out any container where we know s1 does have either value - vec![true, true, true, false, false, false, true, true, true], + &[true, true, true, false, false, false, true, true, true], ); // s1 != foo OR s1 != bar @@ -2762,7 +2770,7 @@ mod tests { &schema, &statistics, // cant' rule out anything - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); } @@ -2819,7 +2827,7 @@ mod tests { &schema, &statistics, // rule out containers where we know s1 is not present - vec![true, false, true, true, false, true, true, false, true], + &[true, false, true, true, false, true, true, false, true], ); // s1 = 'foo' OR s2 = 'bar' @@ -2829,7 +2837,7 @@ mod tests { &schema, &statistics, // can't rule out any container (predicate is on both columns) - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); // s1 = 'foo' AND s2 != 'bar' @@ -2838,7 +2846,7 @@ mod tests { &schema, &statistics, // can only rule out container where we know s1 is NOT present or s2 IS preset - vec![false, false, false, true, false, true, true, false, true], + &[false, false, false, true, false, true, true, false, true], ); // s1 != 'foo' AND s2 != 'bar' @@ -2849,7 +2857,7 @@ mod tests { &schema, 
&statistics, // Can rule out any container where we know either values is present for sure - vec![false, false, false, false, true, true, false, true, true], + &[false, false, false, false, true, true, false, true, true], ); // s1 like '%foo%bar%' @@ -2858,7 +2866,7 @@ mod tests { &schema, &statistics, // cant rule out anything (unknown predicate) - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); // s1 like '%foo%bar%' AND s2 = 'bar' @@ -2869,7 +2877,7 @@ mod tests { &schema, &statistics, // can rule out all results when we know second column is false - vec![true, true, true, false, false, false, true, true, true], + &[true, true, true, false, false, false, true, true, true], ); } @@ -2939,7 +2947,7 @@ mod tests { &statistics, // Can only rule out container where we know values are not present // (range is false, and contains is false) - vec![true, false, true, false, false, false, true, false, true], + &[true, false, true, false, false, false, true, false, true], ); // i = 0 and s != 'foo' @@ -2948,7 +2956,7 @@ mod tests { &schema, &statistics, // Can only rule out container where we know range is false, or contains is true) - vec![false, false, false, true, false, true, true, false, true], + &[false, false, false, true, false, true, true, false, true], ); // i = 0 OR s = 'foo' @@ -2957,7 +2965,7 @@ mod tests { &schema, &statistics, // cant rule out anything (as connected by OR) - vec![true, true, true, true, true, true, true, true, true], + &[true, true, true, true, true, true, true, true, true], ); } @@ -2972,7 +2980,7 @@ mod tests { expr: Expr, schema: &SchemaRef, statistics: &TestStatistics, - expected: Vec, + expected: &[bool], ) { println!("Pruning with expr: {}", expr); let expr = logical2physical(&expr, schema);