Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for SIMILAR TO for physical plan #12350

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,22 @@ pub fn binary(
Ok(Arc::new(BinaryExpr::new(lhs, op, rhs)))
}

/// Builds the physical expression for `expr [NOT] SIMILAR TO pattern`.
///
/// The predicate is represented as a [`BinaryExpr`] over `expr` and `pattern`
/// whose operator is chosen from the two flags:
///
/// | `negated` | `case_insensitive` | operator                     |
/// |-----------|--------------------|------------------------------|
/// | `false`   | `false`            | [`Operator::RegexMatch`]     |
/// | `false`   | `true`             | [`Operator::RegexIMatch`]    |
/// | `true`    | `false`            | [`Operator::RegexNotMatch`]  |
/// | `true`    | `true`             | [`Operator::RegexNotIMatch`] |
///
/// The operands are not inspected here, so this constructor itself always
/// returns `Ok`; the `Result` return type mirrors the other expression
/// constructors in this module.
pub fn similar_to(
    negated: bool,
    case_insensitive: bool,
    expr: Arc<dyn PhysicalExpr>,
    pattern: Arc<dyn PhysicalExpr>,
) -> Result<Arc<dyn PhysicalExpr>> {
    let regex_op = if negated {
        if case_insensitive {
            Operator::RegexNotIMatch
        } else {
            Operator::RegexNotMatch
        }
    } else if case_insensitive {
        Operator::RegexIMatch
    } else {
        Operator::RegexMatch
    };

    let similar_expr: Arc<dyn PhysicalExpr> =
        Arc::new(BinaryExpr::new(expr, regex_op, pattern));
    Ok(similar_expr)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -4226,4 +4242,62 @@ mod tests {
.contains("Overflow happened on: 2147483647 * 2"));
Ok(())
}

/// Test helper: evaluates `a [NOT] SIMILAR TO b` over two string columns and
/// asserts that the outcome equals `expected`.
///
/// `va` and `vb` become the columns named `a` and `b` of a single record
/// batch built against `schema`; `negated` and `case_insensitive` are
/// forwarded unchanged to [`similar_to`].
///
/// Returns an error if the columns cannot be resolved in `schema`, the batch
/// cannot be built, or evaluation fails. Panics if the evaluated value cannot
/// be converted to an array or if the result differs from `expected`.
fn apply_similar_to(
    schema: &SchemaRef,
    va: Vec<&str>,
    vb: Vec<&str>,
    negated: bool,
    case_insensitive: bool,
    expected: &BooleanArray,
) -> Result<()> {
    let values = StringArray::from(va);
    let patterns = StringArray::from(vb);

    // Resolve both operands by column name, then wrap them in the predicate.
    let value_col = col("a", schema)?;
    let pattern_col = col("b", schema)?;
    let predicate = similar_to(negated, case_insensitive, value_col, pattern_col)?;

    let batch = RecordBatch::try_new(
        Arc::clone(schema),
        vec![Arc::new(values), Arc::new(patterns)],
    )?;

    let evaluated = predicate.evaluate(&batch)?;
    let actual = evaluated
        .into_array(batch.num_rows())
        .expect("Failed to convert to array");
    assert_eq!(actual.as_ref(), expected);

    Ok(())
}

/// Checks [`similar_to`] for every combination of `negated` and
/// `case_insensitive`.
///
/// The three input rows are chosen so that each flag changes the outcome:
/// * `"hello world"` matches `hello.*` regardless of case sensitivity,
/// * `"Hello World"` matches only when matching is case-insensitive,
/// * `"bye"` never matches.
#[test]
fn test_similar_to() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("a", DataType::Utf8, false),
        Field::new("b", DataType::Utf8, false),
    ]));

    // (negated, case_insensitive, expected result per row)
    let cases = [
        // SIMILAR TO, case-sensitive
        (false, false, [true, false, false]),
        // SIMILAR TO, case-insensitive: the differently-cased row now matches
        (false, true, [true, true, false]),
        // NOT SIMILAR TO, case-sensitive: exact inverse of the first case
        (true, false, [false, true, true]),
        // NOT SIMILAR TO, case-insensitive: exact inverse of the second case
        (true, true, [false, false, true]),
    ];

    for (negated, case_insensitive, expected) in cases {
        let expected: BooleanArray = expected.iter().map(|&v| Some(v)).collect();
        apply_similar_to(
            &schema,
            vec!["hello world", "Hello World", "bye"],
            vec!["hello.*", "hello.*", "hello.*"],
            negated,
            case_insensitive,
            &expected,
        )?;
    }

    Ok(())
}
}
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use crate::window::ntile::Ntile;
pub use crate::window::rank::{dense_rank, percent_rank, rank, Rank, RankType};
pub use crate::PhysicalSortExpr;

pub use binary::{binary, BinaryExpr};
pub use binary::{binary, similar_to, BinaryExpr};
pub use case::{case, CaseExpr};
pub use cast::{cast, CastExpr};
pub use column::{col, with_new_schema, Column};
Expand Down
18 changes: 17 additions & 1 deletion datafusion/physical-expr/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::sync::Arc;

use crate::scalar_function;
use crate::{
expressions::{self, binary, like, Column, Literal},
expressions::{self, binary, like, similar_to, Column, Literal},
PhysicalExpr,
};

Expand Down Expand Up @@ -215,6 +215,22 @@ pub fn create_physical_expr(
input_schema,
)
}
// `SIMILAR TO` carries the same `Like` payload (operands, negation,
// case-insensitivity and optional escape character) that is destructured here.
Expr::SimilarTo(Like {
negated,
expr,
pattern,
escape_char,
case_insensitive,
}) => {
// An explicit escape character is rejected with an error instead of being
// silently ignored.
if escape_char.is_some() {
return exec_err!("SIMILAR TO does not support escape_char yet");
}
// Plan both operands recursively against the same schema and execution
// properties.
let physical_expr =
create_physical_expr(expr, input_dfschema, execution_props)?;
let physical_pattern =
create_physical_expr(pattern, input_dfschema, execution_props)?;
// `similar_to` combines the two planned operands and the flags into the
// resulting physical expression.
similar_to(*negated, *case_insensitive, physical_expr, physical_pattern)
}
Expr::Case(case) => {
let expr: Option<Arc<dyn PhysicalExpr>> = if let Some(e) = &case.expr {
Some(create_physical_expr(
Expand Down
45 changes: 45 additions & 0 deletions datafusion/sqllogictest/test_files/strings.slt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,51 @@ P1m1e1
p1m1e1
p2m1e1

# REGEX
query T rowsort
SELECT s FROM test WHERE s ~ 'p[12].*';
----
p1
p1e1
p1m1e1
p2
p2e1
p2m1e1

# REGEX nocase
query T rowsort
SELECT s FROM test WHERE s ~* 'p[12].*';
----
P1
P1e1
P1m1e1
p1
p1e1
p1m1e1
p2
p2e1
p2m1e1

# SIMILAR TO
query T rowsort
SELECT s FROM test WHERE s SIMILAR TO 'p[12].*';
----
p1
p1e1
p1m1e1
p2
p2e1
p2m1e1

# NOT SIMILAR TO
query T rowsort
SELECT s FROM test WHERE s NOT SIMILAR TO 'p[12].*';
----
P1
P1e1
P1m1e1
e1

# NOT LIKE
query T rowsort
SELECT s FROM test WHERE s NOT LIKE 'p1%';
Expand Down