From 388fd24dbd7a98382dadc712d29c11ab97f8f866 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Thu, 7 Nov 2024 13:41:52 +0100 Subject: [PATCH] Wildcard fix --- datafusion/expr/src/expr.rs | 20 +++++++++++-------- datafusion/expr/src/expr_fn.rs | 18 ++++++++--------- datafusion/expr/src/logical_plan/plan.rs | 8 ++++---- datafusion/expr/src/utils.rs | 12 +++++------ .../src/analyzer/expand_wildcard_rule.rs | 4 ++-- .../src/analyzer/inline_table_scan.rs | 6 +++--- .../proto/src/logical_plan/from_proto.rs | 6 +++--- datafusion/proto/src/logical_plan/to_proto.rs | 4 ++-- .../tests/cases/roundtrip_logical_plan.rs | 10 +++++----- datafusion/sql/src/expr/function.rs | 20 ++++++++++--------- datafusion/sql/src/expr/mod.rs | 10 +++++----- datafusion/sql/src/unparser/expr.rs | 20 +++++++++---------- 12 files changed, 72 insertions(+), 66 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c495b5396f53..6460b9cdc652 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -311,10 +311,7 @@ pub enum Expr { /// /// This expr has to be resolved to a list of columns before translating logical /// plan into physical plan. - Wildcard { - qualifier: Option, - options: WildcardOptions, - }, + Wildcard(Wildcard), /// List of grouping set expressions. Only valid in the context of an aggregate /// GROUP BY expression list GroupingSet(GroupingSet), @@ -367,6 +364,13 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr { } } +/// Wildcard expression. +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +pub struct Wildcard { + pub qualifier: Option, + pub options: WildcardOptions, +} + /// UNNEST expression. #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct Unnest { @@ -1797,9 +1801,9 @@ impl HashNode for Expr { Expr::ScalarSubquery(subquery) => { subquery.hash(state); } - Expr::Wildcard { qualifier, options } => { - qualifier.hash(state); - options.hash(state); + Expr::Wildcard(wildcard) => { + wildcard.hash(state); + wildcard.hash(state); } Expr::GroupingSet(grouping_set) => { mem::discriminant(grouping_set).hash(state); @@ -2321,7 +2325,7 @@ impl Display for Expr { write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list)) } } - Expr::Wildcard { qualifier, options } => match qualifier { + Expr::Wildcard(Wildcard { qualifier, options }) => match qualifier { Some(qualifier) => write!(f, "{qualifier}.*{options}"), None => write!(f, "*{options}"), }, diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 681eb3c0afd5..e1ba0778cf9e 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, + Placeholder, TryCast, Unnest, Wildcard, WildcardOptions, WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -121,18 +121,18 @@ pub fn placeholder(id: impl Into) -> Expr { /// assert_eq!(p.to_string(), "*") /// ``` pub fn wildcard() -> Expr { - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - } + }) } /// Create an '*' [`Expr::Wildcard`] expression with the wildcard options pub fn wildcard_with_options(options: WildcardOptions) -> Expr { - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: None, options, - } + }) } /// Create an 't.*' [`Expr::Wildcard`] expression that matches all columns from a specific table @@ -146,10 +146,10 @@ pub fn wildcard_with_options(options: WildcardOptions) -> Expr { /// assert_eq!(p.to_string(), "t.*") /// ``` pub fn qualified_wildcard(qualifier: impl Into) -> Expr { - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: Some(qualifier.into()), options: WildcardOptions::default(), - } + }) } /// Create an 't.*' [`Expr::Wildcard`] expression with the wildcard options @@ -157,10 +157,10 @@ pub fn qualified_wildcard_with_options( qualifier: impl Into, options: WildcardOptions, ) -> Expr { - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: Some(qualifier.into()), options, - } + }) } /// Return a new expression `left right` diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index fe725e7d96de..85143db0242e 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -26,7 +26,7 @@ use std::sync::{Arc, OnceLock}; use super::dml::CopyTo; use super::DdlStatement; use crate::builder::{change_redundant_column, unnest_with_options}; -use crate::expr::{Placeholder, Sort as SortExpr, WindowFunction}; +use crate::expr::{Placeholder, Sort as SortExpr, Wildcard, WindowFunction}; use crate::expr_rewriter::{ create_col_from_scalar_expr, normalize_cols, normalize_sorts, NamePreserver, }; @@ -3187,12 +3187,12 @@ fn calc_func_dependencies_for_project( let proj_indices = exprs .iter() .map(|expr| match expr { - Expr::Wildcard { qualifier, options } => { + Expr::Wildcard(Wildcard { qualifier, options }) => { let wildcard_fields = exprlist_to_fields( - vec![&Expr::Wildcard { + vec![&Expr::Wildcard(Wildcard { qualifier: qualifier.clone(), options: options.clone(), - }], + })], input, )?; Ok::<_, DataFusionError>( diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 6f7c5d379260..a1ab142fa835 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -22,7 +22,7 @@ use std::collections::HashSet; use std::ops::Deref; use std::sync::Arc; -use crate::expr::{Alias, Sort, WildcardOptions, WindowFunction}; +use crate::expr::{Alias, Sort, Wildcard, WildcardOptions, WindowFunction}; use crate::expr_rewriter::strip_outer_reference; use crate::{ and, BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, @@ -703,7 +703,7 @@ pub fn exprlist_to_fields<'a>( let result = exprs .into_iter() .map(|e| match e { - Expr::Wildcard { qualifier, options } => match qualifier { + Expr::Wildcard(Wildcard { qualifier, options }) => match qualifier { None => { let excluded: Vec = get_excluded_columns( options.exclude.as_ref(), @@ -802,10 +802,10 @@ pub fn exprlist_len( exprs .iter() .map(|e| match e { - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: None, options, - } => { + }) => { let excluded = get_excluded_columns( options.exclude.as_ref(), options.except.as_ref(), @@ -819,10 +819,10 @@ pub fn exprlist_len( .len(), ) } - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: Some(qualifier), options, - } => { + }) => { let related_wildcard_schema = wildcard_schema.as_ref().map_or_else( || Ok(Arc::clone(schema)), |schema| { diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs index 9fbe54e1ccb9..ff9f3df39fd2 100644 --- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs @@ -22,7 +22,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult}; use datafusion_common::{Column, Result}; use datafusion_expr::builder::validate_unique_names; -use datafusion_expr::expr::PlannedReplaceSelectItem; +use datafusion_expr::expr::{PlannedReplaceSelectItem, Wildcard}; use datafusion_expr::utils::{ expand_qualified_wildcard, expand_wildcard, find_base_plan, }; @@ -89,7 +89,7 @@ fn expand_exprlist(input: &LogicalPlan, expr: Vec) -> Result> { let input = find_base_plan(input); for e in expr { match e { - Expr::Wildcard { qualifier, options } => { + Expr::Wildcard(Wildcard { qualifier, options }) => { if let Some(qualifier) = qualifier { let expanded = expand_qualified_wildcard( &qualifier, diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 342d85a915b4..68edda671a7a 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,7 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; -use datafusion_expr::expr::WildcardOptions; +use datafusion_expr::expr::{Wildcard, WildcardOptions}; use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] @@ -93,10 +93,10 @@ fn generate_projection_expr( ))); } } else { - exprs.push(Expr::Wildcard { + exprs.push(Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }); + })); } Ok(exprs) } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 301efc42a7c4..64d8e24ce518 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -22,7 +22,7 @@ use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, RecursionUnnestOption, Result, ScalarValue, TableReference, UnnestOptions, }; -use datafusion_expr::expr::{Alias, Placeholder, Sort}; +use datafusion_expr::expr::{Alias, Placeholder, Sort, Wildcard}; use datafusion_expr::expr::{Unnest, WildcardOptions}; use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ @@ -511,10 +511,10 @@ pub fn parse_expr( ))), ExprType::Wildcard(protobuf::Wildcard { qualifier }) => { let qualifier = qualifier.to_owned().map(|x| x.try_into()).transpose()?; - Ok(Expr::Wildcard { + Ok(Expr::Wildcard(Wildcard { qualifier, options: WildcardOptions::default(), - }) + })) } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index caceb3db164c..b3c91207ecf3 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -22,7 +22,7 @@ use datafusion_common::{TableReference, UnnestOptions}; use datafusion_expr::expr::{ self, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, Like, Placeholder, - ScalarFunction, Unnest, + ScalarFunction, Unnest, Wildcard, }; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, Expr, JoinConstraint, @@ -552,7 +552,7 @@ pub fn serialize_expr( expr_type: Some(ExprType::InList(expr)), } } - Expr::Wildcard { qualifier, .. } => protobuf::LogicalExprNode { + Expr::Wildcard(Wildcard { qualifier, .. }) => protobuf::LogicalExprNode { expr_type: Some(ExprType::Wildcard(protobuf::Wildcard { qualifier: qualifier.to_owned().map(|x| x.into()), })), diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index cfb6862a0ca3..deece2e54a5a 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -65,7 +65,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Unnest, WildcardOptions, + Unnest, Wildcard, WildcardOptions, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -2059,10 +2059,10 @@ fn roundtrip_unnest() { #[test] fn roundtrip_wildcard() { - let test_expr = Expr::Wildcard { + let test_expr = Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }; + }); let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); @@ -2070,10 +2070,10 @@ fn roundtrip_wildcard() { #[test] fn roundtrip_qualified_wildcard() { - let test_expr = Expr::Wildcard { + let test_expr = Expr::Wildcard(Wildcard { qualifier: Some("foo".into()), options: WildcardOptions::default(), - }; + }); let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 67fa23b86990..3e9606527680 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,7 +22,7 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; -use datafusion_expr::expr::WildcardOptions; +use datafusion_expr::expr::{Wildcard, WildcardOptions}; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ @@ -413,17 +413,19 @@ impl SqlToRel<'_, S> { name: _, arg: FunctionArgExpr::Wildcard, operator: _, - } => Ok(Expr::Wildcard { + } => Ok(Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }), + })), FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => { self.sql_expr_to_logical_expr(arg, schema, planner_context) } - FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(Expr::Wildcard { - qualifier: None, - options: WildcardOptions::default(), - }), + FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => { + Ok(Expr::Wildcard(Wildcard { + qualifier: None, + options: WildcardOptions::default(), + })) + } FunctionArg::Unnamed(FunctionArgExpr::QualifiedWildcard(object_name)) => { let qualifier = self.object_name_to_table_reference(object_name)?; // Sanity check on qualifier with schema @@ -431,10 +433,10 @@ impl SqlToRel<'_, S> { if qualified_indices.is_empty() { return plan_err!("Invalid qualifier {qualifier}"); } - Ok(Expr::Wildcard { + Ok(Expr::Wildcard(Wildcard { qualifier: Some(qualifier), options: WildcardOptions::default(), - }) + })) } _ => not_impl_err!("Unsupported qualified wildcard argument: {sql:?}"), } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 57ac96951f1f..4b4efb684a2b 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -30,8 +30,8 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, }; -use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr::{InList, WildcardOptions}; +use datafusion_expr::expr::{ScalarFunction, Wildcard}; use datafusion_expr::{ lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, Operator, TryCast, @@ -565,14 +565,14 @@ impl SqlToRel<'_, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } - SQLExpr::Wildcard => Ok(Expr::Wildcard { + SQLExpr::Wildcard => Ok(Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }), - SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + })), + SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard(Wildcard { qualifier: Some(self.object_name_to_table_reference(object_name)?), options: WildcardOptions::default(), - }), + })), SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 48dfc425ee63..a7331f3abc91 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_expr::expr::Unnest; +use datafusion_expr::expr::{Unnest, Wildcard}; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ self, Array, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName, @@ -403,7 +403,7 @@ impl Unparser<'_> { }) } // TODO: unparsing wildcard addition options - Expr::Wildcard { qualifier, .. } => { + Expr::Wildcard(Wildcard { qualifier, .. }) => { if let Some(qualifier) = qualifier { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); @@ -689,10 +689,10 @@ impl Unparser<'_> { .map(|e| { if matches!( e, - Expr::Wildcard { + Expr::Wildcard(Wildcard { qualifier: None, .. - } + }) ) { Ok(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { @@ -1670,10 +1670,10 @@ mod tests { fn expr_to_sql_ok() -> Result<()> { let dummy_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let dummy_logical_plan = table_scan(Some("t"), &dummy_schema, None)? - .project(vec![Expr::Wildcard { + .project(vec![Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }])? + })])? .filter(col("a").eq(lit(1)))? .build()?; @@ -1864,10 +1864,10 @@ mod tests { (sum(col("a")), r#"sum(a)"#), ( count_udaf() - .call(vec![Expr::Wildcard { + .call(vec![Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }]) + })]) .distinct() .build() .unwrap(), @@ -1875,10 +1875,10 @@ mod tests { ), ( count_udaf() - .call(vec![Expr::Wildcard { + .call(vec![Expr::Wildcard(Wildcard { qualifier: None, options: WildcardOptions::default(), - }]) + })]) .filter(lit(true)) .build() .unwrap(),