-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: preserve qualifiers when rewriting expressions #12341
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -26,9 +26,7 @@ use crate::logical_plan::Projection; | |||
use crate::{Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder}; | ||||
|
||||
use datafusion_common::config::ConfigOptions; | ||||
use datafusion_common::tree_node::{ | ||||
Transformed, TransformedResult, TreeNode, TreeNodeRewriter, | ||||
}; | ||||
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; | ||||
use datafusion_common::TableReference; | ||||
use datafusion_common::{Column, DFSchema, Result}; | ||||
|
||||
|
@@ -279,22 +277,10 @@ pub fn unalias(expr: Expr) -> Expr { | |||
} | ||||
} | ||||
|
||||
/// Rewrites `expr` using `rewriter`, ensuring that the output has the | ||||
/// same name as `expr` prior to rewrite, adding an alias if necessary. | ||||
/// | ||||
/// This is important when optimizing plans to ensure the output | ||||
/// schema of plan nodes don't change after optimization | ||||
pub fn rewrite_preserving_name<R>(expr: Expr, rewriter: &mut R) -> Result<Expr> | ||||
where | ||||
R: TreeNodeRewriter<Node = Expr>, | ||||
{ | ||||
let original_name = expr.name_for_alias()?; | ||||
let expr = expr.rewrite(rewriter)?.data; | ||||
expr.alias_if_changed(original_name) | ||||
} | ||||
|
||||
/// Handles ensuring the name of rewritten expressions is not changed. | ||||
/// | ||||
/// This is important when optimizing plans to ensure the output | ||||
/// schema of plan nodes don't change after optimization. | ||||
/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the | ||||
/// expression should be preserved: `3 as "1 + 2"` | ||||
/// | ||||
|
@@ -303,9 +289,17 @@ pub struct NamePreserver { | |||
use_alias: bool, | ||||
} | ||||
|
||||
/// If the name of an expression is remembered, it will be preserved when | ||||
/// rewriting the expression | ||||
pub struct SavedName(Option<String>); | ||||
/// If the qualified name of an expression is remembered, it will be preserved | ||||
/// when rewriting the expression | ||||
pub enum SavedName { | ||||
/// Saved qualified name to be preserved | ||||
Saved { | ||||
relation: Option<TableReference>, | ||||
name: String, | ||||
}, | ||||
/// Name is not preserved | ||||
None, | ||||
} | ||||
|
||||
impl NamePreserver { | ||||
/// Create a new NamePreserver for rewriting the `expr` that is part of the specified plan | ||||
|
@@ -326,23 +320,30 @@ impl NamePreserver { | |||
|
||||
pub fn save(&self, expr: &Expr) -> Result<SavedName> { | ||||
let original_name = if self.use_alias { | ||||
Some(expr.name_for_alias()?) | ||||
let (relation, name) = expr.qualified_name(); | ||||
SavedName::Saved { relation, name } | ||||
} else { | ||||
None | ||||
SavedName::None | ||||
}; | ||||
|
||||
Ok(SavedName(original_name)) | ||||
Ok(original_name) | ||||
} | ||||
} | ||||
|
||||
impl SavedName { | ||||
/// Ensures the name of the rewritten expression is preserved | ||||
/// Ensures the qualified name of the rewritten expression is preserved | ||||
pub fn restore(self, expr: Expr) -> Result<Expr> { | ||||
let Self(original_name) = self; | ||||
match original_name { | ||||
Some(name) => expr.alias_if_changed(name), | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I seems like we could potentially also remove The only other use of it seems to be in
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||
None => Ok(expr), | ||||
} | ||||
let expr = match self { | ||||
SavedName::Saved { relation, name } => { | ||||
let (new_relation, new_name) = expr.qualified_name(); | ||||
if new_relation != relation || new_name != name { | ||||
expr.alias_qualified(relation, name) | ||||
} else { | ||||
expr | ||||
} | ||||
} | ||||
SavedName::None => expr, | ||||
}; | ||||
Ok(expr) | ||||
} | ||||
} | ||||
|
||||
|
@@ -353,6 +354,7 @@ mod test { | |||
use super::*; | ||||
use crate::{col, lit, Cast}; | ||||
use arrow::datatypes::{DataType, Field, Schema}; | ||||
use datafusion_common::tree_node::TreeNodeRewriter; | ||||
use datafusion_common::ScalarValue; | ||||
|
||||
#[derive(Default)] | ||||
|
@@ -511,10 +513,20 @@ mod test { | |||
|
||||
// change literal type from i32 to i64 | ||||
test_rewrite(col("a").add(lit(1i32)), col("a").add(lit(1i64))); | ||||
|
||||
// test preserve qualifier | ||||
test_rewrite( | ||||
Expr::Column(Column::new(Some("test"), "a")), | ||||
Expr::Column(Column::new_unqualified("test.a")), | ||||
); | ||||
test_rewrite( | ||||
Expr::Column(Column::new_unqualified("test.a")), | ||||
Expr::Column(Column::new(Some("test"), "a")), | ||||
); | ||||
} | ||||
|
||||
/// rewrites `expr_from` to `rewrite_to` using | ||||
/// `rewrite_preserving_name` verifying the result is `expected_expr` | ||||
/// rewrites `expr_from` to `rewrite_to` while preserving the original qualified name | ||||
/// by using the `NamePreserver` | ||||
fn test_rewrite(expr_from: Expr, rewrite_to: Expr) { | ||||
struct TestRewriter { | ||||
rewrite_to: Expr, | ||||
|
@@ -531,11 +543,12 @@ mod test { | |||
let mut rewriter = TestRewriter { | ||||
rewrite_to: rewrite_to.clone(), | ||||
}; | ||||
let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap(); | ||||
|
||||
let original_name = expr_from.schema_name().to_string(); | ||||
let new_name = expr.schema_name().to_string(); | ||||
let saved_name = NamePreserver { use_alias: true }.save(&expr_from).unwrap(); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests use |
||||
let new_expr = expr_from.clone().rewrite(&mut rewriter).unwrap().data; | ||||
let new_expr = saved_name.restore(new_expr).unwrap(); | ||||
|
||||
let original_name = expr_from.qualified_name(); | ||||
let new_name = new_expr.qualified_name(); | ||||
assert_eq!( | ||||
original_name, new_name, | ||||
"mismatch rewriting expr_from: {expr_from} to {rewrite_to}" | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The return value no longer needs
Result
, but removing it breaks many optimization rules. Maybe we can handle it in the next PR.