-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
single_distinct_to_groupby no longer drops qualifiers #4050
Changes from 3 commits
1a9e8e6
e029290
084635e
99f126a
353d9ce
d13c21c
4420cf9
a6a7edd
8e45738
a232211
746c1f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -654,13 +654,13 @@ async fn group_by_dictionary() { | |
.expect("ran plan correctly"); | ||
|
||
let expected = vec![ | ||
"+-----+------------------------+", | ||
"| val | COUNT(DISTINCT t.dict) |", | ||
"+-----+------------------------+", | ||
"| 1 | 2 |", | ||
"| 2 | 2 |", | ||
"| 4 | 1 |", | ||
"+-----+------------------------+", | ||
"+-------+------------------------+", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why this plan would produce There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I only fixed this for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually. I'm confused now as well ... perhaps this name should not have been qualified in the first place? I'll take another look There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand this now. The test above is referencing a column named
The updated test is referencing a column with an alias of |
||
"| t.val | COUNT(DISTINCT t.dict) |", | ||
"+-------+------------------------+", | ||
"| 1 | 2 |", | ||
"| 2 | 2 |", | ||
"| 4 | 1 |", | ||
"+-------+------------------------+", | ||
]; | ||
assert_batches_sorted_eq!(expected, &results); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -137,12 +137,14 @@ fn optimize(plan: &LogicalPlan) -> Result<LogicalPlan> { | |
// - aggr expr | ||
let mut alias_expr: Vec<Expr> = Vec::new(); | ||
for (alias, original_field) in group_expr_alias { | ||
alias_expr.push(col(&alias).alias(original_field.name())); | ||
alias_expr.push(col(&alias).alias(original_field.qualified_name())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the main change |
||
} | ||
for (i, expr) in new_aggr_exprs.iter().enumerate() { | ||
alias_expr.push(columnize_expr( | ||
expr.clone() | ||
.alias(schema.clone().fields()[i + group_expr.len()].name()), | ||
expr.clone().alias( | ||
schema.clone().fields()[i + group_expr.len()] | ||
.qualified_name(), | ||
), | ||
&outer_aggr_schema, | ||
)); | ||
} | ||
|
@@ -362,7 +364,7 @@ mod tests { | |
.build()?; | ||
|
||
// Should work | ||
let expected = "Projection: group_alias_0 AS a, COUNT(alias1) AS COUNT(DISTINCT test.b) [a:UInt32, COUNT(DISTINCT test.b):Int64;N]\ | ||
let expected = "Projection: group_alias_0 AS test.a, COUNT(alias1) AS COUNT(DISTINCT test.b) [a:UInt32, COUNT(DISTINCT test.b):Int64;N]\ | ||
\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]] [group_alias_0:UInt32, COUNT(alias1):Int64;N]\ | ||
\n Aggregate: groupBy=[[test.a AS group_alias_0, test.b AS alias1]], aggr=[[]] [group_alias_0:UInt32, alias1:UInt32]\ | ||
\n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; | ||
|
@@ -409,7 +411,7 @@ mod tests { | |
)? | ||
.build()?; | ||
// Should work | ||
let expected = "Projection: group_alias_0 AS a, COUNT(alias1) AS COUNT(DISTINCT test.b), MAX(alias1) AS MAX(DISTINCT test.b) [a:UInt32, COUNT(DISTINCT test.b):Int64;N, MAX(DISTINCT test.b):UInt32;N]\ | ||
let expected = "Projection: group_alias_0 AS test.a, COUNT(alias1) AS COUNT(DISTINCT test.b), MAX(alias1) AS MAX(DISTINCT test.b) [a:UInt32, COUNT(DISTINCT test.b):Int64;N, MAX(DISTINCT test.b):UInt32;N]\ | ||
\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1), MAX(alias1)]] [group_alias_0:UInt32, COUNT(alias1):Int64;N, MAX(alias1):UInt32;N]\ | ||
\n Aggregate: groupBy=[[test.a AS group_alias_0, test.b AS alias1]], aggr=[[]] [group_alias_0:UInt32, alias1:UInt32]\ | ||
\n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,6 +252,34 @@ mod roundtrip_tests { | |
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn roundtrip_single_count_distinct() -> Result<(), DataFusionError> { | ||
let ctx = SessionContext::new(); | ||
|
||
let schema = Schema::new(vec![ | ||
Field::new("a", DataType::Int64, true), | ||
Field::new("b", DataType::Decimal128(15, 2), true), | ||
]); | ||
|
||
ctx.register_csv( | ||
"t1", | ||
"testdata/test.csv", | ||
CsvReadOptions::default().schema(&schema), | ||
) | ||
.await?; | ||
|
||
let query = "SELECT a, COUNT(DISTINCT b) as b_cd FROM t1 GROUP BY a"; | ||
let plan = ctx.sql(query).await?.to_logical_plan()?; | ||
|
||
println!("{:?}", plan); | ||
|
||
let bytes = logical_plan_to_bytes(&plan)?; | ||
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?; | ||
assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn roundtrip_logical_plan_with_extension() -> Result<(), DataFusionError> { | ||
let ctx = SessionContext::new(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that this logical plan was invalid before this PR because the outer projection was looking for
part.p_brand
and the inner projection only providedp_brand
.