Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change name of MAX/MIN udaf to lowercase max/min #11795

Merged
merged 4 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2060,7 +2060,7 @@ mod tests {

assert_batches_sorted_eq!(
["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
"| c1 | min(aggregate_test_100.c12) | max(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
"+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
"| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ where
/// assert_batches_eq!(
/// &[
/// "+---+----------------+",
/// "| a | MIN(?table?.b) |",
/// "| a | min(?table?.b) |",
/// "+---+----------------+",
/// "| 1 | 2 |",
/// "+---+----------------+",
Expand All @@ -182,14 +182,14 @@ where
/// let mut ctx = SessionContext::new();
/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
/// let results = ctx
/// .sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")
/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100")
/// .await?
/// .collect()
/// .await?;
/// assert_batches_eq!(
/// &[
/// "+---+----------------+",
/// "| a | MIN(example.b) |",
/// "| a | min(example.b) |",
/// "+---+----------------+",
/// "| 1 | 2 |",
/// "+---+----------------+",
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
//!
//! let expected = vec![
//! "+---+----------------+",
//! "| a | MIN(?table?.b) |",
//! "| a | min(?table?.b) |",
//! "+---+----------------+",
//! "| 1 | 2 |",
//! "+---+----------------+"
Expand Down Expand Up @@ -114,7 +114,7 @@
//!
//! let expected = vec![
//! "+---+----------------+",
//! "| a | MIN(example.b) |",
//! "| a | min(example.b) |",
//! "+---+----------------+",
//! "| 1 | 2 |",
//! "+---+----------------+"
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/custom_sources_cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ async fn optimizers_catch_all_statistics() {
let expected = RecordBatch::try_new(
Arc::new(Schema::new(vec![
Field::new("count(*)", DataType::Int64, false),
Field::new("MIN(test.c1)", DataType::Int32, false),
Field::new("MAX(test.c1)", DataType::Int32, false),
Field::new("min(test.c1)", DataType::Int32, false),
Field::new("max(test.c1)", DataType::Int32, false),
])),
vec![
Arc::new(Int64Array::from(vec![4])),
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/tests/expr_api/parse_sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ async fn round_trip_parse_sql_expr() -> Result<()> {
"((a = 10) AND b NOT IN (20, 30))",
"sum(a)",
"(sum(a) + 1)",
"(MIN(a) + MAX(b))",
"(MIN(a) + (MAX(b) * sum(c)))",
"(MIN(a) + ((MAX(b) * sum(c)) / 10))",
"(min(a) + max(b))",
"(min(a) + (max(b) * sum(c)))",
"(min(a) + ((max(b) * sum(c)) / 10))",
];

for test in tests {
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,11 +615,11 @@ async fn test_physical_plan_display_indent() {
"GlobalLimitExec: skip=0, fetch=10",
" SortPreservingMergeExec: [the_min@2 DESC], fetch=10",
" SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]",
" ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)@2 as the_min]",
" AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]",
" AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000",
" AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" FilterExec: c12@1 < 10",
" RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1",
Expand Down
12 changes: 6 additions & 6 deletions datafusion/expr/src/expr_rewriter/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_common::{Column, Result};

/// Rewrite sort on aggregate expressions to sort on the column of aggregate output
/// For example, `max(x)` is written to `col("MAX(x)")`
/// For example, `max(x)` is written to `col("max(x)")`
pub fn rewrite_sort_cols_by_aggs(
exprs: impl IntoIterator<Item = impl Into<Expr>>,
plan: &LogicalPlan,
Expand Down Expand Up @@ -108,7 +108,7 @@ fn rewrite_in_terms_of_projection(
};

// expr is an actual expr like min(t.c2), but we are looking
// for a column with the same "MIN(C2)", so translate there
// for a column with the same "min(C2)", so translate there
let name = normalized_expr.display_name()?;

let search_col = Expr::Column(Column {
Expand Down Expand Up @@ -237,15 +237,15 @@ mod test {
expected: sort(col("c1")),
},
TestCase {
desc: r#"min(c2) --> "MIN(c2)" -- (column *named* "min(t.c2)"!)"#,
desc: r#"min(c2) --> "min(c2)" -- (column *named* "min(t.c2)"!)"#,
input: sort(min(col("c2"))),
expected: sort(col("MIN(t.c2)")),
expected: sort(col("min(t.c2)")),
},
TestCase {
desc: r#"c1 + min(c2) --> "c1 + MIN(c2)" -- (column *named* "min(t.c2)"!)"#,
desc: r#"c1 + min(c2) --> "c1 + min(c2)" -- (column *named* "min(t.c2)"!)"#,
input: sort(col("c1") + min(col("c2"))),
// should be "c1" not t.c1
expected: sort(col("c1") + col("MIN(t.c2)")),
expected: sort(col("c1") + col("min(t.c2)")),
},
TestCase {
desc: r#"avg(c3) --> "avg(t.c3)" as average (column *named* "avg(t.c3)", aliased)"#,
Expand Down
12 changes: 4 additions & 8 deletions datafusion/expr/src/test/function_stub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ pub fn min(expr: Expr) -> Expr {
/// Testing stub implementation of Min aggregate
pub struct Min {
signature: Signature,
aliases: Vec<String>,
}

impl std::fmt::Debug for Min {
Expand All @@ -326,7 +325,6 @@ impl Default for Min {
impl Min {
pub fn new() -> Self {
Self {
aliases: vec!["min".to_string()],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need uppercase alias.

To use uppercase alias the query is something like select "MAX"(a) from t;, which is uncommon.

signature: Signature::variadic_any(Volatility::Immutable),
}
}
Expand All @@ -338,7 +336,7 @@ impl AggregateUDFImpl for Min {
}

fn name(&self) -> &str {
"MIN"
"min"
}

fn signature(&self) -> &Signature {
Expand All @@ -358,7 +356,7 @@ impl AggregateUDFImpl for Min {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn create_groups_accumulator(
Expand Down Expand Up @@ -392,7 +390,6 @@ pub fn max(expr: Expr) -> Expr {
/// Testing stub implementation of MAX aggregate
pub struct Max {
signature: Signature,
aliases: Vec<String>,
}

impl std::fmt::Debug for Max {
Expand All @@ -413,7 +410,6 @@ impl Default for Max {
impl Max {
pub fn new() -> Self {
Self {
aliases: vec!["max".to_string()],
signature: Signature::variadic_any(Volatility::Immutable),
}
}
Expand All @@ -425,7 +421,7 @@ impl AggregateUDFImpl for Max {
}

fn name(&self) -> &str {
"MAX"
"max"
}

fn signature(&self) -> &Signature {
Expand All @@ -445,7 +441,7 @@ impl AggregateUDFImpl for Max {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn create_groups_accumulator(
Expand Down
12 changes: 4 additions & 8 deletions datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,12 @@ fn get_min_max_result_type(input_types: &[DataType]) -> Result<Vec<DataType>> {
// MAX aggregate UDF
#[derive(Debug)]
pub struct Max {
aliases: Vec<String>,
signature: Signature,
}

impl Max {
pub fn new() -> Self {
Self {
aliases: vec!["max".to_owned()],
signature: Signature::user_defined(Volatility::Immutable),
}
}
Expand Down Expand Up @@ -146,7 +144,7 @@ impl AggregateUDFImpl for Max {
}

fn name(&self) -> &str {
"MAX"
"max"
}

fn signature(&self) -> &Signature {
Expand All @@ -162,7 +160,7 @@ impl AggregateUDFImpl for Max {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
Expand Down Expand Up @@ -891,14 +889,12 @@ impl Accumulator for SlidingMaxAccumulator {
#[derive(Debug)]
pub struct Min {
signature: Signature,
aliases: Vec<String>,
}

impl Min {
pub fn new() -> Self {
Self {
signature: Signature::user_defined(Volatility::Immutable),
aliases: vec!["min".to_owned()],
}
}
}
Expand All @@ -915,7 +911,7 @@ impl AggregateUDFImpl for Min {
}

fn name(&self) -> &str {
"MIN"
"min"
}

fn signature(&self) -> &Signature {
Expand All @@ -931,7 +927,7 @@ impl AggregateUDFImpl for Min {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ mod tests {
.build()?;

let expected = "Projection: count(Int64(1)) AS count(*) [count(*):Int64]\
\n Aggregate: groupBy=[[]], aggr=[[MAX(count(Int64(1))) AS MAX(count(*))]] [MAX(count(*)):Int64;N]\
\n Aggregate: groupBy=[[]], aggr=[[max(count(Int64(1))) AS max(count(*))]] [max(count(*)):Int64;N]\
\n TableScan: test [a:UInt32, b:UInt32, c:UInt32]";
assert_plan_eq(plan, expected)
}
Expand Down
32 changes: 16 additions & 16 deletions datafusion/optimizer/src/optimize_projections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ mod tests {
.aggregate(Vec::<Expr>::new(), vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[b]";

assert_optimized_plan_equal(plan, expected)
Expand All @@ -1375,7 +1375,7 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[test.c]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[b, c]";

assert_optimized_plan_equal(plan, expected)
Expand All @@ -1390,7 +1390,7 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[a.c]], aggr=[[MAX(a.b)]]\
let expected = "Aggregate: groupBy=[[a.c]], aggr=[[max(a.b)]]\
\n SubqueryAlias: a\
\n TableScan: test projection=[b, c]";

Expand All @@ -1406,7 +1406,7 @@ mod tests {
.aggregate(Vec::<Expr>::new(), vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\
\n Projection: test.b\
\n Filter: test.c > Int32(1)\
\n TableScan: test projection=[b, c]";
Expand All @@ -1422,7 +1422,7 @@ mod tests {
// "tag.one", not a column named "one" in a table named "tag"):
//
// Projection: tag.one
// Aggregate: groupBy=[], aggr=[MAX("tag.one") AS "tag.one"]
// Aggregate: groupBy=[], aggr=[max("tag.one") AS "tag.one"]
// TableScan
let plan = table_scan(Some("m4"), &schema, None)?
.aggregate(
Expand All @@ -1433,7 +1433,7 @@ mod tests {
.build()?;

let expected = "\
Aggregate: groupBy=[[]], aggr=[[MAX(m4.tag.one) AS tag.one]]\
Aggregate: groupBy=[[]], aggr=[[max(m4.tag.one) AS tag.one]]\
\n TableScan: m4 projection=[tag.one]";

assert_optimized_plan_equal(plan, expected)
Expand Down Expand Up @@ -1768,11 +1768,11 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("a"))])?
.build()?;

assert_fields_eq(&plan, vec!["c", "MAX(test.a)"]);
assert_fields_eq(&plan, vec!["c", "max(test.a)"]);

let plan = optimize(plan).expect("failed to optimize plan");
let expected = "\
Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.a)]]\
Aggregate: groupBy=[[test.c]], aggr=[[max(test.a)]]\
\n Filter: test.c > Int32(1)\
\n Projection: test.c, test.a\
\n TableScan: test projection=[a, c]";
Expand Down Expand Up @@ -1862,14 +1862,14 @@ mod tests {
let plan = LogicalPlanBuilder::from(table_scan)
.aggregate(vec![col("a"), col("c")], vec![max(col("b")), min(col("b"))])?
.filter(col("c").gt(lit(1)))?
.project(vec![col("c"), col("a"), col("MAX(test.b)")])?
.project(vec![col("c"), col("a"), col("max(test.b)")])?
.build()?;

assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]);
assert_fields_eq(&plan, vec!["c", "a", "max(test.b)"]);

let expected = "Projection: test.c, test.a, MAX(test.b)\
let expected = "Projection: test.c, test.a, max(test.b)\
\n Filter: test.c > Int32(1)\
\n Aggregate: groupBy=[[test.a, test.c]], aggr=[[MAX(test.b)]]\
\n Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[a, b, c]";

assert_optimized_plan_equal(plan, expected)
Expand Down Expand Up @@ -1937,10 +1937,10 @@ mod tests {
.project(vec![col1, col2])?
.build()?;

let expected = "Projection: MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n Projection: test.b, MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
let expected = "Projection: max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n Projection: test.b, max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n TableScan: test projection=[a, b]";

assert_optimized_plan_equal(plan, expected)
Expand Down
Loading