Skip to content

Commit

Permalink
fix: make ntile work in some corner cases (#8371)
Browse files Browse the repository at this point in the history
* fix: make ntile work in some corner cases

* fix comments

* minor

* Update datafusion/sqllogictest/test_files/window.slt

Co-authored-by: Mustafa Akur <[email protected]>

---------

Co-authored-by: Mustafa Akur <[email protected]>
  • Loading branch information
haohuaijin and mustafasrepo authored Dec 1, 2023
1 parent c19260d commit eb5aa22
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 11 deletions.
15 changes: 14 additions & 1 deletion datafusion/expr/src/window_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,20 @@ impl BuiltInWindowFunction {
BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => {
Signature::any(1, Volatility::Immutable)
}
BuiltInWindowFunction::Ntile => Signature::any(1, Volatility::Immutable),
BuiltInWindowFunction::Ntile => Signature::uniform(
1,
vec![
DataType::UInt64,
DataType::UInt32,
DataType::UInt16,
DataType::UInt8,
DataType::Int64,
DataType::Int32,
DataType::Int16,
DataType::Int8,
],
Volatility::Immutable,
),
BuiltInWindowFunction::NthValue => Signature::any(2, Volatility::Immutable),
}
}
Expand Down
3 changes: 2 additions & 1 deletion datafusion/physical-expr/src/window/ntile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ impl PartitionEvaluator for NtileEvaluator {
) -> Result<ArrayRef> {
let num_rows = num_rows as u64;
let mut vec: Vec<u64> = Vec::new();
let n = u64::min(self.n, num_rows);
for i in 0..num_rows {
let res = i * self.n / num_rows;
let res = i * n / num_rows;
vec.push(res + 1)
}
Ok(Arc::new(UInt64Array::from(vec)))
Expand Down
29 changes: 20 additions & 9 deletions datafusion/physical-plan/src/windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,26 @@ fn create_built_in_window_expr(
BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)),
BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)),
BuiltInWindowFunction::Ntile => {
let n: i64 = get_scalar_value_from_args(args, 0)?
.ok_or_else(|| {
DataFusionError::Execution(
"NTILE requires at least 1 argument".to_string(),
)
})?
.try_into()?;
let n: u64 = n as u64;
Arc::new(Ntile::new(name, n))
let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| {
DataFusionError::Execution(
"NTILE requires a positive integer".to_string(),
)
})?;

if n.is_null() {
return exec_err!("NTILE requires a positive integer, but finds NULL");
}

if n.is_unsigned() {
let n: u64 = n.try_into()?;
Arc::new(Ntile::new(name, n))
} else {
let n: i64 = n.try_into()?;
if n <= 0 {
return exec_err!("NTILE requires a positive integer");
}
Arc::new(Ntile::new(name, n as u64))
}
}
BuiltInWindowFunction::Lag => {
let arg = args[0].clone();
Expand Down
146 changes: 146 additions & 0 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3581,3 +3581,149 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY c1) AS ntile_2 FROM aggr

statement ok
DROP TABLE new_table;

statement ok
CREATE TABLE t1 (a int) AS VALUES (1), (2), (3);

query I
SELECT NTILE(9223377) OVER(ORDER BY a) FROM t1;
----
1
2
3

query I
SELECT NTILE(9223372036854775809) OVER(ORDER BY a) FROM t1;
----
1
2
3

query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT NTILE(-922337203685477580) OVER(ORDER BY a) FROM t1;

query error DataFusion error: Execution error: Table 't' doesn't exist\.
DROP TABLE t;

# NTILE with PARTITION BY, those tests from duckdb: https://github.com/duckdb/duckdb/blob/main/test/sql/window/test_ntile.test
statement ok
CREATE TABLE score_board (team_name VARCHAR, player VARCHAR, score INTEGER) as VALUES
('Mongrels', 'Apu', 350),
('Mongrels', 'Ned', 666),
('Mongrels', 'Meg', 1030),
('Mongrels', 'Burns', 1270),
('Simpsons', 'Homer', 1),
('Simpsons', 'Lisa', 710),
('Simpsons', 'Marge', 990),
('Simpsons', 'Bart', 2010)

query TTII
SELECT
team_name,
player,
score,
NTILE(2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 1
Mongrels Meg 1030 2
Mongrels Burns 1270 2
Simpsons Homer 1 1
Simpsons Lisa 710 1
Simpsons Marge 990 2
Simpsons Bart 2010 2

query TTII
SELECT
team_name,
player,
score,
NTILE(2) OVER (ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY score;
----
Simpsons Homer 1 1
Mongrels Apu 350 1
Mongrels Ned 666 1
Simpsons Lisa 710 1
Simpsons Marge 990 2
Mongrels Meg 1030 2
Mongrels Burns 1270 2
Simpsons Bart 2010 2

query TTII
SELECT
team_name,
player,
score,
NTILE(1000) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 2
Mongrels Meg 1030 3
Mongrels Burns 1270 4
Simpsons Homer 1 1
Simpsons Lisa 710 2
Simpsons Marge 990 3
Simpsons Bart 2010 4

query TTII
SELECT
team_name,
player,
score,
NTILE(1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 1
Mongrels Meg 1030 1
Mongrels Burns 1270 1
Simpsons Homer 1 1
Simpsons Lisa 710 1
Simpsons Marge 990 1
Simpsons Bart 2010 1

# incorrect number of parameters for ntile
query error DataFusion error: Execution error: NTILE requires a positive integer, but finds NULL
SELECT
NTILE(NULL) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT
NTILE(-1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT
NTILE(0) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE() OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2,3) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2,3,4) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement ok
DROP TABLE score_board;

0 comments on commit eb5aa22

Please sign in to comment.