Skip to content

Commit eb5aa22

Browse files
fix: make ntile work in some corner cases (#8371)
* fix: make ntile work in some corner cases * fix comments * minor * Update datafusion/sqllogictest/test_files/window.slt Co-authored-by: Mustafa Akur <[email protected]> --------- Co-authored-by: Mustafa Akur <[email protected]>
1 parent c19260d commit eb5aa22

File tree

4 files changed

+182
-11
lines changed

4 files changed

+182
-11
lines changed

datafusion/expr/src/window_function.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,20 @@ impl BuiltInWindowFunction {
268268
BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => {
269269
Signature::any(1, Volatility::Immutable)
270270
}
271-
BuiltInWindowFunction::Ntile => Signature::any(1, Volatility::Immutable),
271+
BuiltInWindowFunction::Ntile => Signature::uniform(
272+
1,
273+
vec![
274+
DataType::UInt64,
275+
DataType::UInt32,
276+
DataType::UInt16,
277+
DataType::UInt8,
278+
DataType::Int64,
279+
DataType::Int32,
280+
DataType::Int16,
281+
DataType::Int8,
282+
],
283+
Volatility::Immutable,
284+
),
272285
BuiltInWindowFunction::NthValue => Signature::any(2, Volatility::Immutable),
273286
}
274287
}

datafusion/physical-expr/src/window/ntile.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,9 @@ impl PartitionEvaluator for NtileEvaluator {
9696
) -> Result<ArrayRef> {
9797
let num_rows = num_rows as u64;
9898
let mut vec: Vec<u64> = Vec::new();
99+
let n = u64::min(self.n, num_rows);
99100
for i in 0..num_rows {
100-
let res = i * self.n / num_rows;
101+
let res = i * n / num_rows;
101102
vec.push(res + 1)
102103
}
103104
Ok(Arc::new(UInt64Array::from(vec)))

datafusion/physical-plan/src/windows/mod.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,26 @@ fn create_built_in_window_expr(
189189
BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)),
190190
BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)),
191191
BuiltInWindowFunction::Ntile => {
192-
let n: i64 = get_scalar_value_from_args(args, 0)?
193-
.ok_or_else(|| {
194-
DataFusionError::Execution(
195-
"NTILE requires at least 1 argument".to_string(),
196-
)
197-
})?
198-
.try_into()?;
199-
let n: u64 = n as u64;
200-
Arc::new(Ntile::new(name, n))
192+
let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| {
193+
DataFusionError::Execution(
194+
"NTILE requires a positive integer".to_string(),
195+
)
196+
})?;
197+
198+
if n.is_null() {
199+
return exec_err!("NTILE requires a positive integer, but finds NULL");
200+
}
201+
202+
if n.is_unsigned() {
203+
let n: u64 = n.try_into()?;
204+
Arc::new(Ntile::new(name, n))
205+
} else {
206+
let n: i64 = n.try_into()?;
207+
if n <= 0 {
208+
return exec_err!("NTILE requires a positive integer");
209+
}
210+
Arc::new(Ntile::new(name, n as u64))
211+
}
201212
}
202213
BuiltInWindowFunction::Lag => {
203214
let arg = args[0].clone();

datafusion/sqllogictest/test_files/window.slt

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3581,3 +3581,149 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY c1) AS ntile_2 FROM aggr
35813581

35823582
statement ok
35833583
DROP TABLE new_table;
3584+
3585+
statement ok
3586+
CREATE TABLE t1 (a int) AS VALUES (1), (2), (3);
3587+
3588+
query I
3589+
SELECT NTILE(9223377) OVER(ORDER BY a) FROM t1;
3590+
----
3591+
1
3592+
2
3593+
3
3594+
3595+
query I
3596+
SELECT NTILE(9223372036854775809) OVER(ORDER BY a) FROM t1;
3597+
----
3598+
1
3599+
2
3600+
3
3601+
3602+
query error DataFusion error: Execution error: NTILE requires a positive integer
3603+
SELECT NTILE(-922337203685477580) OVER(ORDER BY a) FROM t1;
3604+
3605+
query error DataFusion error: Execution error: Table 't' doesn't exist\.
3606+
DROP TABLE t;
3607+
3608+
# NTILE with PARTITION BY, those tests from duckdb: https://github.com/duckdb/duckdb/blob/main/test/sql/window/test_ntile.test
3609+
statement ok
3610+
CREATE TABLE score_board (team_name VARCHAR, player VARCHAR, score INTEGER) as VALUES
3611+
('Mongrels', 'Apu', 350),
3612+
('Mongrels', 'Ned', 666),
3613+
('Mongrels', 'Meg', 1030),
3614+
('Mongrels', 'Burns', 1270),
3615+
('Simpsons', 'Homer', 1),
3616+
('Simpsons', 'Lisa', 710),
3617+
('Simpsons', 'Marge', 990),
3618+
('Simpsons', 'Bart', 2010)
3619+
3620+
query TTII
3621+
SELECT
3622+
team_name,
3623+
player,
3624+
score,
3625+
NTILE(2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3626+
FROM score_board s
3627+
ORDER BY team_name, score;
3628+
----
3629+
Mongrels Apu 350 1
3630+
Mongrels Ned 666 1
3631+
Mongrels Meg 1030 2
3632+
Mongrels Burns 1270 2
3633+
Simpsons Homer 1 1
3634+
Simpsons Lisa 710 1
3635+
Simpsons Marge 990 2
3636+
Simpsons Bart 2010 2
3637+
3638+
query TTII
3639+
SELECT
3640+
team_name,
3641+
player,
3642+
score,
3643+
NTILE(2) OVER (ORDER BY score ASC) AS NTILE
3644+
FROM score_board s
3645+
ORDER BY score;
3646+
----
3647+
Simpsons Homer 1 1
3648+
Mongrels Apu 350 1
3649+
Mongrels Ned 666 1
3650+
Simpsons Lisa 710 1
3651+
Simpsons Marge 990 2
3652+
Mongrels Meg 1030 2
3653+
Mongrels Burns 1270 2
3654+
Simpsons Bart 2010 2
3655+
3656+
query TTII
3657+
SELECT
3658+
team_name,
3659+
player,
3660+
score,
3661+
NTILE(1000) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3662+
FROM score_board s
3663+
ORDER BY team_name, score;
3664+
----
3665+
Mongrels Apu 350 1
3666+
Mongrels Ned 666 2
3667+
Mongrels Meg 1030 3
3668+
Mongrels Burns 1270 4
3669+
Simpsons Homer 1 1
3670+
Simpsons Lisa 710 2
3671+
Simpsons Marge 990 3
3672+
Simpsons Bart 2010 4
3673+
3674+
query TTII
3675+
SELECT
3676+
team_name,
3677+
player,
3678+
score,
3679+
NTILE(1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3680+
FROM score_board s
3681+
ORDER BY team_name, score;
3682+
----
3683+
Mongrels Apu 350 1
3684+
Mongrels Ned 666 1
3685+
Mongrels Meg 1030 1
3686+
Mongrels Burns 1270 1
3687+
Simpsons Homer 1 1
3688+
Simpsons Lisa 710 1
3689+
Simpsons Marge 990 1
3690+
Simpsons Bart 2010 1
3691+
3692+
# incorrect number of parameters for ntile
3693+
query error DataFusion error: Execution error: NTILE requires a positive integer, but finds NULL
3694+
SELECT
3695+
NTILE(NULL) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3696+
FROM score_board s
3697+
3698+
query error DataFusion error: Execution error: NTILE requires a positive integer
3699+
SELECT
3700+
NTILE(-1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3701+
FROM score_board s
3702+
3703+
query error DataFusion error: Execution error: NTILE requires a positive integer
3704+
SELECT
3705+
NTILE(0) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3706+
FROM score_board s
3707+
3708+
statement error
3709+
SELECT
3710+
NTILE() OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3711+
FROM score_board s
3712+
3713+
statement error
3714+
SELECT
3715+
NTILE(1,2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3716+
FROM score_board s
3717+
3718+
statement error
3719+
SELECT
3720+
NTILE(1,2,3) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3721+
FROM score_board s
3722+
3723+
statement error
3724+
SELECT
3725+
NTILE(1,2,3,4) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
3726+
FROM score_board s
3727+
3728+
statement ok
3729+
DROP TABLE score_board;

0 commit comments

Comments
 (0)