Skip to content

Commit 9b41907

Browse files
committed
Handle no DISTINCT ON expressions and extend the docs for the replace_distinct_aggregate optimizer
1 parent 2730bbd commit 9b41907

File tree

3 files changed

+26
-1
lines changed

3 files changed

+26
-1
lines changed

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2204,13 +2204,17 @@ pub struct DistinctOn {
22042204
}
22052205

22062206
impl DistinctOn {
2207-
/// Create a new `DistintOn` struct.
2207+
/// Create a new `DistinctOn` struct.
22082208
pub fn try_new(
22092209
on_expr: Vec<Expr>,
22102210
select_expr: Vec<Expr>,
22112211
sort_expr: Option<Vec<Expr>>,
22122212
input: Arc<LogicalPlan>,
22132213
) -> Result<Self> {
2214+
if on_expr.is_empty() {
2215+
return plan_err!("No `ON` expressions provided");
2216+
}
2217+
22142218
let on_expr = normalize_cols(on_expr, input.as_ref())?;
22152219

22162220
// Create fields with any qualifier stuffed in the name itself

datafusion/optimizer/src/replace_distinct_aggregate.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,23 @@ use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan};
3636
/// ```text
3737
/// SELECT a, b FROM tab GROUP BY a, b
3838
/// ```
39+
///
40+
/// On the other hand, in the case of a [[DistinctOn]] query the replacement is
41+
/// a bit more involved and effectively converts
42+
/// ```text
43+
/// SELECT DISTINCT ON (a) b FROM tab ORDER BY a DESC, c
44+
/// ```
45+
///
46+
/// into
47+
/// ```text
48+
/// SELECT b FROM (
49+
/// SELECT a, FIRST_VALUE(b ORDER BY a DESC, c) AS b
50+
/// FROM tab
51+
/// GROUP BY a
52+
/// )
53+
/// ORDER BY a DESC
54+
/// ```
55+
///
3956
4057
/// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]]
4158
#[derive(Default)]

datafusion/sqllogictest/test_files/distinct_on.slt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ SELECT DISTINCT ON (c1) c2 FROM aggregate_test_100 ORDER BY c1, c3;
9292
query error SELECT DISTINCT ON expressions must match initial ORDER BY expressions
9393
SELECT DISTINCT ON (c2 % 2 = 0) c2, c3 - 100 FROM aggregate_test_100 ORDER BY c2, c3;
9494

95+
# ON expressions are empty
96+
query error DataFusion error: Error during planning: No `ON` expressions provided
97+
SELECT DISTINCT ON () c1, c2 FROM aggregate_test_100 ORDER BY c1, c2;
98+
9599
# Use expressions in the ON and ORDER BY clauses, as well as the selection
96100
query II
97101
SELECT DISTINCT ON (c2 % 2 = 0) c2, c3 - 100 FROM aggregate_test_100 ORDER BY c2 % 2 = 0, c3 DESC;

0 commit comments

Comments
 (0)