@@ -3733,3 +3733,61 @@ AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multip
37333733--------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered
37343734----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
37353735------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
3736+
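3737+ # Tests for the single-distinct-to-group-by optimization rule: the plans below show the rewrite only when every DISTINCT aggregate takes the same argument expression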
3738+ statement ok
3739+ CREATE TABLE t(x int) AS VALUES (1), (2), (1);
3740+
3741+ statement ok
3742+ create table t1(x bigint,y int) as values (9223372036854775807,2), (9223372036854775806,2);
3743+
3744+ query II
3745+ SELECT SUM(DISTINCT x), MAX(DISTINCT x) from t GROUP BY x ORDER BY x;
3746+ ----
3747+ 1 1
3748+ 2 2
3749+
3750+ query II
3751+ SELECT MAX(DISTINCT x), SUM(DISTINCT x) from t GROUP BY x ORDER BY x;
3752+ ----
3753+ 1 1
3754+ 2 2
3755+
3756+ query TT
3757+ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT x) FROM t1 GROUP BY y;
3758+ ----
3759+ logical_plan
3760+ Projection: SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)
3761+ --Aggregate: groupBy=[[t1.y]], aggr=[[SUM(DISTINCT CAST(t1.x AS Float64)), MAX(DISTINCT t1.x)]]
3762+ ----TableScan: t1 projection=[x, y]
3763+ physical_plan
3764+ ProjectionExec: expr=[SUM(DISTINCT t1.x)@1 as SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)@2 as MAX(DISTINCT t1.x)]
3765+ --AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
3766+ ----CoalesceBatchesExec: target_batch_size=2
3767+ ------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
3768+ --------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
3769+ ----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
3770+ ------------MemoryExec: partitions=1, partition_sizes=[1]
3771+
3772+ query TT
3773+ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) FROM t1 GROUP BY y;
3774+ ----
3775+ logical_plan
3776+ Projection: SUM(alias1) AS SUM(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x)
3777+ --Aggregate: groupBy=[[t1.y]], aggr=[[SUM(alias1), MAX(alias1)]]
3778+ ----Aggregate: groupBy=[[t1.y, CAST(t1.x AS Float64)t1.x AS t1.x AS alias1]], aggr=[[]]
3779+ ------Projection: CAST(t1.x AS Float64) AS CAST(t1.x AS Float64)t1.x, t1.y
3780+ --------TableScan: t1 projection=[x, y]
3781+ physical_plan
3782+ ProjectionExec: expr=[SUM(alias1)@1 as SUM(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)]
3783+ --AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
3784+ ----CoalesceBatchesExec: target_batch_size=2
3785+ ------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
3786+ --------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
3787+ ----------AggregateExec: mode=FinalPartitioned, gby=[y@0 as y, alias1@1 as alias1], aggr=[]
3788+ ------------CoalesceBatchesExec: target_batch_size=2
3789+ --------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8
3790+ ----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
3791+ ------------------AggregateExec: mode=Partial, gby=[y@1 as y, CAST(t1.x AS Float64)t1.x@0 as alias1], aggr=[]
3792+ --------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as CAST(t1.x AS Float64)t1.x, y@1 as y]
3793+ ----------------------MemoryExec: partitions=1, partition_sizes=[1]
0 commit comments