@@ -40,7 +40,7 @@ use arrow_schema::DataType;
4040use datafusion_common:: stats:: Precision ;
4141use datafusion_common:: { internal_err, not_impl_err, Result } ;
4242use datafusion_execution:: TaskContext ;
43- use datafusion_expr:: Accumulator ;
43+ use datafusion_expr:: { Accumulator , Aggregate } ;
4444use datafusion_physical_expr:: {
4545 equivalence:: { collapse_lex_req, ProjectionMapping } ,
4646 expressions:: Column ,
@@ -110,8 +110,6 @@ impl AggregateMode {
110110 }
111111}
112112
113- const INTERNAL_GROUPING_ID : & str = "grouping_id" ;
114-
115113/// Represents `GROUP BY` clause in the plan (including the more general GROUPING SET)
116114/// In the case of a simple `GROUP BY a, b` clause, this will contain the expression [a, b]
117115/// and a single group [false, false].
@@ -141,10 +139,6 @@ pub struct PhysicalGroupBy {
141139 /// expression in null_expr. If `groups[i][j]` is true, then the
142140 /// j-th expression in the i-th group is NULL, otherwise it is `expr[j]`.
143141 groups : Vec < Vec < bool > > ,
144- // The number of internal expressions that are used to implement grouping
145- // sets. These output are removed from the final output and not in `expr`
146- // as they are generated based on the value in `groups`
147- num_internal_exprs : usize ,
148142}
149143
150144impl PhysicalGroupBy {
@@ -154,12 +148,10 @@ impl PhysicalGroupBy {
154148 null_expr : Vec < ( Arc < dyn PhysicalExpr > , String ) > ,
155149 groups : Vec < Vec < bool > > ,
156150 ) -> Self {
157- let num_internal_exprs = if !null_expr. is_empty ( ) { 1 } else { 0 } ;
158151 Self {
159152 expr,
160153 null_expr,
161154 groups,
162- num_internal_exprs,
163155 }
164156 }
165157
@@ -171,7 +163,6 @@ impl PhysicalGroupBy {
171163 expr,
172164 null_expr : vec ! [ ] ,
173165 groups : vec ! [ vec![ false ; num_exprs] ] ,
174- num_internal_exprs : 0 ,
175166 }
176167 }
177168
@@ -222,20 +213,17 @@ impl PhysicalGroupBy {
222213 }
223214
224215 /// The number of expressions in the output schema.
225- fn num_output_exprs ( & self , mode : & AggregateMode ) -> usize {
216+ fn num_output_exprs ( & self ) -> usize {
226217 let mut num_exprs = self . expr . len ( ) ;
227218 if !self . is_single ( ) {
228- num_exprs += self . num_internal_exprs ;
229- }
230- if * mode != AggregateMode :: Partial {
231- num_exprs -= self . num_internal_exprs ;
219+ num_exprs += 1
232220 }
233221 num_exprs
234222 }
235223
236224 /// Return grouping expressions as they occur in the output schema.
237- pub fn output_exprs ( & self , mode : & AggregateMode ) -> Vec < Arc < dyn PhysicalExpr > > {
238- let num_output_exprs = self . num_output_exprs ( mode ) ;
225+ pub fn output_exprs ( & self ) -> Vec < Arc < dyn PhysicalExpr > > {
226+ let num_output_exprs = self . num_output_exprs ( ) ;
239227 let mut output_exprs = Vec :: with_capacity ( num_output_exprs) ;
240228 output_exprs. extend (
241229 self . expr
@@ -244,9 +232,11 @@ impl PhysicalGroupBy {
244232 . take ( num_output_exprs)
245233 . map ( |( index, ( _, name) ) | Arc :: new ( Column :: new ( name, index) ) as _ ) ,
246234 ) ;
247- if !self . is_single ( ) && * mode == AggregateMode :: Partial {
248- output_exprs
249- . push ( Arc :: new ( Column :: new ( INTERNAL_GROUPING_ID , self . expr . len ( ) ) ) as _ ) ;
235+ if !self . is_single ( ) {
236+ output_exprs. push ( Arc :: new ( Column :: new (
237+ Aggregate :: INTERNAL_GROUPING_ID ,
238+ self . expr . len ( ) ,
239+ ) ) as _ ) ;
250240 }
251241 output_exprs
252242 }
@@ -256,7 +246,7 @@ impl PhysicalGroupBy {
256246 if self . is_single ( ) {
257247 self . expr . len ( )
258248 } else {
259- self . expr . len ( ) + self . num_internal_exprs
249+ self . expr . len ( ) + 1
260250 }
261251 }
262252
@@ -290,7 +280,7 @@ impl PhysicalGroupBy {
290280 }
291281 if !self . is_single ( ) {
292282 fields. push ( Field :: new (
293- INTERNAL_GROUPING_ID ,
283+ Aggregate :: INTERNAL_GROUPING_ID ,
294284 self . grouping_id_type ( ) ,
295285 false ,
296286 ) ) ;
@@ -302,35 +292,29 @@ impl PhysicalGroupBy {
302292 ///
303293 /// This might be different from the `group_fields` that might contain internal expressions that
304294 /// should not be part of the output schema.
305- fn output_fields (
306- & self ,
307- input_schema : & Schema ,
308- mode : & AggregateMode ,
309- ) -> Result < Vec < Field > > {
295+ fn output_fields ( & self , input_schema : & Schema ) -> Result < Vec < Field > > {
310296 let mut fields = self . group_fields ( input_schema) ?;
311- fields. truncate ( self . num_output_exprs ( mode ) ) ;
297+ fields. truncate ( self . num_output_exprs ( ) ) ;
312298 Ok ( fields)
313299 }
314300
315301 /// Returns the `PhysicalGroupBy` for a final aggregation if `self` is used for a partial
316302 /// aggregation.
317303 pub fn as_final ( & self ) -> PhysicalGroupBy {
318- let expr: Vec < _ > = self
319- . output_exprs ( & AggregateMode :: Partial )
320- . into_iter ( )
321- . zip (
322- self . expr
323- . iter ( )
324- . map ( |t| t. 1 . clone ( ) )
325- . chain ( std:: iter:: once ( INTERNAL_GROUPING_ID . to_owned ( ) ) ) ,
326- )
327- . collect ( ) ;
304+ let expr: Vec < _ > =
305+ self . output_exprs ( )
306+ . into_iter ( )
307+ . zip (
308+ self . expr . iter ( ) . map ( |t| t. 1 . clone ( ) ) . chain ( std:: iter:: once (
309+ Aggregate :: INTERNAL_GROUPING_ID . to_owned ( ) ,
310+ ) ) ,
311+ )
312+ . collect ( ) ;
328313 let num_exprs = expr. len ( ) ;
329314 Self {
330315 expr,
331316 null_expr : vec ! [ ] ,
332317 groups : vec ! [ vec![ false ; num_exprs] ] ,
333- num_internal_exprs : self . num_internal_exprs ,
334318 }
335319 }
336320}
@@ -567,7 +551,7 @@ impl AggregateExec {
567551
568552 /// Grouping expressions as they occur in the output schema
569553 pub fn output_group_expr ( & self ) -> Vec < Arc < dyn PhysicalExpr > > {
570- self . group_by . output_exprs ( & AggregateMode :: Partial )
554+ self . group_by . output_exprs ( )
571555 }
572556
573557 /// Aggregate expressions
@@ -901,9 +885,8 @@ fn create_schema(
901885 aggr_expr : & [ AggregateFunctionExpr ] ,
902886 mode : AggregateMode ,
903887) -> Result < Schema > {
904- let mut fields =
905- Vec :: with_capacity ( group_by. num_output_exprs ( & mode) + aggr_expr. len ( ) ) ;
906- fields. extend ( group_by. output_fields ( input_schema, & mode) ?) ;
888+ let mut fields = Vec :: with_capacity ( group_by. num_output_exprs ( ) + aggr_expr. len ( ) ) ;
889+ fields. extend ( group_by. output_fields ( input_schema) ?) ;
907890
908891 match mode {
909892 AggregateMode :: Partial => {
@@ -1506,49 +1489,49 @@ mod tests {
15061489 // In spill mode, we test with the limited memory, if the mem usage exceeds,
15071490 // we trigger the early emit rule, which turns out the partial aggregate result.
15081491 vec ! [
1509- "+---+-----+-------------+-----------------+" ,
1510- "| a | b | grouping_id | COUNT(1)[count] |" ,
1511- "+---+-----+-------------+-----------------+" ,
1512- "| | 1.0 | 2 | 1 |" ,
1513- "| | 1.0 | 2 | 1 |" ,
1514- "| | 2.0 | 2 | 1 |" ,
1515- "| | 2.0 | 2 | 1 |" ,
1516- "| | 3.0 | 2 | 1 |" ,
1517- "| | 3.0 | 2 | 1 |" ,
1518- "| | 4.0 | 2 | 1 |" ,
1519- "| | 4.0 | 2 | 1 |" ,
1520- "| 2 | | 1 | 1 |" ,
1521- "| 2 | | 1 | 1 |" ,
1522- "| 2 | 1.0 | 0 | 1 |" ,
1523- "| 2 | 1.0 | 0 | 1 |" ,
1524- "| 3 | | 1 | 1 |" ,
1525- "| 3 | | 1 | 2 |" ,
1526- "| 3 | 2.0 | 0 | 2 |" ,
1527- "| 3 | 3.0 | 0 | 1 |" ,
1528- "| 4 | | 1 | 1 |" ,
1529- "| 4 | | 1 | 2 |" ,
1530- "| 4 | 3.0 | 0 | 1 |" ,
1531- "| 4 | 4.0 | 0 | 2 |" ,
1532- "+---+-----+-------------+-----------------+" ,
1492+ "+---+-----+--------------- +-----------------+" ,
1493+ "| a | b | __grouping_id | COUNT(1)[count] |" ,
1494+ "+---+-----+--------------- +-----------------+" ,
1495+ "| | 1.0 | 2 | 1 |" ,
1496+ "| | 1.0 | 2 | 1 |" ,
1497+ "| | 2.0 | 2 | 1 |" ,
1498+ "| | 2.0 | 2 | 1 |" ,
1499+ "| | 3.0 | 2 | 1 |" ,
1500+ "| | 3.0 | 2 | 1 |" ,
1501+ "| | 4.0 | 2 | 1 |" ,
1502+ "| | 4.0 | 2 | 1 |" ,
1503+ "| 2 | | 1 | 1 |" ,
1504+ "| 2 | | 1 | 1 |" ,
1505+ "| 2 | 1.0 | 0 | 1 |" ,
1506+ "| 2 | 1.0 | 0 | 1 |" ,
1507+ "| 3 | | 1 | 1 |" ,
1508+ "| 3 | | 1 | 2 |" ,
1509+ "| 3 | 2.0 | 0 | 2 |" ,
1510+ "| 3 | 3.0 | 0 | 1 |" ,
1511+ "| 4 | | 1 | 1 |" ,
1512+ "| 4 | | 1 | 2 |" ,
1513+ "| 4 | 3.0 | 0 | 1 |" ,
1514+ "| 4 | 4.0 | 0 | 2 |" ,
1515+ "+---+-----+--------------- +-----------------+" ,
15331516 ]
15341517 } else {
15351518 vec ! [
1536- "+---+-----+-------------+-----------------+" ,
1537- "| a | b | grouping_id | COUNT(1)[count] |" ,
1538- "+---+-----+-------------+-----------------+" ,
1539- "| | 1.0 | 2 | 2 |" ,
1540- "| | 2.0 | 2 | 2 |" ,
1541- "| | 3.0 | 2 | 2 |" ,
1542- "| | 4.0 | 2 | 2 |" ,
1543- "| 2 | | 1 | 2 |" ,
1544- "| 2 | 1.0 | 0 | 2 |" ,
1545- "| 3 | | 1 | 3 |" ,
1546- "| 3 | 2.0 | 0 | 2 |" ,
1547- "| 3 | 3.0 | 0 | 1 |" ,
1548- "| 4 | | 1 | 3 |" ,
1549- "| 4 | 3.0 | 0 | 1 |" ,
1550- "| 4 | 4.0 | 0 | 2 |" ,
1551- "+---+-----+-------------+-----------------+" ,
1519+ "+---+-----+--------------- +-----------------+" ,
1520+ "| a | b | __grouping_id | COUNT(1)[count] |" ,
1521+ "+---+-----+--------------- +-----------------+" ,
1522+ "| | 1.0 | 2 | 2 |" ,
1523+ "| | 2.0 | 2 | 2 |" ,
1524+ "| | 3.0 | 2 | 2 |" ,
1525+ "| | 4.0 | 2 | 2 |" ,
1526+ "| 2 | | 1 | 2 |" ,
1527+ "| 2 | 1.0 | 0 | 2 |" ,
1528+ "| 3 | | 1 | 3 |" ,
1529+ "| 3 | 2.0 | 0 | 2 |" ,
1530+ "| 3 | 3.0 | 0 | 1 |" ,
1531+ "| 4 | | 1 | 3 |" ,
1532+ "| 4 | 3.0 | 0 | 1 |" ,
1533+ "| 4 | 4.0 | 0 | 2 |" ,
1534+ "+---+-----+--------------- +-----------------+" ,
15521535 ]
15531536 } ;
15541537 assert_batches_sorted_eq ! ( expected, & result) ;
0 commit comments