66which here returns a DataFrameGroupBy object.
77"""
88
9- from collections import OrderedDict , abc
9+ from collections import OrderedDict , abc , namedtuple
1010import copy
1111from functools import partial
1212from textwrap import dedent
13+ import typing
14+ from typing import Any , Callable , List , Union
1315import warnings
1416
1517import numpy as np
1618
1719from pandas ._libs import Timestamp , lib
20+ from pandas .compat import PY36
1821from pandas .errors import AbstractMethodError
1922from pandas .util ._decorators import Appender , Substitution
2023
4144
4245from pandas .plotting ._core import boxplot_frame_groupby
4346
47+ NamedAgg = namedtuple ("NamedAgg" , ["column" , "aggfunc" ])
48+ # TODO(typing) the return value on this callable should be any *scalar*.
49+ AggScalar = Union [str , Callable [..., Any ]]
50+
4451
4552class NDFrameGroupBy (GroupBy ):
4653
@@ -144,8 +151,18 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True,
144151 return new_items , new_blocks
145152
146153 def aggregate (self , func , * args , ** kwargs ):
147-
148154 _level = kwargs .pop ('_level' , None )
155+
156+ relabeling = func is None and _is_multi_agg_with_relabel (** kwargs )
157+ if relabeling :
158+ func , columns , order = _normalize_keyword_aggregation (kwargs )
159+
160+ kwargs = {}
161+ elif func is None :
162+ # nicer error message
163+ raise TypeError ("Must provide 'func' or tuples of "
164+ "'(column, aggfunc)." )
165+
149166 result , how = self ._aggregate (func , _level = _level , * args , ** kwargs )
150167 if how is None :
151168 return result
@@ -179,6 +196,10 @@ def aggregate(self, func, *args, **kwargs):
179196 self ._insert_inaxis_grouper_inplace (result )
180197 result .index = np .arange (len (result ))
181198
199+ if relabeling :
200+ result = result [order ]
201+ result .columns = columns
202+
182203 return result ._convert (datetime = True )
183204
184205 agg = aggregate
@@ -791,11 +812,8 @@ def _aggregate_multiple_funcs(self, arg, _level):
791812 # list of functions / function names
792813 columns = []
793814 for f in arg :
794- if isinstance (f , str ):
795- columns .append (f )
796- else :
797- # protect against callables without names
798- columns .append (com .get_callable_name (f ))
815+ columns .append (com .get_callable_name (f ) or f )
816+
799817 arg = zip (columns , arg )
800818
801819 results = OrderedDict ()
@@ -1296,6 +1314,26 @@ class DataFrameGroupBy(NDFrameGroupBy):
12961314 A
12971315 1 1 2 0.590716
12981316 2 3 4 0.704907
1317+
1318+ To control the output names with different aggregations per column,
1319+ pandas supports "named aggregation"
1320+
1321+ >>> df.groupby("A").agg(
1322+ ... b_min=pd.NamedAgg(column="B", aggfunc="min"),
1323+ ... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
1324+ b_min c_sum
1325+ A
1326+ 1 1 -1.956929
1327+ 2 3 -0.322183
1328+
1329+ - The keywords are the *output* column names
1330+ - The values are tuples whose first element is the column to select
1331+ and the second element is the aggregation to apply to that column.
1332+ Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
1333+ ``['column', 'aggfunc']`` to make it clearer what the arguments are.
1334+ As usual, the aggregation can be a callable or a string alias.
1335+
1336+ See :ref:`groupby.aggregate.named` for more.
12991337 """ )
13001338
13011339 @Substitution (see_also = _agg_see_also_doc ,
@@ -1304,7 +1342,7 @@ class DataFrameGroupBy(NDFrameGroupBy):
13041342 klass = 'DataFrame' ,
13051343 axis = '' )
13061344 @Appender (_shared_docs ['aggregate' ])
1307- def aggregate (self , arg , * args , ** kwargs ):
1345+ def aggregate (self , arg = None , * args , ** kwargs ):
13081346 return super ().aggregate (arg , * args , ** kwargs )
13091347
13101348 agg = aggregate
@@ -1577,3 +1615,77 @@ def groupby_series(obj, col=None):
15771615 return results
15781616
15791617 boxplot = boxplot_frame_groupby
1618+
1619+
1620+ def _is_multi_agg_with_relabel (** kwargs ):
1621+ """
1622+ Check whether the kwargs pass to .agg look like multi-agg with relabling.
1623+
1624+ Parameters
1625+ ----------
1626+ **kwargs : dict
1627+
1628+ Returns
1629+ -------
1630+ bool
1631+
1632+ Examples
1633+ --------
1634+ >>> _is_multi_agg_with_relabel(a='max')
1635+ False
1636+ >>> _is_multi_agg_with_relabel(a_max=('a', 'max'),
1637+ ... a_min=('a', 'min'))
1638+ True
1639+ >>> _is_multi_agg_with_relabel()
1640+ False
1641+ """
1642+ return all (
1643+ isinstance (v , tuple ) and len (v ) == 2
1644+ for v in kwargs .values ()
1645+ ) and kwargs
1646+
1647+
def _normalize_keyword_aggregation(kwargs):
    """
    Normalize user-provided "named aggregation" kwargs.

    Transforms from the new ``Dict[str, NamedAgg]`` style kwargs
    to the old ``OrderedDict[str, List[scalar]]``.

    Parameters
    ----------
    kwargs : dict
        Maps each output column name to a ``(column, aggfunc)`` pair.

    Returns
    -------
    aggspec : OrderedDict
        The transformed kwargs: each input column mapped to the list of
        aggregations requested for it, in order of first appearance.
    columns : Tuple[str, ...]
        The user-provided keys, i.e. the output column names.
    order : List[Tuple[str, Any]]
        One ``(input column, aggregation name)`` pair per output column.
        The aggregation itself is used in place of the name when
        ``com.get_callable_name`` returns a falsy value for it.

    Notes
    -----
    Empty ``kwargs`` yield an empty ``aggspec``, ``columns`` and ``order``.

    Examples
    --------
    >>> _normalize_keyword_aggregation({'output': ('input', 'sum')})
    (OrderedDict([('input', ['sum'])]), ('output',), [('input', 'sum')])
    """
    if not PY36:
        # Keyword-argument order is only preserved from Python 3.6 on
        # (PEP 468), so sort by output name for a deterministic result.
        kwargs = OrderedDict(sorted(kwargs.items()))

    # Normalize the aggregation functions as Dict[column, List[func]],
    # process normally, then fixup the names.
    # TODO(Py35): When we drop python 3.5, change this to
    # defaultdict(list)
    aggspec = OrderedDict()  # type: typing.OrderedDict[str, List[AggScalar]]
    order = []

    for column, aggfunc in kwargs.values():
        aggspec.setdefault(column, []).append(aggfunc)
        order.append((column, com.get_callable_name(aggfunc) or aggfunc))

    # ``columns`` stays a tuple, as callers received from the original.
    return aggspec, tuple(kwargs), order
0 commit comments