Skip to content

Commit 956a5b0

Browse files
feat: Support builtins funcs for df.agg (#2256)
1 parent 9f497a6 commit 956a5b0

File tree

4 files changed

+35
-9
lines changed

4 files changed

+35
-9
lines changed

bigframes/core/groupby/dataframe_group_by.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ def _agg_func(self, func) -> df.DataFrame:
593593
def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
594594
aggregations: typing.List[agg_expressions.Aggregation] = []
595595
column_labels = []
596+
function_labels = []
596597

597598
want_aggfunc_level = any(utils.is_list_like(aggs) for aggs in func.values())
598599

@@ -602,8 +603,10 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
602603
funcs_for_id if utils.is_list_like(funcs_for_id) else [funcs_for_id]
603604
)
604605
for f in func_list:
605-
aggregations.append(aggs.agg(col_id, agg_ops.lookup_agg_func(f)[0]))
606+
f_op, f_label = agg_ops.lookup_agg_func(f)
607+
aggregations.append(aggs.agg(col_id, f_op))
606608
column_labels.append(label)
609+
function_labels.append(f_label)
607610
agg_block, _ = self._block.aggregate(
608611
by_column_ids=self._by_col_ids,
609612
aggregations=aggregations,
@@ -613,10 +616,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
613616
agg_block = agg_block.with_column_labels(
614617
utils.combine_indices(
615618
pd.Index(column_labels),
616-
pd.Index(
617-
typing.cast(agg_ops.AggregateOp, agg.op).name
618-
for agg in aggregations
619-
),
619+
pd.Index(function_labels),
620620
)
621621
)
622622
else:

bigframes/operations/aggregations.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,9 +717,15 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
717717
np.all: all_op,
718718
np.any: any_op,
719719
np.unique: nunique_op,
720-
# TODO(b/443252872): Solve
721-
# list: ArrayAggOp(),
722720
np.size: size_op,
721+
# TODO(b/443252872): `list` is now mapped to ArrayAggOp(); confirm the tracked issue is resolved, then remove this TODO.
722+
list: ArrayAggOp(),
723+
len: size_op,
724+
sum: sum_op,
725+
min: min_op,
726+
max: max_op,
727+
any: any_op,
728+
all: all_op,
723729
}
724730

725731

tests/system/small/test_dataframe.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6151,6 +6151,28 @@ def test_agg_with_dict_strs(scalars_dfs):
61516151
)
61526152

61536153

6154+
def test_df_agg_with_builtins(scalars_dfs):
6155+
bf_df, pd_df = scalars_dfs
6156+
6157+
bf_result = (
6158+
bf_df[["int64_col", "bool_col"]]
6159+
.dropna()
6160+
.groupby(bf_df.int64_too % 2)
6161+
.agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]})
6162+
.to_pandas()
6163+
)
6164+
pd_result = (
6165+
pd_df[["int64_col", "bool_col"]]
6166+
.dropna()
6167+
.groupby(pd_df.int64_too % 2)
6168+
.agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]})
6169+
)
6170+
6171+
pd.testing.assert_frame_equal(
6172+
bf_result, pd_result, check_dtype=False, check_index_type=False
6173+
)
6174+
6175+
61546176
def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
61556177
bf_df, _ = scalars_dfs
61566178
agg_funcs = {

tests/system/small/test_groupby.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,6 @@ def test_dataframe_groupby_agg_dict_with_list(
282282
)
283283
bf_result_computed = bf_result.to_pandas()
284284

285-
# some inconsistency between versions, so normalize to bigframes behavior
286-
pd_result = pd_result.rename({"amax": "max"}, axis="columns")
287285
pd.testing.assert_frame_equal(
288286
pd_result, bf_result_computed, check_dtype=False, check_index_type=False
289287
)

0 commit comments

Comments
 (0)