Skip to content
Merged
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,19 @@ def time_rolling_offset(self, method):
getattr(self.groupby_roll_offset, method)()


class Groupby2:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rename this to `GroupbyLowNumberOfGroups` or similar?

# https://github.com/pandas-dev/pandas/issues/38038
# specific example where the rolling operation on a larger dataframe
# is relatively cheap, but creation of MultiIndex of result can be expensive

def setup(self):
N = 100000
self.df = pd.DataFrame({"A": [1, 2] * int(N / 2), "B": np.random.randn(N)})

def time_rolling_multiindex_creation(self):
    """Run a window-3 rolling mean over ``self.df`` grouped by column "A".

    The computed result is discarded; nothing is returned.
    """
    grouped = self.df.groupby("A")
    rolling_windows = grouped.rolling(3)
    rolling_windows.mean()


class GroupbyEWM:

params = ["cython", "numba"]
Expand Down
37 changes: 21 additions & 16 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@

from pandas.core.aggregation import aggregate
from pandas.core.base import DataError, SelectionMixin
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.groupby.base import GotItemMixin, ShallowMixin
from pandas.core.indexes.api import Index, MultiIndex
Expand Down Expand Up @@ -791,22 +790,28 @@ def _apply(
# Our result will have still kept the column in the result
result = result.drop(columns=column_keys, errors="ignore")

result_index_data = []
for key, values in self._groupby.grouper.indices.items():
for value in values:
data = [
*com.maybe_make_list(key),
*com.maybe_make_list(
grouped_object_index[value]
if grouped_object_index is not None
else []
),
]
result_index_data.append(tuple(data))

result_index = MultiIndex.from_tuples(
result_index_data, names=result_index_names
codes = self._groupby.grouper.codes
levels = self._groupby.grouper.levels

group_indices = self._groupby.grouper.indices.values()
if group_indices:
indexer = np.concatenate(list(self._groupby.grouper.indices.values()))
Copy link
Member

@mroeschke mroeschke Dec 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think this can simply be `indexer = np.concatenate(list(group_indices))`, reusing the `group_indices` variable assigned just above.

else:
indexer = np.array([], dtype=np.intp)
codes = [c.take(indexer) for c in codes]

if grouped_object_index is not None:
if isinstance(grouped_object_index, MultiIndex):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this would be clearer:

idx = grouped_object_index.take(indexer)
if not isinstance(grouped_object_index, MultiIndex):
    idx = MultiIndex.from_arrays(idx)

idx = grouped_object_index.take(indexer)
else:
idx = MultiIndex.from_arrays([grouped_object_index.take(indexer)])
codes.extend(list(idx.codes))
levels.extend(list(idx.levels))

result_index = MultiIndex(
levels, codes, names=result_index_names, verify_integrity=False
)

result.index = result_index
return result

Expand Down