pandas-dev · simonjayhawkins · Dec 1, 2020 · Nov 25, 2020 · Nov 30, 2020 · Nov 30, 2020
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -225,6 +225,19 @@ def time_rolling_offset(self, method):
         getattr(self.groupby_roll_offset, method)()
 
 
+class Groupby2:
+    # https://github.com/pandas-dev/pandas/issues/38038
+    # Rolling itself is cheap here; building the result MultiIndex is the cost.
+
+    def setup(self):
+        size = 100000
+        frame = {"A": [1, 2] * (size // 2), "B": np.random.randn(size)}
+        self.df = pd.DataFrame(frame)
+
+    def time_rolling_multiindex_creation(self):
+        self.df.groupby("A").rolling(3).mean()
+
+
 class GroupbyEWM:
 
     params = ["cython", "numba"]

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -50,7 +50,6 @@
 
 from pandas.core.aggregation import aggregate
 from pandas.core.base import DataError, SelectionMixin
-import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.groupby.base import GotItemMixin, ShallowMixin
 from pandas.core.indexes.api import Index, MultiIndex
@@ -791,22 +790,28 @@ def _apply(
             # Our result will have still kept the column in the result
             result = result.drop(columns=column_keys, errors="ignore")
 
-        result_index_data = []
-        for key, values in self._groupby.grouper.indices.items():
-            for value in values:
-                data = [
-                    *com.maybe_make_list(key),
-                    *com.maybe_make_list(
-                        grouped_object_index[value]
-                        if grouped_object_index is not None
-                        else []
-                    ),
-                ]
-                result_index_data.append(tuple(data))
-
-        result_index = MultiIndex.from_tuples(
-            result_index_data, names=result_index_names
+        codes = self._groupby.grouper.codes
+        levels = self._groupby.grouper.levels
+
+        group_indices = self._groupby.grouper.indices.values()
+        if group_indices:
+            indexer = np.concatenate(list(self._groupby.grouper.indices.values()))
+        else:
+            indexer = np.array([], dtype=np.intp)
+        codes = [c.take(indexer) for c in codes]
+
+        if grouped_object_index is not None:
+            if isinstance(grouped_object_index, MultiIndex):
+                idx = grouped_object_index.take(indexer)
+            else:
+                idx = MultiIndex.from_arrays([grouped_object_index.take(indexer)])
+            codes.extend(list(idx.codes))
+            levels.extend(list(idx.levels))
+
+        result_index = MultiIndex(
+            levels, codes, names=result_index_names, verify_integrity=False
         )
+
         result.index = result_index
         return result