REF: de-duplicate _wrap_agged_manager/_wrap_aggregated_output (#51201)

jbrockmendel · web-flow · commit 8478cf63888d · 2023-02-06T15:12:14.000-08:00
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -163,15 +163,7 @@ def prop(self):
 
 class SeriesGroupBy(GroupBy[Series]):
     def _wrap_agged_manager(self, mgr: Manager) -> Series:
-        if mgr.ndim == 1:
-            mgr = cast(SingleManager, mgr)
-            single = mgr
-        else:
-            mgr = cast(Manager2D, mgr)
-            single = mgr.iget(0)
-        ser = self.obj._constructor(single, name=self.obj.name)
-        # NB: caller is responsible for setting ser.index
-        return ser
+        return self.obj._constructor(mgr, name=self.obj.name)
 
     def _get_data_to_aggregate(
         self, *, numeric_only: bool = False, name: str | None = None
@@ -1902,25 +1894,7 @@ def _indexed_output_to_ndframe(
         return result
 
     def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
-        if not self.as_index:
-            # GH 41998 - empty mgr always gets index of length 0
-            rows = mgr.shape[1] if mgr.shape[0] > 0 else 0
-            index = Index(range(rows))
-            mgr.set_axis(1, index)
-            result = self.obj._constructor(mgr)
-
-            result = self._insert_inaxis_grouper(result)
-            result = result._consolidate()
-        else:
-            index = self.grouper.result_index
-            mgr.set_axis(1, index)
-            result = self.obj._constructor(mgr)
-
-        if self.axis == 1:
-            result = result.T
-
-        # Note: we really only care about inferring numeric dtypes here
-        return self._reindex_output(result).infer_objects(copy=False)
+        return self.obj._constructor(mgr)
 
     def _iterate_column_groupbys(self, obj: DataFrame | Series):
         for i, colname in enumerate(obj.columns):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1501,7 +1501,6 @@ def _cython_agg_general(
         #  that goes through SeriesGroupBy
 
         data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
-        is_ser = data.ndim == 1
 
         def array_func(values: ArrayLike) -> ArrayLike:
             try:
@@ -1523,16 +1522,12 @@ def array_func(values: ArrayLike) -> ArrayLike:
             return result
 
         new_mgr = data.grouped_reduce(array_func)
-
         res = self._wrap_agged_manager(new_mgr)
-        if is_ser:
-            if self.as_index:
-                res.index = self.grouper.result_index
-            else:
-                res = self._insert_inaxis_grouper(res)
-            return self._reindex_output(res)
-        else:
-            return res
+        out = self._wrap_aggregated_output(res)
+        if data.ndim == 2:
+            # TODO: don't special-case DataFrame vs Series
+            out = out.infer_objects(copy=False)
+        return out
 
     def _cython_transform(
         self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs
@@ -1793,19 +1788,14 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             return counted
 
         new_mgr = data.grouped_reduce(hfunc)
+        new_obj = self._wrap_agged_manager(new_mgr)
 
         # If we are grouping on categoricals we want unobserved categories to
         # return zero, rather than the default of NaN which the reindexing in
-        # _wrap_agged_manager() returns. GH 35028
+        # _wrap_aggregated_output() returns. GH 35028
         # e.g. test_dataframe_groupby_on_2_categoricals_when_observed_is_false
         with com.temp_setattr(self, "observed", True):
-            result = self._wrap_agged_manager(new_mgr)
-
-        if result.ndim == 1:
-            if self.as_index:
-                result.index = self.grouper.result_index
-            else:
-                result = self._insert_inaxis_grouper(result)
+            result = self._wrap_aggregated_output(new_obj)
 
         return self._reindex_output(result, fill_value=0)
 
@@ -2790,9 +2780,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
         mgr = obj._mgr
         res_mgr = mgr.apply(blk_func)
 
-        new_obj = obj._constructor(res_mgr)
-        if isinstance(new_obj, Series):
-            new_obj.name = obj.name
+        new_obj = self._wrap_agged_manager(res_mgr)
 
         if self.axis == 1:
             # Only relevant for DataFrameGroupBy
@@ -3197,15 +3185,10 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 out = out.reshape(ncols, ngroups * nqs)
             return post_processor(out, inference, result_mask, orig_vals)
 
-        obj = self._obj_with_exclusions
-        is_ser = obj.ndim == 1
         data = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile")
         res_mgr = data.grouped_reduce(blk_func)
 
-        if is_ser:
-            res = self._wrap_agged_manager(res_mgr)
-        else:
-            res = obj._constructor(res_mgr)
+        res = self._wrap_agged_manager(res_mgr)
 
         if orig_scalar:
             # Avoid expensive MultiIndex construction
@@ -3652,19 +3635,12 @@ def blk_func(values: ArrayLike) -> ArrayLike:
 
             return result.T
 
-        obj = self._obj_with_exclusions
-
         # Operate block-wise instead of column-by-column
-        is_ser = obj.ndim == 1
         mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
 
         res_mgr = mgr.grouped_reduce(blk_func)
 
-        if is_ser:
-            out = self._wrap_agged_manager(res_mgr)
-        else:
-            out = obj._constructor(res_mgr)
-
+        out = self._wrap_agged_manager(res_mgr)
         return self._wrap_aggregated_output(out)
 
     @final
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -947,9 +947,10 @@ def grouped_reduce(self: T, func: Callable) -> T:
             result_indices.append(i)
 
         if len(result_arrays) == 0:
-            index = Index([None])  # placeholder
+            nrows = 0
         else:
-            index = Index(range(result_arrays[0].shape[0]))
+            nrows = result_arrays[0].shape[0]
+        index = Index(range(nrows))
 
         columns = self.items
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1538,9 +1538,10 @@ def grouped_reduce(self: T, func: Callable) -> T:
                 result_blocks = extend_blocks(applied, result_blocks)
 
         if len(result_blocks) == 0:
-            index = Index([None])  # placeholder
+            nrows = 0
         else:
-            index = Index(range(result_blocks[0].values.shape[-1]))
+            nrows = result_blocks[0].values.shape[-1]
+        index = Index(range(nrows))
 
         return type(self).from_blocks(result_blocks, [self.axes[0], index])