Skip to content

Commit 4017e9c

Browse files
authored
DEPR: groupby with as_index=False not including out-of-axis groupings (#52333)
* DEPR: groupby with as_index=False not including out-of-axis groupings * Fix warning msg in tests * fixup docs
1 parent e7343f7 commit 4017e9c

File tree

7 files changed

+57
-15
lines changed

7 files changed

+57
-15
lines changed

Diff for: doc/source/whatsnew/v0.15.1.rst

+7-2
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,14 @@ API changes
7070
7171
current behavior:
7272

73-
.. ipython:: python
73+
.. code-block:: ipython
7474
75-
df.groupby(ts, as_index=False).max()
75+
In [4]: df.groupby(ts, as_index=False).max()
76+
Out[4]:
77+
jim joe
78+
0 72 83
79+
1 77 84
80+
2 96 65
7681
7782
- ``groupby`` will not erroneously exclude columns if the column name conflicts
7883
with the grouper name (:issue:`8112`):

Diff for: doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ Deprecations
223223
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
224224
- Deprecated pinning ``group.name`` to each group in :meth:`.SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
225225
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`.DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
226+
- Deprecated :class:`.DataFrameGroupBy` with ``as_index=False`` not including groupings in the result when they are not columns of the DataFrame (:issue:`49519`)
226227
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
227228
- Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
228229
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)

Diff for: pandas/core/groupby/groupby.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -1244,8 +1244,21 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
12441244
):
12451245
# GH #28549
12461246
# When using .apply(-), name will be in columns already
1247-
if in_axis and name not in columns:
1248-
result.insert(0, name, lev)
1247+
if name not in columns:
1248+
if in_axis:
1249+
result.insert(0, name, lev)
1250+
else:
1251+
msg = (
1252+
"A grouping was used that is not in the columns of the "
1253+
"DataFrame and so was excluded from the result. This grouping "
1254+
"will be included in a future version of pandas. Add the "
1255+
"grouping as a column of the DataFrame to silence this warning."
1256+
)
1257+
warnings.warn(
1258+
message=msg,
1259+
category=FutureWarning,
1260+
stacklevel=find_stack_level(),
1261+
)
12491262

12501263
return result
12511264

Diff for: pandas/tests/groupby/test_categorical.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,9 @@ def test_as_index():
771771

772772
# function grouper
773773
f = lambda r: df.loc[r, "A"]
774-
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
774+
msg = "A grouping .* was excluded from the result"
775+
with tm.assert_produces_warning(FutureWarning, match=msg):
776+
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
775777
expected = DataFrame(
776778
{
777779
"cat": Categorical([1, 2], categories=df.cat.cat.categories),
@@ -784,7 +786,9 @@ def test_as_index():
784786

785787
# another not in-axis grouper (conflicting names in index)
786788
s = Series(["a", "b", "b"], name="cat")
787-
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
789+
msg = "A grouping .* was excluded from the result"
790+
with tm.assert_produces_warning(FutureWarning, match=msg):
791+
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
788792
tm.assert_frame_equal(result, expected)
789793

790794
# is original index dropped?

Diff for: pandas/tests/groupby/test_groupby.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -244,18 +244,26 @@ def f(x, q=None, axis=0):
244244
# DataFrame
245245
for as_index in [True, False]:
246246
df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
247-
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
248-
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
249-
expected = df_grouped.quantile(0.8)
247+
warn = None if as_index else FutureWarning
248+
msg = "A grouping .* was excluded from the result"
249+
with tm.assert_produces_warning(warn, match=msg):
250+
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
251+
with tm.assert_produces_warning(warn, match=msg):
252+
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
253+
with tm.assert_produces_warning(warn, match=msg):
254+
expected = df_grouped.quantile(0.8)
250255
tm.assert_frame_equal(apply_result, expected, check_names=False)
251256
tm.assert_frame_equal(agg_result, expected)
252257

253258
apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8])
254-
expected_seq = df_grouped.quantile([0.4, 0.8])
259+
with tm.assert_produces_warning(warn, match=msg):
260+
expected_seq = df_grouped.quantile([0.4, 0.8])
255261
tm.assert_frame_equal(apply_result, expected_seq, check_names=False)
256262

257-
agg_result = df_grouped.agg(f, q=80)
258-
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
263+
with tm.assert_produces_warning(warn, match=msg):
264+
agg_result = df_grouped.agg(f, q=80)
265+
with tm.assert_produces_warning(warn, match=msg):
266+
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
259267
tm.assert_frame_equal(agg_result, expected)
260268
tm.assert_frame_equal(apply_result, expected, check_names=False)
261269

@@ -266,7 +274,10 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
266274
tsframe.columns = ["A", "B", "A", "C"]
267275
gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
268276

269-
res = gb.agg(np.percentile, 80, axis=0)
277+
warn = None if as_index else FutureWarning
278+
msg = "A grouping .* was excluded from the result"
279+
with tm.assert_produces_warning(warn, match=msg):
280+
res = gb.agg(np.percentile, 80, axis=0)
270281

271282
ex_data = {
272283
1: tsframe[tsframe.index.month == 1].quantile(0.8),

Diff for: pandas/tests/groupby/test_groupby_dropna.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,13 @@ def test_categorical_reducers(
576576
gb_keepna = df.groupby(
577577
keys, dropna=False, observed=observed, sort=sort, as_index=as_index
578578
)
579-
result = getattr(gb_keepna, reduction_func)(*args)
579+
if as_index or index_kind == "range" or reduction_func == "size":
580+
warn = None
581+
else:
582+
warn = FutureWarning
583+
msg = "A grouping .* was excluded from the result"
584+
with tm.assert_produces_warning(warn, match=msg):
585+
result = getattr(gb_keepna, reduction_func)(*args)
580586

581587
# size will return a Series, others are DataFrame
582588
tm.assert_equal(result, expected)

Diff for: pandas/tests/groupby/test_grouping.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,9 @@ def test_grouping_by_key_is_in_axis():
10851085

10861086
# Currently only in-axis groupings are included in the result when as_index=False;
10871087
# This is likely to change in the future.
1088-
result = gb.sum()
1088+
msg = "A grouping .* was excluded from the result"
1089+
with tm.assert_produces_warning(FutureWarning, match=msg):
1090+
result = gb.sum()
10891091
expected = DataFrame({"b": [1, 2], "c": [7, 5]})
10901092
tm.assert_frame_equal(result, expected)
10911093

0 commit comments

Comments
 (0)