Skip to content

Commit fe2ef37

Browse files
authored
CLN: Enforce deprecation of groupby with as_index=False excluding out-of-axis groupings (#57741)
* CLN: Enforce deprecation of groupby with as_index=False excluding out-of-axis groupings
* type annotation fixup
1 parent b89b2f1 commit fe2ef37

File tree

7 files changed

+62
-72
lines changed

7 files changed

+62
-72
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,7 @@ Removal of prior version deprecations/changes
191191
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
192192
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
193193
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
194+
- :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`)
194195
- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
195196
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
196197
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)

Diff for: pandas/core/groupby/groupby.py

+35-27
Original file line number | Diff line number | Diff line change
@@ -1286,34 +1286,43 @@ def _set_result_index_ordered(
12861286
return result
12871287

12881288
@final
1289-
def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
1289+
def _insert_inaxis_grouper(
1290+
self, result: Series | DataFrame, qs: npt.NDArray[np.float64] | None = None
1291+
) -> DataFrame:
12901292
if isinstance(result, Series):
12911293
result = result.to_frame()
12921294

1295+
n_groupings = len(self._grouper.groupings)
1296+
1297+
if qs is not None:
1298+
result.insert(
1299+
0, f"level_{n_groupings}", np.tile(qs, len(result) // len(qs))
1300+
)
1301+
12931302
# zip in reverse so we can always insert at loc 0
1294-
columns = result.columns
1295-
for name, lev, in_axis in zip(
1296-
reversed(self._grouper.names),
1297-
reversed(self._grouper.get_group_levels()),
1298-
reversed([grp.in_axis for grp in self._grouper.groupings]),
1303+
for level, (name, lev, in_axis) in enumerate(
1304+
zip(
1305+
reversed(self._grouper.names),
1306+
reversed(self._grouper.get_group_levels()),
1307+
reversed([grp.in_axis for grp in self._grouper.groupings]),
1308+
)
12991309
):
1310+
if name is None:
1311+
# Behave the same as .reset_index() when a level is unnamed
1312+
name = (
1313+
"index"
1314+
if n_groupings == 1 and qs is None
1315+
else f"level_{n_groupings - level - 1}"
1316+
)
1317+
13001318
# GH #28549
13011319
# When using .apply(-), name will be in columns already
1302-
if name not in columns:
1303-
if in_axis:
1320+
if name not in result.columns:
1321+
# if in_axis:
1322+
if qs is None:
13041323
result.insert(0, name, lev)
13051324
else:
1306-
msg = (
1307-
"A grouping was used that is not in the columns of the "
1308-
"DataFrame and so was excluded from the result. This grouping "
1309-
"will be included in a future version of pandas. Add the "
1310-
"grouping as a column of the DataFrame to silence this warning."
1311-
)
1312-
warnings.warn(
1313-
message=msg,
1314-
category=FutureWarning,
1315-
stacklevel=find_stack_level(),
1316-
)
1325+
result.insert(0, name, Index(np.repeat(lev, len(qs))))
13171326

13181327
return result
13191328

@@ -1340,18 +1349,17 @@ def _wrap_aggregated_output(
13401349
if not self.as_index:
13411350
# `not self.as_index` is only relevant for DataFrameGroupBy,
13421351
# enforced in __init__
1343-
result = self._insert_inaxis_grouper(result)
1352+
result = self._insert_inaxis_grouper(result, qs=qs)
13441353
result = result._consolidate()
1345-
index = Index(range(self._grouper.ngroups))
1354+
result.index = RangeIndex(len(result))
13461355

13471356
else:
13481357
index = self._grouper.result_index
1349-
1350-
if qs is not None:
1351-
# We get here with len(qs) != 1 and not self.as_index
1352-
# in test_pass_args_kwargs
1353-
index = _insert_quantile_level(index, qs)
1354-
result.index = index
1358+
if qs is not None:
1359+
# We get here with len(qs) != 1 and not self.as_index
1360+
# in test_pass_args_kwargs
1361+
index = _insert_quantile_level(index, qs)
1362+
result.index = index
13551363

13561364
return result
13571365

Diff for: pandas/tests/groupby/aggregate/test_aggregate.py

+2-5
Original file line number | Diff line number | Diff line change
@@ -1248,18 +1248,15 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
12481248
tsframe.columns = ["A", "B", "A", "C"]
12491249
gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
12501250

1251-
warn = None if as_index else FutureWarning
1252-
msg = "A grouping .* was excluded from the result"
1253-
with tm.assert_produces_warning(warn, match=msg):
1254-
res = gb.agg(np.percentile, 80, axis=0)
1251+
res = gb.agg(np.percentile, 80, axis=0)
12551252

12561253
ex_data = {
12571254
1: tsframe[tsframe.index.month == 1].quantile(0.8),
12581255
2: tsframe[tsframe.index.month == 2].quantile(0.8),
12591256
}
12601257
expected = DataFrame(ex_data).T
12611258
if not as_index:
1262-
# TODO: try to get this more consistent?
1259+
expected.insert(0, "index", [1, 2])
12631260
expected.index = Index(range(2))
12641261

12651262
tm.assert_frame_equal(res, expected)

Diff for: pandas/tests/groupby/test_categorical.py

+11-8
Original file line number | Diff line number | Diff line change
@@ -779,24 +779,27 @@ def test_as_index():
779779

780780
# function grouper
781781
f = lambda r: df.loc[r, "A"]
782-
msg = "A grouping .* was excluded from the result"
783-
with tm.assert_produces_warning(FutureWarning, match=msg):
784-
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
782+
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
785783
expected = DataFrame(
786784
{
787785
"cat": Categorical([1, 2], categories=df.cat.cat.categories),
786+
"level_1": [10, 11],
788787
"A": [10, 22],
789788
"B": [101, 205],
790789
},
791-
columns=["cat", "A", "B"],
792790
)
793791
tm.assert_frame_equal(result, expected)
794792

795793
# another not in-axis grouper (conflicting names in index)
796794
s = Series(["a", "b", "b"], name="cat")
797-
msg = "A grouping .* was excluded from the result"
798-
with tm.assert_produces_warning(FutureWarning, match=msg):
799-
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
795+
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
796+
expected = DataFrame(
797+
{
798+
"cat": ["a", "b"],
799+
"A": [10, 22],
800+
"B": [101, 205],
801+
},
802+
)
800803
tm.assert_frame_equal(result, expected)
801804

802805
# is original index dropped?
@@ -1852,7 +1855,7 @@ def test_category_order_reducer(
18521855
request, as_index, sort, observed, reduction_func, index_kind, ordered
18531856
):
18541857
# GH#48749
1855-
if reduction_func == "corrwith" and not as_index:
1858+
if reduction_func == "corrwith" and not as_index and index_kind != "single":
18561859
msg = "GH#49950 - corrwith with as_index=False may not have grouping column"
18571860
request.applymarker(pytest.mark.xfail(reason=msg))
18581861
elif index_kind != "range" and not as_index:

Diff for: pandas/tests/groupby/test_groupby.py

+10-14
Original file line number | Diff line number | Diff line change
@@ -103,26 +103,22 @@ def f(x, q=None, axis=0):
103103
# DataFrame
104104
for as_index in [True, False]:
105105
df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
106-
warn = None if as_index else FutureWarning
107-
msg = "A grouping .* was excluded from the result"
108-
with tm.assert_produces_warning(warn, match=msg):
109-
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
110-
with tm.assert_produces_warning(warn, match=msg):
111-
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
112-
with tm.assert_produces_warning(warn, match=msg):
113-
expected = df_grouped.quantile(0.8)
106+
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
107+
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
108+
expected = df_grouped.quantile(0.8)
114109
tm.assert_frame_equal(apply_result, expected, check_names=False)
115110
tm.assert_frame_equal(agg_result, expected)
116111

117112
apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8])
118-
with tm.assert_produces_warning(warn, match=msg):
119-
expected_seq = df_grouped.quantile([0.4, 0.8])
113+
expected_seq = df_grouped.quantile([0.4, 0.8])
114+
if not as_index:
115+
# apply treats the op as a transform; .quantile knows it's a reduction
116+
apply_result = apply_result.reset_index()
117+
apply_result["level_0"] = [1, 1, 2, 2]
120118
tm.assert_frame_equal(apply_result, expected_seq, check_names=False)
121119

122-
with tm.assert_produces_warning(warn, match=msg):
123-
agg_result = df_grouped.agg(f, q=80)
124-
with tm.assert_produces_warning(warn, match=msg):
125-
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
120+
agg_result = df_grouped.agg(f, q=80)
121+
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
126122
tm.assert_frame_equal(agg_result, expected)
127123
tm.assert_frame_equal(apply_result, expected, check_names=False)
128124

Diff for: pandas/tests/groupby/test_groupby_dropna.py

+1-12
Original file line number | Diff line number | Diff line change
@@ -552,11 +552,6 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
552552
expected = expected.set_index(["x", "x2"])
553553
else:
554554
expected = expected.set_index("x")
555-
elif index_kind != "range" and reduction_func != "size":
556-
# size, unlike other methods, has the desired behavior in GH#49519
557-
expected = expected.drop(columns="x")
558-
if index_kind == "multi":
559-
expected = expected.drop(columns="x2")
560555
if reduction_func in ("idxmax", "idxmin") and index_kind != "range":
561556
# expected was computed with a RangeIndex; need to translate to index values
562557
values = expected["y"].values.tolist()
@@ -572,13 +567,7 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
572567
if as_index:
573568
expected = expected["size"].rename(None)
574569

575-
if as_index or index_kind == "range" or reduction_func == "size":
576-
warn = None
577-
else:
578-
warn = FutureWarning
579-
msg = "A grouping .* was excluded from the result"
580-
with tm.assert_produces_warning(warn, match=msg):
581-
result = getattr(gb_keepna, reduction_func)(*args)
570+
result = getattr(gb_keepna, reduction_func)(*args)
582571

583572
# size will return a Series, others are DataFrame
584573
tm.assert_equal(result, expected)

Diff for: pandas/tests/groupby/test_grouping.py

+2-6
Original file line number | Diff line number | Diff line change
@@ -1125,12 +1125,8 @@ def test_grouping_by_key_is_in_axis():
11251125
assert not gb._grouper.groupings[0].in_axis
11261126
assert gb._grouper.groupings[1].in_axis
11271127

1128-
# Currently only in-axis groupings are including in the result when as_index=False;
1129-
# This is likely to change in the future.
1130-
msg = "A grouping .* was excluded from the result"
1131-
with tm.assert_produces_warning(FutureWarning, match=msg):
1132-
result = gb.sum()
1133-
expected = DataFrame({"b": [1, 2], "c": [7, 5]})
1128+
result = gb.sum()
1129+
expected = DataFrame({"a": [1, 2], "b": [1, 2], "c": [7, 5]})
11341130
tm.assert_frame_equal(result, expected)
11351131

11361132

0 commit comments

Comments
 (0)