Skip to content

Commit 62527c0

Browse files
P-Tillmannjreback
P-Tillmann
authored and committed
Bug: groupby multiindex levels equals rows (#16859)
closes #16843
1 parent 6993c1b commit 62527c0

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ Groupby/Resample/Rolling
383383
- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`)
384384
- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
385385
- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`)
386-
386+
- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
387387

388388
Sparse
389389
^^^^^^

Diff for: pandas/core/groupby.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
26292629

26302630
try:
26312631
if isinstance(obj, DataFrame):
2632-
all_in_columns = all(g in obj.columns for g in keys)
2632+
all_in_columns_index = all(g in obj.columns or g in obj.index.names
2633+
for g in keys)
26332634
else:
2634-
all_in_columns = False
2635+
all_in_columns_index = False
26352636
except Exception:
2636-
all_in_columns = False
2637+
all_in_columns_index = False
26372638

2638-
if not any_callable and not all_in_columns and \
2639+
if not any_callable and not all_in_columns_index and \
26392640
not any_arraylike and not any_groupers and \
26402641
match_axis_length and level is None:
26412642
keys = [com._asarray_tuplesafe(keys)]

Diff for: pandas/tests/groupby/test_groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -3891,6 +3891,19 @@ def predictions(tool):
38913891
result = df2.groupby('Key').apply(predictions).p1
38923892
tm.assert_series_equal(expected, result)
38933893

3894+
def test_gb_key_len_equal_axis_len(self):
3895+
# GH16843
3896+
# test ensures that index and column keys are recognized correctly
3897+
# when number of keys equals axis length of groupby
3898+
df = pd.DataFrame([['foo', 'bar', 'B', 1],
3899+
['foo', 'bar', 'B', 2],
3900+
['foo', 'baz', 'C', 3]],
3901+
columns=['first', 'second', 'third', 'one'])
3902+
df = df.set_index(['first', 'second'])
3903+
df = df.groupby(['first', 'second', 'third']).size()
3904+
assert df.loc[('foo', 'bar', 'B')] == 2
3905+
assert df.loc[('foo', 'baz', 'C')] == 1
3906+
38943907

38953908
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
38963909
tups = lmap(tuple, df[keys].values)

0 commit comments

Comments
 (0)