Skip to content

Commit 567e823

Browse files
committed
BUG: GroupBy with TimeGrouper sorts unstably
1 parent f813425 commit 567e823

File tree

3 files changed

+31
-4
lines changed

3 files changed

+31
-4
lines changed

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -241,3 +241,5 @@ Bug Fixes
241241
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
242242
- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
243243
- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
244+
245+
- Bug where ``GroupBy.first()`` and ``.last()`` return an incorrect row when ``TimeGrouper`` is used (:issue:`7453`)

pandas/core/groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@ def _set_grouper(self, obj, sort=False):
273273

274274
# possibly sort
275275
if (self.sort or sort) and not ax.is_monotonic:
276-
indexer = self.indexer = ax.argsort(kind='quicksort')
276+
# use stable sort to support first, last, nth
277+
indexer = self.indexer = ax.argsort(kind='mergesort')
277278
ax = ax.take(indexer)
278279
obj = obj.take(indexer, axis=self.axis,
279280
convert=False, is_copy=False)

pandas/tseries/tests/test_resample.py

+27-3
Original file line numberDiff line numberDiff line change
@@ -2365,6 +2365,28 @@ def test_fails_on_no_datetime_index(self):
23652365
"got an instance of 'PeriodIndex'"):
23662366
df.groupby(TimeGrouper('D'))
23672367

2368+
def test_aaa_group_order(self):
2369+
# GH 12840
2370+
# check that TimeGrouper performs a stable sort
2371+
n = 20
2372+
data = np.random.randn(n, 4)
2373+
df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
2374+
df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
2375+
datetime(2013, 1, 3), datetime(2013, 1, 4),
2376+
datetime(2013, 1, 5)] * 4
2377+
grouped = df.groupby(TimeGrouper(key='key', freq='D'))
2378+
2379+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)),
2380+
df[::5])
2381+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)),
2382+
df[1::5])
2383+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)),
2384+
df[2::5])
2385+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)),
2386+
df[3::5])
2387+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)),
2388+
df[4::5])
2389+
23682390
def test_aggregate_normal(self):
23692391
# check TimeGrouper's aggregation is identical to normal groupby
23702392

@@ -2402,23 +2424,25 @@ def test_aggregate_normal(self):
24022424
periods=5, name='key')
24032425
dt_result = getattr(dt_grouped, func)()
24042426
assert_series_equal(expected, dt_result)
2405-
"""
2427+
2428+
# GH 7453
24062429
for func in ['first', 'last']:
24072430
expected = getattr(normal_grouped, func)()
24082431
expected.index = date_range(start='2013-01-01', freq='D',
24092432
periods=5, name='key')
24102433
dt_result = getattr(dt_grouped, func)()
24112434
assert_frame_equal(expected, dt_result)
24122435

2436+
# if TimeGrouper is used, 'nth' doesn't work yet
2437+
2438+
"""
24132439
for func in ['nth']:
24142440
expected = getattr(normal_grouped, func)(3)
24152441
expected.index = date_range(start='2013-01-01',
24162442
freq='D', periods=5, name='key')
24172443
dt_result = getattr(dt_grouped, func)(3)
24182444
assert_frame_equal(expected, dt_result)
24192445
"""
2420-
# if TimeGrouper is used included, 'first','last' and 'nth' doesn't
2421-
# work yet
24222446

24232447
def test_aggregate_with_nat(self):
24242448
# check TimeGrouper's aggregation is identical to normal groupby

0 commit comments

Comments
 (0)