diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 74fe7916523c5..493f50617aceb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1072,6 +1072,7 @@ Groupby/Resample/Rolling - Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) +- Bug in ``.groupby()`` when calling ``first()`` or ``last()`` on TZ-aware timestamps (:issue:`15884`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index fe764a099bb63..8091a976c07ee 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1162,11 +1162,10 @@ def first_compat(x, axis=0): def first(x): - x = np.asarray(x) x = x[notnull(x)] if len(x) == 0: return np.nan - return x[0] + return x.iloc[0] if isinstance(x, DataFrame): return x.apply(first, axis=axis) @@ -1177,11 +1176,10 @@ def last_compat(x, axis=0): def last(x): - x = np.asarray(x) x = x[notnull(x)] if len(x) == 0: return np.nan - return x[-1] + return x.iloc[-1] if isinstance(x, DataFrame): return x.apply(last, axis=axis) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c17c98c5448be..0582cb0b79903 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -81,6 +81,15 @@ def test_select_bad_cols(self): # will have to rethink regex if you change message! g[['A', 'C']] + def test_first_last_timestamp(self): + # GH15884 + df = pd.DataFrame({'time': [pd.Timestamp('2012-01-01 13:00:00+00:00')], + 'A': [3]}) + result = df.groupby('A', as_index=False).first() + assert_frame_equal(df, result) + result = df.groupby('A', as_index=False).last() + assert_frame_equal(df, result) + def test_first_last_nth(self): # tests for first / last / nth grouped = self.df.groupby('A')