Skip to content

BUG: Series.resample fails on NaT index #39229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 26, 2021
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ Groupby/resample/rolling
- Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
- Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
- Bug in :meth:`Series.resample` where it would raise when the index was a :class:`PeriodIndex` consisting entirely of ``NaT`` values (:issue:`39227`)

Reshaping
^^^^^^^^^
Expand Down
31 changes: 22 additions & 9 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,8 @@ def _wrap_result(self, result):

if isinstance(result, ABCSeries) and result.empty:
obj = self.obj
result.index = _asfreq_compat(obj.index, freq=self.freq)
# When index is all NaT, result is empty but index is not
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
result.name = getattr(obj, "name", None)

return result
Expand Down Expand Up @@ -1651,10 +1652,14 @@ def _get_period_bins(self, ax: PeriodIndex):
nat_count = np.sum(memb._isnan)
memb = memb[~memb._isnan]

# if index contains no valid (non-NaT) values, return empty index
if not len(memb):
# index contains no valid (non-NaT) values
bins = np.array([], dtype=np.int64)
binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
return binner, [], labels
if len(ax) > 0:
# index is all NaT
binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
return binner, bins, labels

freq_mult = self.freq.n

Expand Down Expand Up @@ -1700,12 +1705,7 @@ def _get_period_bins(self, ax: PeriodIndex):
bins = memb.searchsorted(prng, side="left")

if nat_count > 0:
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
# shift bins by the number of NaT
bins += nat_count
bins = np.insert(bins, 0, nat_count)
binner = binner.insert(0, NaT)
labels = labels.insert(0, NaT)
binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)

return binner, bins, labels

Expand Down Expand Up @@ -1849,6 +1849,19 @@ def _get_period_range_edges(
return first, last


def _insert_nat_bin(
binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
) -> Tuple[PeriodIndex, np.ndarray, PeriodIndex]:
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
# shift bins by the number of NaT
assert nat_count > 0
bins += nat_count
bins = np.insert(bins, 0, nat_count)
binner = binner.insert(0, NaT)
labels = labels.insert(0, NaT)
return binner, bins, labels


def _adjust_dates_anchored(
first, last, freq, closed="right", origin="start_day", offset=None
):
Expand Down
26 changes: 25 additions & 1 deletion pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas import DataFrame, Series
from pandas import DataFrame, NaT, PeriodIndex, Series
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
Expand Down Expand Up @@ -110,6 +110,30 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
tm.assert_series_equal(result, expected, check_dtype=False)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_nat_index_series(request, freq, series, resample_method):
# GH39227
# Resampling a Series whose PeriodIndex holds only NaT is expected to
# produce an empty result for every resample method, not raise.

# The monthly frequency is marked as an expected failure; the cause has
# not been identified (see the reason string below).
if freq == "M":
request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

# Replace the fixture's index with an all-NaT PeriodIndex of the same length,
# then resample at that same frequency and apply the method under test.
s = series.copy()
s.index = PeriodIndex([NaT] * len(s), freq=freq)
result = getattr(s.resample(freq), resample_method)()

# "ohlc" is compared against an empty four-column DataFrame; every other
# method is compared against an empty slice of the input Series.
if resample_method == "ohlc":
expected = DataFrame(
[], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
expected = s[:0].copy()
tm.assert_series_equal(result, expected, check_dtype=False)
# The result index and its freq must match the expected empty index.
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,9 +787,9 @@ def test_resample_with_nat(self, periods, values, freq, expected_values):
def test_resample_with_only_nat(self):
# GH 13224
pi = PeriodIndex([pd.NaT] * 3, freq="S")
frame = DataFrame([2, 3, 5], index=pi)
frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame(index=expected_index)
expected = DataFrame(index=expected_index, columns=["a"], dtype="int64")
result = frame.resample("1s").mean()
tm.assert_frame_equal(result, expected)

Expand Down