Skip to content

Commit 6d771fc

Browse files
Preserve base and loffset arguments in resample (#7444)
* [test-upstream] Preserve base and loffset arguments in resample While pandas is getting set to remove these, we have not had a chance to emit a deprecation warning yet for them in xarray. This should hopefully give users some extra time to adapt. * Emit warning when base is not None Co-authored-by: Deepak Cherian <[email protected]> * Modify warnings to refer loffset and base as parameters; add tests * Add type validation for loffset arguments * Add typing and support for pd.Timedelta as an loffset * pd.Timedelta is a subclass of datetime.timedelta * [test-upstream] Remove unneeded skipif * Fix failing tests * [test-upstream] Add return type to tests * [test-upstream] Update documentation * [test-upstream] Fix mypy errors in tests * Move _convert_base_to_offset to pdcompat and add a few more tests * Use offset instead of base in documentation --------- Co-authored-by: Deepak Cherian <[email protected]>
1 parent 821dc24 commit 6d771fc

File tree

8 files changed

+239
-64
lines changed

8 files changed

+239
-64
lines changed

doc/user-guide/weather-climate.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:
233233

234234
.. ipython:: python
235235
236-
da.resample(time="81T", closed="right", label="right", base=3).mean()
236+
da.resample(time="81T", closed="right", label="right", offset="3T").mean()
237237
238238
.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations
239239
.. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601

doc/whats-new.rst

+7
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ Breaking changes
3434

3535
Deprecations
3636
~~~~~~~~~~~~
37+
- Following pandas, the ``base`` and ``loffset`` parameters of
38+
:py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been
39+
deprecated and will be removed in a future version of xarray. Using the
40+
``origin`` or ``offset`` parameters is recommended as a replacement for using
41+
the ``base`` parameter and using time offset arithmetic is recommended as a
42+
replacement for using the ``loffset`` parameter (:pull:`7444`). By `Spencer
43+
Clark <https://github.com/spencerkclark>`_.
3744

3845

3946
Bug fixes

xarray/core/common.py

+48-22
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,14 @@
1313
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
1414
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
1515
from xarray.core.options import OPTIONS, _get_keep_attrs
16+
from xarray.core.pdcompat import _convert_base_to_offset
1617
from xarray.core.pycompat import is_duck_dask_array
17-
from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar
18+
from xarray.core.utils import (
19+
Frozen,
20+
either_dict_or_kwargs,
21+
emit_user_level_warning,
22+
is_scalar,
23+
)
1824

1925
try:
2026
import cftime
@@ -845,6 +851,12 @@ def _resample(
845851
For frequencies that evenly subdivide 1 day, the "origin" of the
846852
aggregated intervals. For example, for "24H" frequency, base could
847853
range from 0 through 23.
854+
855+
.. deprecated:: 2023.03.0
856+
Following pandas, the ``base`` parameter is deprecated in favor
857+
of the ``origin`` and ``offset`` parameters, and will be removed
858+
in a future version of xarray.
859+
848860
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
849861
The datetime on which to adjust the grouping. The timezone of origin
850862
must match the timezone of the index.
@@ -860,6 +872,12 @@ def _resample(
860872
loffset : timedelta or str, optional
861873
Offset used to adjust the resampled time labels. Some pandas date
862874
offset strings are supported.
875+
876+
.. deprecated:: 2023.03.0
877+
Following pandas, the ``loffset`` parameter is deprecated in favor
878+
of using time offset arithmetic, and will be removed in a future
879+
version of xarray.
880+
863881
restore_coord_dims : bool, optional
864882
If True, also restore the dimension order of multi-dimensional
865883
coordinates.
@@ -930,8 +948,8 @@ def _resample(
930948
"""
931949
# TODO support non-string indexer after removing the old API.
932950

933-
from xarray.coding.cftimeindex import CFTimeIndex
934951
from xarray.core.dataarray import DataArray
952+
from xarray.core.groupby import TimeResampleGrouper
935953
from xarray.core.resample import RESAMPLE_DIM
936954

937955
if keep_attrs is not None:
@@ -961,28 +979,36 @@ def _resample(
961979
dim_name: Hashable = dim
962980
dim_coord = self[dim]
963981

964-
if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
965-
from xarray.core.resample_cftime import CFTimeGrouper
966-
967-
grouper = CFTimeGrouper(
968-
freq=freq,
969-
closed=closed,
970-
label=label,
971-
base=base,
972-
loffset=loffset,
973-
origin=origin,
974-
offset=offset,
982+
if loffset is not None:
983+
emit_user_level_warning(
984+
"Following pandas, the `loffset` parameter to resample will be deprecated "
985+
"in a future version of xarray. Switch to using time offset arithmetic.",
986+
FutureWarning,
975987
)
976-
else:
977-
grouper = pd.Grouper(
978-
freq=freq,
979-
closed=closed,
980-
label=label,
981-
base=base,
982-
offset=offset,
983-
origin=origin,
984-
loffset=loffset,
988+
989+
if base is not None:
990+
emit_user_level_warning(
991+
"Following pandas, the `base` parameter to resample will be deprecated in "
992+
"a future version of xarray. Switch to using `origin` or `offset` instead.",
993+
FutureWarning,
985994
)
995+
996+
if base is not None and offset is not None:
997+
raise ValueError("base and offset cannot be present at the same time")
998+
999+
if base is not None:
1000+
index = self._indexes[dim_name].to_pandas_index()
1001+
offset = _convert_base_to_offset(base, freq, index)
1002+
1003+
grouper = TimeResampleGrouper(
1004+
freq=freq,
1005+
closed=closed,
1006+
label=label,
1007+
origin=origin,
1008+
offset=offset,
1009+
loffset=loffset,
1010+
)
1011+
9861012
group = DataArray(
9871013
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
9881014
)

xarray/core/groupby.py

+65-13
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
from xarray.core.dataarray import DataArray
4242
from xarray.core.dataset import Dataset
43+
from xarray.core.types import DatetimeLike, SideOptions
4344
from xarray.core.utils import Frozen
4445

4546
GroupKey = Any
@@ -245,7 +246,10 @@ def _unique_and_monotonic(group: T_Group) -> bool:
245246
return index.is_unique and index.is_monotonic_increasing
246247

247248

248-
def _apply_loffset(grouper, result):
249+
def _apply_loffset(
250+
loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
251+
result: pd.Series | pd.DataFrame,
252+
):
249253
"""
250254
(copied from pandas)
251255
if loffset is set, offset the result index
@@ -258,17 +262,25 @@ def _apply_loffset(grouper, result):
258262
result : Series or DataFrame
259263
the result of resample
260264
"""
265+
# pd.Timedelta is a subclass of datetime.timedelta so we do not need to
266+
# include it in instance checks.
267+
if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
268+
raise ValueError(
269+
f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
270+
f"Got {loffset}."
271+
)
272+
273+
if isinstance(loffset, str):
274+
loffset = pd.tseries.frequencies.to_offset(loffset)
261275

262276
needs_offset = (
263-
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
277+
isinstance(loffset, (pd.DateOffset, datetime.timedelta))
264278
and isinstance(result.index, pd.DatetimeIndex)
265279
and len(result.index) > 0
266280
)
267281

268282
if needs_offset:
269-
result.index = result.index + grouper.loffset
270-
271-
grouper.loffset = None
283+
result.index = result.index + loffset
272284

273285

274286
class GroupBy(Generic[T_Xarray]):
@@ -530,14 +542,7 @@ def __repr__(self) -> str:
530542
)
531543

532544
def _get_index_and_items(self, index, grouper):
533-
from xarray.core.resample_cftime import CFTimeGrouper
534-
535-
s = pd.Series(np.arange(index.size), index)
536-
if isinstance(grouper, CFTimeGrouper):
537-
first_items = grouper.first_items(index)
538-
else:
539-
first_items = s.groupby(grouper).first()
540-
_apply_loffset(grouper, first_items)
545+
first_items = grouper.first_items(index)
541546
full_index = first_items.index
542547
if first_items.isnull().any():
543548
first_items = first_items.dropna()
@@ -1365,3 +1370,50 @@ class DatasetGroupBy( # type: ignore[misc]
13651370
ImplementsDatasetReduce,
13661371
):
13671372
__slots__ = ()
1373+
1374+
1375+
class TimeResampleGrouper:
1376+
def __init__(
1377+
self,
1378+
freq: str,
1379+
closed: SideOptions | None,
1380+
label: SideOptions | None,
1381+
origin: str | DatetimeLike,
1382+
offset: pd.Timedelta | datetime.timedelta | str | None,
1383+
loffset: datetime.timedelta | str | None,
1384+
):
1385+
self.freq = freq
1386+
self.closed = closed
1387+
self.label = label
1388+
self.origin = origin
1389+
self.offset = offset
1390+
self.loffset = loffset
1391+
1392+
def first_items(self, index):
1393+
from xarray import CFTimeIndex
1394+
from xarray.core.resample_cftime import CFTimeGrouper
1395+
1396+
if isinstance(index, CFTimeIndex):
1397+
grouper = CFTimeGrouper(
1398+
freq=self.freq,
1399+
closed=self.closed,
1400+
label=self.label,
1401+
origin=self.origin,
1402+
offset=self.offset,
1403+
loffset=self.loffset,
1404+
)
1405+
return grouper.first_items(index)
1406+
else:
1407+
s = pd.Series(np.arange(index.size), index)
1408+
grouper = pd.Grouper(
1409+
freq=self.freq,
1410+
closed=self.closed,
1411+
label=self.label,
1412+
origin=self.origin,
1413+
offset=self.offset,
1414+
)
1415+
1416+
first_items = s.groupby(grouper).first()
1417+
if self.loffset is not None:
1418+
_apply_loffset(self.loffset, first_items)
1419+
return first_items

xarray/core/pdcompat.py

+23
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
from enum import Enum
3939
from typing import Literal
4040

41+
import pandas as pd
42+
43+
from xarray.coding import cftime_offsets
44+
4145

4246
def count_not_none(*args) -> int:
4347
"""Compute the number of non-None arguments.
@@ -68,3 +72,22 @@ def __repr__(self) -> str:
6872
_NoDefault.no_default
6973
) # Sentinel indicating the default value following pandas
7074
NoDefault = Literal[_NoDefault.no_default] # For typing following pandas
75+
76+
77+
def _convert_base_to_offset(base, freq, index):
78+
"""Required until we officially deprecate the base argument to resample. This
79+
translates a provided `base` argument to an `offset` argument, following logic
80+
from pandas.
81+
"""
82+
from xarray.coding.cftimeindex import CFTimeIndex
83+
84+
if isinstance(index, pd.DatetimeIndex):
85+
freq = pd.tseries.frequencies.to_offset(freq)
86+
if isinstance(freq, pd.offsets.Tick):
87+
return pd.Timedelta(base * freq.nanos // freq.n)
88+
elif isinstance(index, CFTimeIndex):
89+
freq = cftime_offsets.to_offset(freq)
90+
if isinstance(freq, cftime_offsets.Tick):
91+
return base * freq.as_timedelta() // freq.n
92+
else:
93+
raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.")

xarray/core/resample_cftime.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,13 @@ def __init__(
7171
freq: str | BaseCFTimeOffset,
7272
closed: SideOptions | None = None,
7373
label: SideOptions | None = None,
74-
base: int | None = None,
7574
loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None,
7675
origin: str | CFTimeDatetime = "start_day",
7776
offset: str | datetime.timedelta | None = None,
7877
):
7978
self.offset: datetime.timedelta | None
8079
self.closed: SideOptions
8180
self.label: SideOptions
82-
83-
if base is not None and offset is not None:
84-
raise ValueError("base and offset cannot be provided at the same time")
85-
8681
self.freq = to_offset(freq)
8782
self.loffset = loffset
8883
self.origin = origin
@@ -122,9 +117,6 @@ def __init__(
122117
else:
123118
self.label = label
124119

125-
if base is not None and isinstance(self.freq, Tick):
126-
offset = type(self.freq)(n=base % self.freq.n).as_timedelta()
127-
128120
if offset is not None:
129121
try:
130122
self.offset = _convert_offset_to_timedelta(offset)
@@ -150,6 +142,16 @@ def first_items(self, index: CFTimeIndex):
150142
index, self.freq, self.closed, self.label, self.origin, self.offset
151143
)
152144
if self.loffset is not None:
145+
if not isinstance(
146+
self.loffset, (str, datetime.timedelta, BaseCFTimeOffset)
147+
):
148+
# BaseCFTimeOffset is not public API so we do not include it in
149+
# the error message for now.
150+
raise ValueError(
151+
f"`loffset` must be a str or datetime.timedelta object. "
152+
f"Got {self.loffset}."
153+
)
154+
153155
if isinstance(self.loffset, datetime.timedelta):
154156
labels = labels + self.loffset
155157
else:

0 commit comments

Comments
 (0)