Skip to content

Commit 650a981

Browse files
Fix CFTimeIndex-related errors stemming from updates in pandas (#3764)
* Allow subtraction of a generic Index of cftime.datetimes from a CFTimeIndex
* black
* Test that NotImplemented logic works
* Vendor _get_nearest_indexer and _filter_indexer_tolerance
* Test OverflowError in __rsub__
* Fix name of pandas method in docstring
* Add what's new entries
* Enable use of tolerance greater than 292 years
* newline

Co-authored-by: Deepak Cherian <[email protected]>
1 parent 739b347 commit 650a981

File tree

3 files changed

+113
-10
lines changed

3 files changed

+113
-10
lines changed

doc/whats-new.rst

+6
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ Internal Changes
9696
- Changed test_open_mfdataset_list_attr to only run with dask installed
9797
(:issue:`3777`, :pull:`3780`).
9898
By `Bruno Pagani <https://github.com/ArchangeGabriel>`_.
99+
- Preserved the ability to index with ``method="nearest"`` with a
100+
:py:class:`CFTimeIndex` with pandas versions greater than 1.0.1
101+
(:issue:`3751`). By `Spencer Clark <https://github.com/spencerkclark>`_.
102+
- Greater flexibility and improved test coverage of subtracting various types
103+
of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark
104+
<https://github.com/spencerkclark>`_.
99105
- Updated Azure CI MacOS image, given pending removal.
100106
By `Maximilian Roos <https://github.com/max-sixty>`_
101107
- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`).

xarray/coding/cftimeindex.py

+46-8
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050
from xarray.core.utils import is_scalar
5151

52+
from ..core.common import _contains_cftime_datetimes
5253
from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name
5354

5455

@@ -326,6 +327,32 @@ def _get_string_slice(self, key):
326327
raise KeyError(key)
327328
return loc
328329

330+
def _get_nearest_indexer(self, target, limit, tolerance):
    """Find, for every label in ``target``, the position of the nearest label.

    Adapted from pandas.Index._get_nearest_indexer

    Two candidate positions are looked up per target label (one with the
    "pad" method, one with the "backfill" method) and the candidate whose
    value is closer to the target label is kept.  Distances are computed by
    subtracting the underlying ``values`` arrays directly.

    Parameters
    ----------
    target : index-like
        Labels to look up; must expose ``.values``.
    limit : int or None
        Forwarded unchanged to ``self.get_indexer``.
    tolerance : object or None
        When not None, the result is passed through
        ``self._filter_indexer_tolerance`` before being returned.

    Returns
    -------
    numpy.ndarray
        The selected positions, one per target label.
    """
    pad_positions = self.get_indexer(target, "pad", limit=limit)
    backfill_positions = self.get_indexer(target, "backfill", limit=limit)

    wanted = target.values
    pad_gap = abs(self.values[pad_positions] - wanted)
    backfill_gap = abs(self.values[backfill_positions] - wanted)

    # On a tie, a monotonically increasing index keeps the backfill candidate
    # (strict comparison); any other index keeps the pad candidate.
    if self.is_monotonic_increasing:
        pad_is_closer = pad_gap < backfill_gap
    else:
        pad_is_closer = pad_gap <= backfill_gap

    # A backfill position of -1 always yields the pad candidate.
    backfill_missing = backfill_positions == -1
    nearest = np.where(
        pad_is_closer | backfill_missing, pad_positions, backfill_positions
    )

    if tolerance is None:
        return nearest
    return self._filter_indexer_tolerance(target, nearest, tolerance)
346+
347+
def _filter_indexer_tolerance(self, target, indexer, tolerance):
    """Replace positions whose label is farther than ``tolerance`` with -1.

    Adapted from pandas.Index._filter_indexer_tolerance

    Parameters
    ----------
    target : pandas.Index or other
        Labels that were looked up.  A ``pandas.Index`` is unwrapped to its
        ``.values``; anything else is used in the subtraction as-is.
    indexer : array-like of int
        Candidate positions into ``self.values``.
    tolerance : object
        Largest allowed absolute difference (compared with ``<=``).

    Returns
    -------
    numpy.ndarray
        ``indexer`` with every out-of-tolerance entry set to -1.
    """
    reference = target.values if isinstance(target, pd.Index) else target
    gap = abs(self.values[indexer] - reference)
    return np.where(gap <= tolerance, indexer, -1)
355+
329356
def get_loc(self, key, method=None, tolerance=None):
330357
"""Adapted from pandas.tseries.index.DatetimeIndex.get_loc"""
331358
if isinstance(key, str):
@@ -427,24 +454,29 @@ def __radd__(self, other):
427454
return CFTimeIndex(other + np.array(self))
428455

429456
def __sub__(self, other):
    """Subtract ``other`` from this index.

    Dispatch, in order:

    * ``other`` contains ``datetime.timedelta`` objects -> a ``CFTimeIndex``
      of shifted dates.
    * ``other`` is a ``pandas.TimedeltaIndex`` -> converted with
      ``to_pytimedelta`` and handled like the case above.
    * ``other`` contains cftime datetimes -> a ``pandas.TimedeltaIndex`` of
      differences.
    * anything else -> ``NotImplemented``, so Python can try the reflected
      operation or raise ``TypeError``.

    Raises
    ------
    ValueError
        If building the ``pandas.TimedeltaIndex`` of differences raises
        ``OverflowError``.  The original ``OverflowError`` is attached as
        ``__cause__``.
    """
    if _contains_datetime_timedeltas(other):
        return CFTimeIndex(np.array(self) - other)
    elif isinstance(other, pd.TimedeltaIndex):
        return CFTimeIndex(np.array(self) - other.to_pytimedelta())
    elif _contains_cftime_datetimes(np.array(other)):
        try:
            return pd.TimedeltaIndex(np.array(self) - np.array(other))
        except OverflowError as err:
            # Chain explicitly so the traceback marks the overflow as the
            # direct cause rather than an incidental second failure.
            raise ValueError(
                "The time difference exceeds the range of values "
                "that can be expressed at the nanosecond resolution."
            ) from err
    else:
        return NotImplemented
445471

446472
def __rsub__(self, other):
    """Compute ``other - self`` as a ``pandas.TimedeltaIndex``.

    Raises
    ------
    ValueError
        If the subtraction or the ``pandas.TimedeltaIndex`` construction
        raises ``OverflowError``.  The original ``OverflowError`` is attached
        as ``__cause__``.
    """
    try:
        return pd.TimedeltaIndex(other - np.array(self))
    except OverflowError as err:
        # Chain explicitly so the traceback marks the overflow as the direct
        # cause rather than an incidental second failure.
        raise ValueError(
            "The time difference exceeds the range of values "
            "that can be expressed at the nanosecond resolution."
        ) from err
448480

449481
def to_datetimeindex(self, unsafe=False):
450482
"""If possible, convert this index to a pandas.DatetimeIndex.
@@ -633,6 +665,12 @@ def _parse_array_of_cftime_strings(strings, date_type):
633665
).reshape(strings.shape)
634666

635667

668+
def _contains_datetime_timedeltas(array):
    """Check if an input array contains datetime.timedelta objects.

    The input is promoted to at least one dimension and only its first
    element is inspected.  Empty input contains no timedeltas, so it yields
    ``False`` instead of raising ``IndexError`` on the ``array[0]`` lookup.
    """
    array = np.atleast_1d(array)
    if array.size == 0:
        return False
    return isinstance(array[0], timedelta)
672+
673+
636674
def _cftimeindex_from_i8(values, date_type, name):
637675
"""Construct a CFTimeIndex from an array of integers.
638676

xarray/tests/test_cftimeindex.py

+61-2
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,21 @@ def test_sel_date_scalar(da, date_type, index):
451451

452452

453453
@requires_cftime
def test_sel_date_distant_date(da, date_type, index):
    """Nearest-neighbour selection works for a date far outside the index.

    The test selects the year-2000 date with ``method="nearest"`` and expects
    the entry at ``index[3]`` (data value 4).
    """
    # NOTE(review): the xfail marker for issue 3751 that preceded this test was
    # dropped.  This change set's changelog lists 3751 as resolved, so the
    # marker would only mask regressions here -- confirm the test passes on
    # the supported pandas versions.
    expected = xr.DataArray(4).assign_coords(time=index[3])
    result = da.sel(time=date_type(2000, 1, 1), method="nearest")
    assert_identical(result, expected)
459+
460+
454461
@requires_cftime
455462
@pytest.mark.parametrize(
456463
"sel_kwargs",
457-
[{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}],
464+
[
465+
{"method": "nearest"},
466+
{"method": "nearest", "tolerance": timedelta(days=70)},
467+
{"method": "nearest", "tolerance": timedelta(days=1800000)},
468+
],
458469
)
459470
def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs):
460471
expected = xr.DataArray(2).assign_coords(time=index[1])
@@ -738,7 +749,7 @@ def test_timedeltaindex_add_cftimeindex(calendar):
738749

739750

740751
@requires_cftime
741-
def test_cftimeindex_sub(index):
752+
def test_cftimeindex_sub_timedelta(index):
742753
date_type = index.date_type
743754
expected_dates = [
744755
date_type(1, 1, 2),
@@ -753,6 +764,27 @@ def test_cftimeindex_sub(index):
753764
assert isinstance(result, CFTimeIndex)
754765

755766

767+
@requires_cftime
768+
@pytest.mark.parametrize(
769+
"other",
770+
[np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))],
771+
ids=["1d-array", "scalar-array"],
772+
)
773+
def test_cftimeindex_sub_timedelta_array(index, other):
774+
date_type = index.date_type
775+
expected_dates = [
776+
date_type(1, 1, 2),
777+
date_type(1, 2, 2),
778+
date_type(2, 1, 2),
779+
date_type(2, 2, 2),
780+
]
781+
expected = CFTimeIndex(expected_dates)
782+
result = index + timedelta(days=2)
783+
result = result - other
784+
assert result.equals(expected)
785+
assert isinstance(result, CFTimeIndex)
786+
787+
756788
@requires_cftime
757789
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
758790
def test_cftimeindex_sub_cftimeindex(calendar):
@@ -784,6 +816,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar):
784816
assert isinstance(result, pd.TimedeltaIndex)
785817

786818

819+
@requires_cftime
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
def test_distant_cftime_datetime_sub_cftimeindex(calendar):
    """A year-1 date minus a year-2000 index raises the documented ValueError."""
    times = xr.cftime_range("2000", periods=5, calendar=calendar)
    with pytest.raises(ValueError, match="difference exceeds"):
        times.date_type(1, 1, 1) - times
825+
826+
787827
@requires_cftime
788828
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
789829
def test_cftimeindex_sub_timedeltaindex(calendar):
@@ -795,6 +835,25 @@ def test_cftimeindex_sub_timedeltaindex(calendar):
795835
assert isinstance(result, CFTimeIndex)
796836

797837

838+
@requires_cftime
839+
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
840+
def test_cftimeindex_sub_index_of_cftime_datetimes(calendar):
841+
a = xr.cftime_range("2000", periods=5, calendar=calendar)
842+
b = pd.Index(a.values)
843+
expected = a - a
844+
result = a - b
845+
assert result.equals(expected)
846+
assert isinstance(result, pd.TimedeltaIndex)
847+
848+
849+
@requires_cftime
850+
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
851+
def test_cftimeindex_sub_not_implemented(calendar):
852+
a = xr.cftime_range("2000", periods=5, calendar=calendar)
853+
with pytest.raises(TypeError, match="unsupported operand"):
854+
a - 1
855+
856+
798857
@requires_cftime
799858
def test_cftimeindex_rsub(index):
800859
with pytest.raises(TypeError):

0 commit comments

Comments
 (0)