Skip to content

Commit cc74d3a

Browse files
Add days_in_year and decimal_year to dt accessor (#9105)
* Add days_in_year and decimal_year to dt accessor * Upd whats new - add gregorian calendar - rename to decimal_year * Add to api.rst and pr number * Add requires cftime decorators where needed * Rewrite functions using suggestions from review * cleaner custom date field - docstrings - remove bad merge * add new fields to dask access test * Revert to rollback method * Revert "Revert to rollback method" This reverts commit 3f429c9. * explicit float cast? * Revert back to rollback method * Fix dask compatibility issues * Approach that passes tests under NumPy 1.26.4 * Adapt decimal_year test to be more comprehensive * Use proper sphinx roles for cross-referencing. --------- Co-authored-by: Spencer Clark <[email protected]>
1 parent cea354f commit cc74d3a

File tree

6 files changed

+161
-52
lines changed

6 files changed

+161
-52
lines changed

doc/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -529,9 +529,11 @@ Datetimelike properties
529529
DataArray.dt.quarter
530530
DataArray.dt.days_in_month
531531
DataArray.dt.daysinmonth
532+
DataArray.dt.days_in_year
532533
DataArray.dt.season
533534
DataArray.dt.time
534535
DataArray.dt.date
536+
DataArray.dt.decimal_year
535537
DataArray.dt.calendar
536538
DataArray.dt.is_month_start
537539
DataArray.dt.is_month_end

doc/whats-new.rst

+7
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ v2024.07.1 (unreleased)
2222

2323
New Features
2424
~~~~~~~~~~~~
25+
26+
- Add :py:attr:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:attr:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`).
27+
By `Pascal Bourgault <https://github.com/aulemahal>`_.
28+
29+
Performance
30+
~~~~~~~~~~~
31+
2532
- Make chunk manager an option in ``set_options`` (:pull:`9362`).
2633
By `Tom White <https://github.com/tomwhite>`_.
2734
- Support for :ref:`grouping by multiple variables <groupby.multiple>`.

xarray/coding/calendar_ops.py

+63-52
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99
_should_cftime_be_used,
1010
convert_times,
1111
)
12-
from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like
12+
from xarray.core.common import (
13+
_contains_datetime_like_objects,
14+
full_like,
15+
is_np_datetime_like,
16+
)
17+
from xarray.core.computation import apply_ufunc
1318

1419
try:
1520
import cftime
@@ -25,16 +30,6 @@
2530
]
2631

2732

28-
def _days_in_year(year, calendar, use_cftime=True):
29-
"""Return the number of days in the input year according to the input calendar."""
30-
date_type = get_date_type(calendar, use_cftime=use_cftime)
31-
if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
32-
difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1)
33-
else:
34-
difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1)
35-
return difference.days
36-
37-
3833
def convert_calendar(
3934
obj,
4035
calendar,
@@ -191,11 +186,7 @@ def convert_calendar(
191186
# Special case for conversion involving 360_day calendar
192187
if align_on == "year":
193188
# Instead of translating dates directly, this tries to keep the position within a year similar.
194-
new_doy = time.groupby(f"{dim}.year").map(
195-
_interpolate_day_of_year,
196-
target_calendar=calendar,
197-
use_cftime=use_cftime,
198-
)
189+
new_doy = _interpolate_day_of_year(time, target_calendar=calendar)
199190
elif align_on == "random":
200191
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
201192
new_doy = time.groupby(f"{dim}.year").map(
@@ -242,16 +233,25 @@ def convert_calendar(
242233
return out
243234

244235

245-
def _interpolate_day_of_year(time, target_calendar, use_cftime):
246-
"""Returns the nearest day in the target calendar of the corresponding
247-
"decimal year" in the source calendar.
248-
"""
249-
year = int(time.dt.year[0])
250-
source_calendar = time.dt.calendar
236+
def _is_leap_year(years, calendar):
237+
func = np.vectorize(cftime.is_leap_year)
238+
return func(years, calendar=calendar)
239+
240+
241+
def _days_in_year(years, calendar):
242+
"""The number of days in the year according to the given calendar."""
243+
if calendar == "360_day":
244+
return full_like(years, 360)
245+
return _is_leap_year(years, calendar).astype(int) + 365
246+
247+
248+
def _interpolate_day_of_year(times, target_calendar):
249+
"""Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar."""
250+
source_calendar = times.dt.calendar
251251
return np.round(
252-
_days_in_year(year, target_calendar, use_cftime)
253-
* time.dt.dayofyear
254-
/ _days_in_year(year, source_calendar, use_cftime)
252+
_days_in_year(times.dt.year, target_calendar)
253+
* times.dt.dayofyear
254+
/ _days_in_year(times.dt.year, source_calendar)
255255
).astype(int)
256256

257257

@@ -260,18 +260,18 @@ def _random_day_of_year(time, target_calendar, use_cftime):
260260
261261
Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
262262
"""
263-
year = int(time.dt.year[0])
263+
year = time.dt.year[0]
264264
source_calendar = time.dt.calendar
265265
new_doy = np.arange(360) + 1
266266
rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
267267
if source_calendar == "360_day":
268268
for idx in rm_idx:
269269
new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
270-
if _days_in_year(year, target_calendar, use_cftime) == 366:
270+
if _days_in_year(year, target_calendar) == 366:
271271
new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
272272
elif target_calendar == "360_day":
273273
new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
274-
if _days_in_year(year, source_calendar, use_cftime) == 366:
274+
if _days_in_year(year, source_calendar) == 366:
275275
new_doy = np.insert(new_doy, 60, -1)
276276
return new_doy[time.dt.dayofyear - 1]
277277

@@ -304,32 +304,45 @@ def _convert_to_new_calendar_with_new_day_of_year(
304304
return np.nan
305305

306306

307-
def _datetime_to_decimal_year(times, dim="time", calendar=None):
308-
"""Convert a datetime DataArray to decimal years according to its calendar or the given one.
307+
def _decimal_year_cftime(time, year, days_in_year, *, date_class):
308+
year_start = date_class(year, 1, 1)
309+
delta = np.timedelta64(time - year_start, "ns")
310+
days_in_year = np.timedelta64(days_in_year, "D")
311+
return year + delta / days_in_year
312+
313+
314+
def _decimal_year_numpy(time, year, days_in_year, *, dtype):
315+
time = np.asarray(time).astype(dtype)
316+
year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype)
317+
delta = time - year_start
318+
days_in_year = np.timedelta64(days_in_year, "D")
319+
return year + delta / days_in_year
320+
321+
322+
def _decimal_year(times):
323+
"""Convert a datetime DataArray to decimal years according to its calendar.
309324
310325
The decimal year of a timestamp is its year plus its sub-year component
311326
converted to the fraction of its year.
312327
Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar,
313328
2000.16301 in a "noleap" or 2000.16806 in a "360_day".
314329
"""
315-
from xarray.core.dataarray import DataArray
316-
317-
calendar = calendar or times.dt.calendar
318-
319-
if is_np_datetime_like(times.dtype):
320-
times = times.copy(data=convert_times(times.values, get_date_type("standard")))
321-
322-
def _make_index(time):
323-
year = int(time.dt.year[0])
324-
doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar)
325-
return DataArray(
326-
year + doys / _days_in_year(year, calendar),
327-
dims=(dim,),
328-
coords=time.coords,
329-
name=dim,
330-
)
331-
332-
return times.groupby(f"{dim}.year").map(_make_index)
330+
if times.dtype == "O":
331+
function = _decimal_year_cftime
332+
kwargs = {"date_class": get_date_type(times.dt.calendar, True)}
333+
else:
334+
function = _decimal_year_numpy
335+
kwargs = {"dtype": times.dtype}
336+
return apply_ufunc(
337+
function,
338+
times,
339+
times.dt.year,
340+
times.dt.days_in_year,
341+
kwargs=kwargs,
342+
vectorize=True,
343+
dask="parallelized",
344+
output_dtypes=[np.float64],
345+
)
333346

334347

335348
def interp_calendar(source, target, dim="time"):
@@ -372,9 +385,7 @@ def interp_calendar(source, target, dim="time"):
372385
f"Both 'source.{dim}' and 'target' must contain datetime objects."
373386
)
374387

375-
source_calendar = source[dim].dt.calendar
376388
target_calendar = target.dt.calendar
377-
378389
if (
379390
source[dim].time.dt.year == 0
380391
).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
@@ -383,8 +394,8 @@ def interp_calendar(source, target, dim="time"):
383394
)
384395

385396
out = source.copy()
386-
out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar)
387-
target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar)
397+
out[dim] = _decimal_year(source[dim])
398+
target_idx = _decimal_year(target)
388399
out = out.interp(**{dim: target_idx})
389400
out[dim] = target
390401
return out

xarray/coding/cftimeindex.py

+5
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,11 @@ def round(self, freq):
801801
"""
802802
return self._round_via_method(freq, _round_to_nearest_half_even)
803803

804+
@property
805+
def is_leap_year(self):
806+
func = np.vectorize(cftime.is_leap_year)
807+
return func(self.year, calendar=self.calendar)
808+
804809

805810
def _parse_iso8601_without_reso(date_type, datetime_str):
806811
date, _ = _parse_iso8601_with_reso(date_type, datetime_str)

xarray/core/accessor_dt.py

+29
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
import numpy as np
77
import pandas as pd
88

9+
from xarray.coding.calendar_ops import _decimal_year
910
from xarray.coding.times import infer_calendar_name
1011
from xarray.core import duck_array_ops
1112
from xarray.core.common import (
1213
_contains_datetime_like_objects,
14+
full_like,
1315
is_np_datetime_like,
1416
is_np_timedelta_like,
1517
)
@@ -543,6 +545,33 @@ def calendar(self) -> CFCalendar:
543545
"""
544546
return infer_calendar_name(self._obj.data)
545547

548+
@property
549+
def days_in_year(self) -> T_DataArray:
550+
"""The number of days in the year."""
551+
if self.calendar == "360_day":
552+
result = full_like(self.year, 360)
553+
else:
554+
result = self.is_leap_year.astype(int) + 365
555+
newvar = Variable(
556+
dims=self._obj.dims,
557+
attrs=self._obj.attrs,
558+
encoding=self._obj.encoding,
559+
data=result,
560+
)
561+
return self._obj._replace(newvar, name="days_in_year")
562+
563+
@property
564+
def decimal_year(self) -> T_DataArray:
565+
"""Convert the dates to fractional years."""
566+
result = _decimal_year(self._obj)
567+
newvar = Variable(
568+
dims=self._obj.dims,
569+
attrs=self._obj.attrs,
570+
encoding=self._obj.encoding,
571+
data=result,
572+
)
573+
return self._obj._replace(newvar, name="decimal_year")
574+
546575

547576
class TimedeltaAccessor(TimeAccessor[T_DataArray]):
548577
"""Access Timedelta fields for DataArrays with Timedelta-like dtypes.

xarray/tests/test_accessor_dt.py

+55
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,17 @@ def test_strftime(self) -> None:
142142
"2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1]
143143
)
144144

145+
@requires_cftime
146+
@pytest.mark.parametrize(
147+
"calendar,expected",
148+
[("standard", 366), ("noleap", 365), ("360_day", 360), ("all_leap", 366)],
149+
)
150+
def test_days_in_year(self, calendar, expected) -> None:
151+
assert (
152+
self.data.convert_calendar(calendar, align_on="year").time.dt.days_in_year
153+
== expected
154+
).all()
155+
145156
def test_not_datetime_type(self) -> None:
146157
nontime_data = self.data.copy()
147158
int_data = np.arange(len(self.data.time)).astype("int8")
@@ -177,6 +188,7 @@ def test_not_datetime_type(self) -> None:
177188
"is_year_start",
178189
"is_year_end",
179190
"is_leap_year",
191+
"days_in_year",
180192
],
181193
)
182194
def test_dask_field_access(self, field) -> None:
@@ -698,3 +710,46 @@ def test_cftime_round_accessor(
698710
result = cftime_rounding_dataarray.dt.round(freq)
699711

700712
assert_identical(result, expected)
713+
714+
715+
@pytest.mark.parametrize(
716+
"use_cftime",
717+
[False, pytest.param(True, marks=requires_cftime)],
718+
ids=lambda x: f"use_cftime={x}",
719+
)
720+
@pytest.mark.parametrize(
721+
"use_dask",
722+
[False, pytest.param(True, marks=requires_dask)],
723+
ids=lambda x: f"use_dask={x}",
724+
)
725+
def test_decimal_year(use_cftime, use_dask) -> None:
726+
year = 2000
727+
periods = 10
728+
freq = "h"
729+
730+
shape = (2, 5)
731+
dims = ["x", "y"]
732+
hours_in_year = 24 * 366
733+
734+
times = xr.date_range(f"{year}", periods=periods, freq=freq, use_cftime=use_cftime)
735+
736+
da = xr.DataArray(times.values.reshape(shape), dims=dims)
737+
738+
if use_dask:
739+
da = da.chunk({"y": 2})
740+
# Computing the decimal year for a cftime datetime array requires a
741+
# number of small computes (6):
742+
# - 4x one compute per .dt accessor call (requires inspecting one
743+
# object-dtype array element to see if it is time-like)
744+
# - 2x one compute per calendar inference (requires inspecting one
745+
# array element to read off the calendar)
746+
max_computes = 6 * use_cftime
747+
with raise_if_dask_computes(max_computes=max_computes):
748+
result = da.dt.decimal_year
749+
else:
750+
result = da.dt.decimal_year
751+
752+
expected = xr.DataArray(
753+
year + np.arange(periods).reshape(shape) / hours_in_year, dims=dims
754+
)
755+
xr.testing.assert_equal(result, expected)

0 commit comments

Comments
 (0)