#1621 optional decode timedelta #4071

Merged: 6 commits, May 19, 2020

6 changes: 6 additions & 0 deletions doc/whats-new.rst
@@ -60,6 +60,12 @@ New Features
feature requires cftime version 1.1.0 or greater. By
`Spencer Clark <https://github.com/spencerkclark>`_.

- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`
(:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_zarr`,
:py:func:`xarray.decode_cf`), allowing the decoding of timedeltas to be
enabled or disabled independently of time decoding (:issue:`1621`).
By `Aureliana Barghini <https://github.com/aurghs>`_.

Bug fixes
~~~~~~~~~
- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`)
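As a quick, hedged illustration of the new keyword (not part of the PR; the file name, variable names, and the assumption of a netcdf4 backend are mine), ``decode_timedelta=False`` keeps CF-encoded durations as plain numbers while datetimes are still decoded:

```python
import numpy as np
import xarray as xr

# Build a small dataset whose "duration" variable is CF-encoded as integer hours
# and whose "t" coordinate is CF-encoded as days since an epoch.
ds = xr.Dataset(
    {"duration": ("t", np.array([6, 12, 24], dtype="int64"), {"units": "hours"})},
    coords={"t": ("t", np.array([0, 1, 2], dtype="int64"), {"units": "days since 2000-01-01"})},
)
ds.to_netcdf("example.nc")  # assumes the netcdf4 backend is installed

# Default: decode_timedelta follows decode_times, so "duration" comes back as timedelta64[ns].
decoded = xr.open_dataset("example.nc")

# With the new keyword, durations stay numeric while "t" is still decoded to datetime64[ns].
raw = xr.open_dataset("example.nc", decode_timedelta=False)
print(decoded["duration"].dtype, raw["duration"].dtype)  # timedelta64[ns] int64
```

The same keyword is accepted by ``open_dataarray``, ``open_zarr``, and ``decode_cf``, as the diffs below show.
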
16 changes: 16 additions & 0 deletions xarray/backends/api.py
@@ -303,6 +303,7 @@ def open_dataset(
drop_variables=None,
backend_kwargs=None,
use_cftime=None,
decode_timedelta=None,
):
"""Open and decode a dataset from a file or file-like object.

@@ -383,6 +384,11 @@ def open_dataset(
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.

Returns
-------
@@ -435,6 +441,7 @@
decode_times = False
concat_characters = False
decode_coords = False
decode_timedelta = False

if cache is None:
cache = chunks is None
@@ -451,6 +458,7 @@ def maybe_decode_store(store, lock=False):
decode_coords=decode_coords,
drop_variables=drop_variables,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
)

_protect_dataset_variables_inplace(ds, cache)
@@ -477,6 +485,7 @@ def maybe_decode_store(store, lock=False):
chunks,
drop_variables,
use_cftime,
decode_timedelta,
@shoyer (Member) commented on May 19, 2020:

In theory, it would be better organized to add this argument after ``decode_times``, but for now, putting it at the end as you have done here is a good choice for preserving backwards compatibility. I made a note about making most ``open_dataset`` arguments keyword-only in #4080.
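For context on that last point, a generic sketch of keyword-only arguments (illustrative only; this is not the signature proposed in this PR or in #4080):

```python
# Parameters after the bare "*" can only be passed by keyword, so new options
# can later be added or reordered without breaking positional callers.
def open_dataset(filename_or_obj, *, decode_times=None, decode_timedelta=None):
    return {"decode_times": decode_times, "decode_timedelta": decode_timedelta}


open_dataset("data.nc", decode_timedelta=False)  # fine
# open_dataset("data.nc", None, False)           # TypeError: takes 1 positional argument
```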

)
name_prefix = "open_dataset-%s" % token
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
@@ -561,6 +570,7 @@ def open_dataarray(
drop_variables=None,
backend_kwargs=None,
use_cftime=None,
decode_timedelta=None,
):
"""Open an DataArray from a file or file-like object containing a single
data variable.
@@ -640,6 +650,11 @@ def open_dataarray(
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.

Notes
-----
@@ -671,6 +686,7 @@
drop_variables=drop_variables,
backend_kwargs=backend_kwargs,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
)

if len(dataset.data_vars) != 1:
8 changes: 8 additions & 0 deletions xarray/backends/zarr.py
@@ -496,6 +496,7 @@ def open_zarr(
drop_variables=None,
consolidated=False,
overwrite_encoded_chunks=False,
decode_timedelta=None,
**kwargs,
):
"""Load and decode a dataset from a Zarr store.
@@ -555,6 +556,11 @@
consolidated : bool, optional
Whether to open the store using zarr's consolidated metadata
capability. Only works for stores that have already been consolidated.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.

Returns
-------
@@ -605,6 +611,7 @@
decode_times = False
concat_characters = False
decode_coords = False
decode_timedelta = False

def maybe_decode_store(store, lock=False):
ds = conventions.decode_cf(
@@ -614,6 +621,7 @@ def maybe_decode_store(store, lock=False):
concat_characters=concat_characters,
decode_coords=decode_coords,
drop_variables=drop_variables,
decode_timedelta=decode_timedelta,
)

# TODO: this is where we would apply caching
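The same keyword is wired into ``open_zarr`` above; a minimal round-trip sketch (assuming the ``zarr`` package is installed, with ``example.zarr`` as a scratch path of my choosing):

```python
import numpy as np
import xarray as xr

# A variable stored as integer hours, i.e. a CF-encoded timedelta.
ds = xr.Dataset(
    {"lead_time": ("x", np.array([6, 12, 24], dtype="int64"), {"units": "hours"})}
)
ds.to_zarr("example.zarr", mode="w")

# Skip timedelta decoding on read: "lead_time" stays numeric with its "units" attribute.
reopened = xr.open_zarr("example.zarr", decode_timedelta=False)
print(reopened["lead_time"].dtype)  # int64 rather than timedelta64[ns]
```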
21 changes: 16 additions & 5 deletions xarray/conventions.py
@@ -266,6 +266,7 @@ def decode_cf_variable(
decode_endianness=True,
stack_char_dim=True,
use_cftime=None,
decode_timedelta=None,
):
"""
Decodes a variable which may hold CF encoded information.
@@ -315,6 +316,9 @@
var = as_variable(var)
original_dtype = var.dtype

if decode_timedelta is None:
decode_timedelta = decode_times

if concat_characters:
if stack_char_dim:
var = strings.CharacterArrayCoder().decode(var, name=name)
@@ -328,12 +332,10 @@
]:
var = coder.decode(var, name=name)

+ if decode_timedelta:
+     var = times.CFTimedeltaCoder().decode(var, name=name)
  if decode_times:
-     for coder in [
-         times.CFTimedeltaCoder(),
-         times.CFDatetimeCoder(use_cftime=use_cftime),
-     ]:
-         var = coder.decode(var, name=name)
+     var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)

dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)
# TODO(shoyer): convert everything below to use coders
@@ -442,6 +444,7 @@ def decode_cf_variables(
decode_coords=True,
drop_variables=None,
use_cftime=None,
decode_timedelta=None,
):
"""
Decode several CF encoded variables.
@@ -492,6 +495,7 @@ def stackable(dim):
decode_times=decode_times,
stack_char_dim=stack_char_dim,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
)
if decode_coords:
var_attrs = new_vars[k].attrs
@@ -518,6 +522,7 @@ def decode_cf(
decode_coords=True,
drop_variables=None,
use_cftime=None,
decode_timedelta=None,
):
"""Decode the given Dataset or Datastore according to CF conventions into
a new Dataset.
@@ -552,6 +557,11 @@
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.

Returns
-------
@@ -583,6 +593,7 @@
decode_coords,
drop_variables=drop_variables,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
)
ds = Dataset(vars, attrs=attrs)
ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars))
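To make the ``None`` default concrete: in ``decode_cf_variable`` above, ``decode_timedelta`` inherits the value of ``decode_times`` unless set explicitly. A small sketch using the public ``decode_cf`` (the variable names are mine; the new test below exercises the full matrix):

```python
import numpy as np
import xarray as xr

# "duration" is CF-encoded as integer days.
raw = xr.Dataset(
    {"duration": ("x", np.array([1, 2, 3], dtype="int64"), {"units": "days"})}
)

print(xr.decode_cf(raw)["duration"].dtype)                      # timedelta64[ns]
print(xr.decode_cf(raw, decode_times=False)["duration"].dtype)  # int64: default follows decode_times
print(
    xr.decode_cf(raw, decode_times=False, decode_timedelta=True)["duration"].dtype
)  # timedelta64[ns]: an explicit value wins
```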
35 changes: 35 additions & 0 deletions xarray/tests/test_conventions.py
@@ -311,6 +311,41 @@ def test_decode_dask_times(self):
conventions.decode_cf(original).chunk(),
)

def test_decode_cf_time_kwargs(self):
ds = Dataset.from_dict(
{
"coords": {
"timedelta": {
"data": np.array([1, 2, 3], dtype="int64"),
"dims": "timedelta",
"attrs": {"units": "days"},
},
"time": {
"data": np.array([1, 2, 3], dtype="int64"),
"dims": "time",
"attrs": {"units": "days since 2000-01-01"},
},
},
"dims": {"time": 3, "timedelta": 3},
"data_vars": {
"a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))},
},
}
)

dsc = conventions.decode_cf(ds)
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
assert dsc.time.dtype == np.dtype("M8[ns]")
dsc = conventions.decode_cf(ds, decode_times=False)
assert dsc.timedelta.dtype == np.dtype("int64")
assert dsc.time.dtype == np.dtype("int64")
dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False)
assert dsc.timedelta.dtype == np.dtype("int64")
assert dsc.time.dtype == np.dtype("M8[ns]")
dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True)
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
assert dsc.time.dtype == np.dtype("int64")


class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore):
def encode_variable(self, var):