diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1993e543322..054996cbcd3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,6 +60,12 @@ New Features feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`, + (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_zarr`, + :py:func:`xarray.decode_cf`) that allows disabling/enabling the decoding of timedeltas + independently of time decoding (:issue:`1621`) + By `Aureliana Barghini ` + Bug fixes ~~~~~~~~~ - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c7481e22b59..331d88be2e3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -303,6 +303,7 @@ def open_dataset( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open and decode a dataset from a file or file-like object. @@ -383,6 +384,11 @@ def open_dataset( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_times. 
Returns ------- @@ -435,6 +441,7 @@ def open_dataset( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False if cache is None: cache = chunks is None @@ -451,6 +458,7 @@ def maybe_decode_store(store, lock=False): decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) _protect_dataset_variables_inplace(ds, cache) @@ -477,6 +485,7 @@ def maybe_decode_store(store, lock=False): chunks, drop_variables, use_cftime, + decode_timedelta, ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -561,6 +570,7 @@ def open_dataarray( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open an DataArray from a file or file-like object containing a single data variable. @@ -640,6 +650,11 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_times. Notes ----- @@ -671,6 +686,7 @@ def open_dataarray( drop_variables=drop_variables, backend_kwargs=backend_kwargs, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if len(dataset.data_vars) != 1: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 973c167911e..eb2be3086ef 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -496,6 +496,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, + decode_timedelta=None, **kwargs, ): """Load and decode a dataset from a Zarr store. 
@@ -555,6 +556,11 @@ def open_zarr( consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_times. Returns ------- @@ -605,6 +611,7 @@ def open_zarr( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( @@ -614,6 +621,7 @@ def maybe_decode_store(store, lock=False): concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, + decode_timedelta=decode_timedelta, ) # TODO: this is where we would apply caching diff --git a/xarray/conventions.py b/xarray/conventions.py index df24d0d3d8d..588fcea71a3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -266,6 +266,7 @@ def decode_cf_variable( decode_endianness=True, stack_char_dim=True, use_cftime=None, + decode_timedelta=None, ): """ Decodes a variable which may hold CF encoded information. 
@@ -315,6 +316,9 @@ def decode_cf_variable( var = as_variable(var) original_dtype = var.dtype + if decode_timedelta is None: + decode_timedelta = decode_times + if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) @@ -328,12 +332,10 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - for coder in [ - times.CFTimedeltaCoder(), - times.CFDatetimeCoder(use_cftime=use_cftime), - ]: - var = coder.decode(var, name=name) + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) # TODO(shoyer): convert everything below to use coders @@ -442,6 +444,7 @@ def decode_cf_variables( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """ Decode several CF encoded variables. @@ -492,6 +495,7 @@ def stackable(dim): decode_times=decode_times, stack_char_dim=stack_char_dim, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if decode_coords: var_attrs = new_vars[k].attrs @@ -518,6 +522,7 @@ def decode_cf( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -552,6 +557,11 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_times. 
Returns ------- @@ -583,6 +593,7 @@ def decode_cf( decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index acb2400ea04..dfd20a303ff 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -311,6 +311,41 @@ def test_decode_dask_times(self): conventions.decode_cf(original).chunk(), ) + def test_decode_cf_time_kwargs(self): + ds = Dataset.from_dict( + { + "coords": { + "timedelta": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "timedelta", + "attrs": {"units": "days"}, + }, + "time": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "time", + "attrs": {"units": "days since 2000-01-01"}, + }, + }, + "dims": {"time": 3, "timedelta": 3}, + "data_vars": { + "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))}, + }, + } + ) + + dsc = conventions.decode_cf(ds) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("int64") + dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("int64") + class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): def encode_variable(self, var):