From b0c3dcf378d955dfccc1fc974510c93ef1e71543 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Nov 2018 17:47:46 +0100 Subject: [PATCH 1/5] CF: also decode time bounds when available --- xarray/conventions.py | 19 +++++++++++ xarray/tests/test_coding_times.py | 55 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/xarray/conventions.py b/xarray/conventions.py index f60ee6b2c15..42a38ce8b3d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -350,6 +350,25 @@ def stackable(dim): drop_variables = [] drop_variables = set(drop_variables) + # Time bounds coordinates might miss the decoding attributes + # https://github.com/pydata/xarray/issues/2565 + if decode_times: + # We list all time variables with bounds + to_update = dict() + for v in variables.values(): + attrs = v.attrs + if ('units' in attrs and 'since' in attrs['units'] and + 'bounds' in attrs): + new_attrs = {'units': attrs['units']} + if 'calendar' in attrs: + new_attrs['calendar'] = attrs['calendar'] + to_update[attrs['bounds']] = new_attrs + # For all bound variables, update their time attribute if not present + for k, new_attrs in to_update.items(): + if k in variables: + for ak, av in new_attrs.items(): + variables[k].attrs.setdefault(ak, av) + new_vars = OrderedDict() for k, v in iteritems(variables): if k in drop_variables: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 0ca57f98a6d..9516865c29e 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -624,6 +624,61 @@ def test_decode_cf(calendar): assert ds.test.dtype == np.dtype('M8[ns]') +def test_decode_cf_time_bounds(): + + da = DataArray(np.arange(6).reshape((3, 2)), coords={'time': [1, 2, 3]}, + dims=('time', 'nbnd'), name='time_bnds') + + # Simplest case + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['calendar'] = 'standard' + ds['time'].attrs['bounds'] = 'time_bnds' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('M8[ns]') + + # Without calendar also OK + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['bounds'] = 'time_bnds' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('M8[ns]') + + # If previous calendar and units do not overwrite them + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['calendar'] = 'standard' + ds['time'].attrs['bounds'] = 'time_bnds' + ds['time_bnds'].attrs['units'] = 'hours since 2001-01-01' + ds['time_bnds'].attrs['calendar'] = 'noleap' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('O') + assert ds.time_bnds.encoding['units'] == 'hours since 2001-01-01' + + # If bounds not available do not complain + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['calendar'] = 'standard' + ds['time'].attrs['bounds'] = 'fake_time_bnds' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('int64') + + # If not proper time do not change bounds + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days in 2001-01-01' + ds['time'].attrs['calendar'] = 'standard' + ds['time'].attrs['bounds'] = 'time_bnds' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('int64') + + # Only decode if asked for + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['bounds'] = 'time_bnds' + ds = decode_cf(ds, decode_times=False) + assert ds.time_bnds.dtype == np.dtype('int64') + + @pytest.fixture(params=_ALL_CALENDARS) def calendar(request): return request.param From de5865d09c00fa2f75eb0e6f2b4d5ddfc8c6bd0d Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Nov 2018 18:15:11 +0100 Subject: [PATCH 2/5] Fix failing test when cftime not present and what's new --- doc/whats-new.rst | 4 ++++ xarray/tests/test_coding_times.py | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1da1da700e7..da2cfa013e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,10 @@ Breaking changes Enhancements ~~~~~~~~~~~~ +- Time bounds variables are now also decoded according to CF conventions + (:issue:`2565`). + By `Fabien Maussion `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9516865c29e..5c9a2d0b5f5 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -645,15 +645,16 @@ def test_decode_cf_time_bounds(): assert ds.time_bnds.dtype == np.dtype('M8[ns]') # If previous calendar and units do not overwrite them - ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['calendar'] = 'standard' - ds['time'].attrs['bounds'] = 'time_bnds' - ds['time_bnds'].attrs['units'] = 'hours since 2001-01-01' - ds['time_bnds'].attrs['calendar'] = 'noleap' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('O') - assert ds.time_bnds.encoding['units'] == 'hours since 2001-01-01' + if has_cftime_or_netCDF4: + ds = da.to_dataset() + ds['time'].attrs['units'] = 'days since 2001-01-01' + ds['time'].attrs['calendar'] = 'standard' + ds['time'].attrs['bounds'] = 'time_bnds' + ds['time_bnds'].attrs['units'] = 'hours since 2001-01-01' + ds['time_bnds'].attrs['calendar'] = 'noleap' + ds = decode_cf(ds) + assert ds.time_bnds.dtype == np.dtype('O') + assert ds.time_bnds.encoding['units'] == 'hours since 2001-01-01' # If bounds not available do not complain ds = da.to_dataset() From e40861aa5d3e495647052525de73ae0023e0b0e8 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Nov 2018 20:06:09 +0100 Subject: [PATCH 3/5] Fix windows --- xarray/tests/test_coding_times.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 5c9a2d0b5f5..b6f633b0af9 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -626,7 +626,8 @@ def test_decode_cf(calendar): def test_decode_cf_time_bounds(): - da = DataArray(np.arange(6).reshape((3, 2)), coords={'time': [1, 2, 3]}, + da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)), + coords={'time': [1, 2, 3]}, dims=('time', 'nbnd'), name='time_bnds') # Simplest case From 8240be0d089d9d278783737b7738077ec921c244 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Thu, 29 Nov 2018 22:25:25 +0100 Subject: [PATCH 4/5] Reviews --- xarray/conventions.py | 47 ++++++++++++++-------- xarray/tests/test_coding_times.py | 67 +++++++++++-------------------- 2 files changed, 53 insertions(+), 61 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 42a38ce8b3d..4c9eb728d2e 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, return Variable(dimensions, data, attributes, encoding=encoding) +def _update_bounds_attributes(variables): + """Adds time attributes to time bounds variables. + + Variables handling time bounds ("Cell boundaries" in the CF + conventions) do not necessarily carry the necessary attributes to be + decoded. This copies the attributes from the time variable to the + associated boundaries. + + See Also: + + http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/ + cf-conventions.html#cell-boundaries + + https://github.com/pydata/xarray/issues/2565 + """ + + # For all time variables with bounds + for v in variables.values(): + attrs = v.attrs + has_date_units = 'units' in attrs and 'since' in attrs['units'] + if has_date_units and 'bounds' in attrs: + if attrs['bounds'] in variables: + to_update = variables[attrs['bounds']].attrs + to_update.setdefault('units', attrs['units']) + if 'calendar' in attrs: + to_update.setdefault('calendar', attrs['calendar']) + + def decode_cf_variables(variables, attributes, concat_characters=True, mask_and_scale=True, decode_times=True, decode_coords=True, drop_variables=None): """ - Decode a several CF encoded variables. + Decode several CF encoded variables. See: decode_cf_variable """ @@ -351,23 +379,8 @@ def stackable(dim): drop_variables = set(drop_variables) # Time bounds coordinates might miss the decoding attributes - # https://github.com/pydata/xarray/issues/2565 if decode_times: - # We list all time variables with bounds - to_update = dict() - for v in variables.values(): - attrs = v.attrs - if ('units' in attrs and 'since' in attrs['units'] and - 'bounds' in attrs): - new_attrs = {'units': attrs['units']} - if 'calendar' in attrs: - new_attrs['calendar'] = attrs['calendar'] - to_update[attrs['bounds']] = new_attrs - # For all bound variables, update their time attribute if not present - for k, new_attrs in to_update.items(): - if k in variables: - for ak, av in new_attrs.items(): - variables[k].attrs.setdefault(ak, av) + _update_bounds_attributes(variables) new_vars = OrderedDict() for k, v in iteritems(variables): diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b6f633b0af9..5b69d9adcc0 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -10,6 +10,7 @@ from xarray import DataArray, Variable, coding, decode_cf from xarray.coding.times import (_import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime) +from xarray.conventions import _update_bounds_attributes from xarray.core.common import contains_cftime_datetimes from . import ( @@ -630,55 +631,33 @@ def test_decode_cf_time_bounds(): coords={'time': [1, 2, 3]}, dims=('time', 'nbnd'), name='time_bnds') - # Simplest case - ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['calendar'] = 'standard' - ds['time'].attrs['bounds'] = 'time_bnds' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('M8[ns]') + attrs = {'units': 'days since 2001-01', + 'calendar': 'standard', + 'bounds': 'time_bnds'} - # Without calendar also OK ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['bounds'] = 'time_bnds' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('M8[ns]') - - # If previous calendar and units do not overwrite them - if has_cftime_or_netCDF4: - ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['calendar'] = 'standard' - ds['time'].attrs['bounds'] = 'time_bnds' - ds['time_bnds'].attrs['units'] = 'hours since 2001-01-01' - ds['time_bnds'].attrs['calendar'] = 'noleap' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('O') - assert ds.time_bnds.encoding['units'] == 'hours since 2001-01-01' - - # If bounds not available do not complain - ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['calendar'] = 'standard' - ds['time'].attrs['bounds'] = 'fake_time_bnds' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('int64') - - # If not proper time do not change bounds + ds['time'].attrs.update(attrs) + _update_bounds_attributes(ds.variables) + assert ds.variables['time_bnds'].attrs == {'units': 'days since 2001-01', + 'calendar': 'standard'} + dsc = decode_cf(ds) + assert dsc.time_bnds.dtype == np.dtype('M8[ns]') + dsc = decode_cf(ds, decode_times=False) + assert dsc.time_bnds.dtype == np.dtype('int64') + + # Do not overwrite existing attrs ds = da.to_dataset() - ds['time'].attrs['units'] = 'days in 2001-01-01' - ds['time'].attrs['calendar'] = 'standard' - ds['time'].attrs['bounds'] = 'time_bnds' - ds = decode_cf(ds) - assert ds.time_bnds.dtype == np.dtype('int64') + ds['time'].attrs.update(attrs) + bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'} + ds['time_bnds'].attrs.update(bnd_attr) + _update_bounds_attributes(ds.variables) + assert ds.variables['time_bnds'].attrs == bnd_attr - # Only decode if asked for + # If bounds variable not available do not complain ds = da.to_dataset() - ds['time'].attrs['units'] = 'days since 2001-01-01' - ds['time'].attrs['bounds'] = 'time_bnds' - ds = decode_cf(ds, decode_times=False) - assert ds.time_bnds.dtype == np.dtype('int64') + ds['time'].attrs.update(attrs) + ds['time'].attrs['bounds'] = 'fake_var' + _update_bounds_attributes(ds.variables) @pytest.fixture(params=_ALL_CALENDARS) From 326f7ae804ccdedde84b293c031544e60881c660 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 1 Dec 2018 12:50:58 +0100 Subject: [PATCH 5/5] Reviews 2 --- xarray/conventions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 4c9eb728d2e..ea85a6d5b74 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -342,10 +342,10 @@ def _update_bounds_attributes(variables): has_date_units = 'units' in attrs and 'since' in attrs['units'] if has_date_units and 'bounds' in attrs: if attrs['bounds'] in variables: - to_update = variables[attrs['bounds']].attrs - to_update.setdefault('units', attrs['units']) + bounds_attrs = variables[attrs['bounds']].attrs + bounds_attrs.setdefault('units', attrs['units']) if 'calendar' in attrs: - to_update.setdefault('calendar', attrs['calendar']) + bounds_attrs.setdefault('calendar', attrs['calendar']) def decode_cf_variables(variables, attributes, concat_characters=True,