From e2ccbdc56cf2ba49a63503cbf7fca6533fba3bf6 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 24 Jul 2015 16:47:46 -0700 Subject: [PATCH 1/2] Raise an error when encoding floats to integers without a fill value --- xray/conventions.py | 21 +++++++++++++++------ xray/test/test_conventions.py | 6 ++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/xray/conventions.py b/xray/conventions.py index cf499217a9c..31fc7a31e39 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -244,6 +244,13 @@ def encode_datetime(d): return np.vectorize(encode_datetime)(dates) +def cast_to_int_if_safe(num): + int_num = np.array(num, dtype=np.int64) + if (num == int_num).all(): + num = int_num + return num + + def encode_cf_datetime(dates, units=None, calendar=None): """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF complient time variable. @@ -279,6 +286,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): except (OutOfBoundsDatetime, ValueError, OverflowError): num = _encode_datetime_with_netcdf4(dates, units, calendar) + num = cast_to_int_if_safe(num) return (num, units, calendar) @@ -289,11 +297,7 @@ def encode_cf_timedelta(timedeltas, units=None): np_unit = _netcdf_to_numpy_timeunit(units) num = 1.0 * timedeltas / np.timedelta64(1, np_unit) num = np.where(pd.isnull(timedeltas), np.nan, num) - - int_num = np.asarray(num, dtype=np.int64) - if (num == int_num).all(): - num = int_num - + num = cast_to_int_if_safe(num) return (num, units) @@ -580,7 +584,12 @@ def maybe_encode_dtype(var): dims, data, attrs, encoding = _var_as_tuple(var) dtype = np.dtype(encoding.pop('dtype')) if dtype != var.dtype and dtype.kind != 'O': - if np.issubdtype(dtype, int): + if np.issubdtype(dtype, np.integer): + if (np.issubdtype(var.dtype, np.floating) + and '_FillValue' not in var.attrs): + raise ValueError('cannot save variable with floating ' + 'point data as integers without ' + 'providing a _FillValue to use for 
NaNs') data = ops.around(data)[...] if dtype == 'S1' and data.dtype != 'S1': data = string_to_char(np.asarray(data, 'S')) diff --git a/xray/test/test_conventions.py b/xray/test/test_conventions.py index 559e4cd95db..af8b0a6e8cd 100644 --- a/xray/test/test_conventions.py +++ b/xray/test/test_conventions.py @@ -445,6 +445,12 @@ def test_incompatible_attributes(self): with self.assertRaises(ValueError): conventions.encode_cf_variable(var) + def test_missing_fillvalue(self): + v = Variable(['x'], np.array([np.nan, 1, 2, 3])) + v.encoding = {'dtype': 'int16'} + with self.assertRaisesRegexp(ValueError, '_FillValue'): + conventions.encode_cf_variable(v) + @requires_netCDF4 class TestDecodeCF(TestCase): From fb0ba7336b1336cc14e3d5e19a8120cab23d529a Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 1 Aug 2015 18:45:31 -0700 Subject: [PATCH 2/2] Switch error with saving floats to integers without fill value to warning --- xray/conventions.py | 15 ++++++++------- xray/test/test_conventions.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xray/conventions.py b/xray/conventions.py index 31fc7a31e39..e71fdad70e9 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -579,7 +579,7 @@ def maybe_encode_fill_value(var, needs_copy=True): return var, needs_copy -def maybe_encode_dtype(var): +def maybe_encode_dtype(var, name=None): if 'dtype' in var.encoding: dims, data, attrs, encoding = _var_as_tuple(var) dtype = np.dtype(encoding.pop('dtype')) @@ -587,9 +587,10 @@ def maybe_encode_dtype(var): if np.issubdtype(dtype, np.integer): if (np.issubdtype(var.dtype, np.floating) and '_FillValue' not in var.attrs): - raise ValueError('cannot save variable with floating ' - 'point data as integers without ' - 'providing a _FillValue to use for NaNs') + warnings.warn('saving variable %s with floating ' + 'point data as an integer dtype without ' + 'any _FillValue to use for NaNs' % name, + RuntimeWarning, stacklevel=3) data = ops.around(data)[...]
if dtype == 'S1' and data.dtype != 'S1': data = string_to_char(np.asarray(data, 'S')) @@ -647,7 +648,7 @@ def ensure_dtype_not_object(var): return var -def encode_cf_variable(var, needs_copy=True): +def encode_cf_variable(var, needs_copy=True, name=None): """ Converts an Variable into an Variable which follows some of the CF conventions: @@ -671,7 +672,7 @@ def encode_cf_variable(var, needs_copy=True): var = maybe_encode_timedelta(var) var, needs_copy = maybe_encode_offset_and_scale(var, needs_copy) var, needs_copy = maybe_encode_fill_value(var, needs_copy) - var = maybe_encode_dtype(var) + var = maybe_encode_dtype(var, name) var = ensure_dtype_not_object(var) return var @@ -998,6 +999,6 @@ def cf_encoder(variables, attributes): See also: encode_cf_variable """ - new_vars = OrderedDict((k, encode_cf_variable(v)) + new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) for k, v in iteritems(variables)) return new_vars, attributes diff --git a/xray/test/test_conventions.py b/xray/test/test_conventions.py index af8b0a6e8cd..4380ebe69c3 100644 --- a/xray/test/test_conventions.py +++ b/xray/test/test_conventions.py @@ -448,7 +448,7 @@ def test_incompatible_attributes(self): def test_missing_fillvalue(self): v = Variable(['x'], np.array([np.nan, 1, 2, 3])) v.encoding = {'dtype': 'int16'} - with self.assertRaisesRegexp(ValueError, '_FillValue'): + with self.assertWarns('floating point data as an integer'): conventions.encode_cf_variable(v)