Skip to content
forked from pydata/xarray

Commit 4359403

Browse files
author
dcherian
committed
Merge branch 'master' into deprecate/inplace
* master: Global option to always keep/discard attrs on operations (pydata#2482) Remove tests where answers change in cftime 1.0.2.1 (pydata#2522) Finish deprecation cycle for DataArray.__contains__ checking array values (pydata#2520) Fix bug where OverflowError is not being raised (pydata#2519)
2 parents 66d3cea + 6d55f99 commit 4359403

13 files changed

+266
-76
lines changed

doc/faq.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.)
119119

120120
An implication of this choice is that we do not propagate ``attrs`` through
121121
most operations unless explicitly flagged (some methods have a ``keep_attrs``
122-
option). Similarly, xarray does not check for conflicts between ``attrs`` when
122+
option, and there is a global flag for setting this to be always True or
123+
False). Similarly, xarray does not check for conflicts between ``attrs`` when
123124
combining arrays and datasets, unless explicitly requested with the option
124125
``compat='identical'``. The guiding principle is that metadata should not be
125126
allowed to get in the way.

doc/whats-new.rst

+18-6
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,14 @@ v0.11.0 (unreleased)
3333
Breaking changes
3434
~~~~~~~~~~~~~~~~
3535

36-
- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
37-
Call :py:meth:`Dataset.transpose` directly instead.
38-
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
39-
Similarily, calling ``len`` and ``bool`` on a ``Dataset`` now
40-
includes only data variables
36+
- Finished deprecation cycles:
37+
- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
38+
Call :py:meth:`Dataset.transpose` directly instead.
39+
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
40+
Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now
41+
includes only data variables.
42+
- ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks
43+
array data, not coordinates.
4144
- Xarray's storage backends now automatically open and close files when
4245
necessary, rather than requiring opening a file with ``autoclose=True``. A
4346
global least-recently-used cache is used to store open files; the default
@@ -82,7 +85,12 @@ Enhancements
8285
:py:meth:`~xarray.Dataset.differentiate`,
8386
:py:meth:`~xarray.DataArray.interp`, and
8487
:py:meth:`~xarray.Dataset.interp`.
85-
By `Spencer Clark <https://github.com/spencerkclark>`_.
88+
By `Spencer Clark <https://github.com/spencerkclark>`_.
89+
- There is now a global option to either always keep or always discard
90+
dataset and dataarray attrs upon operations. The option is set with
91+
``xarray.set_options(keep_attrs=True)``, and the default is to use the old
92+
behaviour.
93+
By `Tom Nicholas <https://github.com/TomNicholas>`_.
8694
- Added a new backend for the GRIB file format based on ECMWF *cfgrib*
8795
python driver and *ecCodes* C-library. (:issue:`2475`)
8896
By `Alessandro Amici <https://github.com/alexamici>`_,
@@ -126,6 +134,10 @@ Bug fixes
126134
By `Spencer Clark <https://github.com/spencerkclark>`_.
127135
- Avoid use of Dask's deprecated ``get=`` parameter in tests
128136
by `Matthew Rocklin <https://github.com/mrocklin/>`_.
137+
- An ``OverflowError`` is now accurately raised and caught during the
138+
encoding process if a reference date is used that is so distant that
139+
the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
140+
By `Spencer Clark <https://github.com/spencerkclark>`_.
129141

130142
.. _whats-new.0.10.9:
131143

xarray/coding/times.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,12 @@ def encode_cf_datetime(dates, units=None, calendar=None):
361361
delta_units = _netcdf_to_numpy_timeunit(delta)
362362
time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]')
363363
ref_date = np.datetime64(pd.Timestamp(ref_date))
364-
num = (dates - ref_date) / time_delta
364+
365+
# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
366+
# an OverflowError is raised if the ref_date is too far away from
367+
# dates to be encoded (GH 2272).
368+
num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta
369+
num = num.values.reshape(dates.shape)
365370

366371
except (OutOfBoundsDatetime, OverflowError):
367372
num = _encode_datetime_with_cftime(dates, units, calendar)

xarray/core/common.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .arithmetic import SupportsArithmetic
1212
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
1313
from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs
14+
from .options import _get_keep_attrs
1415

1516
# Used as a sentinel value to indicate all dimensions
1617
ALL_DIMS = ReprObject('<all-dims>')
@@ -21,13 +22,13 @@ class ImplementsArrayReduce(object):
2122
def _reduce_method(cls, func, include_skipna, numeric_only):
2223
if include_skipna:
2324
def wrapped_func(self, dim=None, axis=None, skipna=None,
24-
keep_attrs=False, **kwargs):
25-
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
25+
**kwargs):
26+
return self.reduce(func, dim, axis,
2627
skipna=skipna, allow_lazy=True, **kwargs)
2728
else:
28-
def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
29+
def wrapped_func(self, dim=None, axis=None,
2930
**kwargs):
30-
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
31+
return self.reduce(func, dim, axis,
3132
allow_lazy=True, **kwargs)
3233
return wrapped_func
3334

@@ -51,14 +52,14 @@ class ImplementsDatasetReduce(object):
5152
@classmethod
5253
def _reduce_method(cls, func, include_skipna, numeric_only):
5354
if include_skipna:
54-
def wrapped_func(self, dim=None, keep_attrs=False, skipna=None,
55+
def wrapped_func(self, dim=None, skipna=None,
5556
**kwargs):
56-
return self.reduce(func, dim, keep_attrs, skipna=skipna,
57+
return self.reduce(func, dim, skipna=skipna,
5758
numeric_only=numeric_only, allow_lazy=True,
5859
**kwargs)
5960
else:
60-
def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
61-
return self.reduce(func, dim, keep_attrs,
61+
def wrapped_func(self, dim=None, **kwargs):
62+
return self.reduce(func, dim,
6263
numeric_only=numeric_only, allow_lazy=True,
6364
**kwargs)
6465
return wrapped_func
@@ -591,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
591592
center=center)
592593

593594
def resample(self, freq=None, dim=None, how=None, skipna=None,
594-
closed=None, label=None, base=0, keep_attrs=False, **indexer):
595+
closed=None, label=None, base=0, keep_attrs=None, **indexer):
595596
"""Returns a Resample object for performing resampling operations.
596597
597598
Handles both downsampling and upsampling. If any intervals contain no
@@ -659,6 +660,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
659660
from .dataarray import DataArray
660661
from .resample import RESAMPLE_DIM
661662

663+
if keep_attrs is None:
664+
keep_attrs = _get_keep_attrs(default=False)
665+
662666
if dim is not None:
663667
if how is None:
664668
how = 'mean'

xarray/core/dataarray.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
assert_coordinate_consistent, remap_label_indexers)
1717
from .dataset import Dataset, merge_indexes, split_indexes
1818
from .formatting import format_item
19-
from .options import OPTIONS
19+
from .options import OPTIONS, _get_keep_attrs
2020
from .pycompat import OrderedDict, basestring, iteritems, range, zip
2121
from .utils import (
2222
_check_inplace, decode_numpy_dict_values, either_dict_or_kwargs,
@@ -504,11 +504,7 @@ def _item_sources(self):
504504
LevelCoordinatesSource(self)]
505505

506506
def __contains__(self, key):
507-
warnings.warn(
508-
'xarray.DataArray.__contains__ currently checks membership in '
509-
'DataArray.coords, but in xarray v0.11 will change to check '
510-
'membership in array values.', FutureWarning, stacklevel=2)
511-
return key in self._coords
507+
return key in self.data
512508

513509
@property
514510
def loc(self):
@@ -1564,7 +1560,7 @@ def combine_first(self, other):
15641560
"""
15651561
return ops.fillna(self, other, join="outer")
15661562

1567-
def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
1563+
def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
15681564
"""Reduce this array by applying `func` along some dimension(s).
15691565
15701566
Parameters
@@ -1593,6 +1589,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
15931589
DataArray with this object's array replaced with an array with
15941590
summarized data and the indicated dimension(s) removed.
15951591
"""
1592+
15961593
var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
15971594
return self._replace_maybe_drop_dims(var)
15981595

@@ -2275,7 +2272,7 @@ def sortby(self, variables, ascending=True):
22752272
ds = self._to_temp_dataset().sortby(variables, ascending=ascending)
22762273
return self._from_temp_dataset(ds)
22772274

2278-
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
2275+
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
22792276
"""Compute the qth quantile of the data along the specified dimension.
22802277
22812278
Returns the qth quantiles(s) of the array elements.
@@ -2321,7 +2318,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
23212318
q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation)
23222319
return self._from_temp_dataset(ds)
23232320

2324-
def rank(self, dim, pct=False, keep_attrs=False):
2321+
def rank(self, dim, pct=False, keep_attrs=None):
23252322
"""Ranks the data.
23262323
23272324
Equal values are assigned a rank that is the average of the ranks that
@@ -2357,6 +2354,7 @@ def rank(self, dim, pct=False, keep_attrs=False):
23572354
array([ 1., 2., 3.])
23582355
Dimensions without coordinates: x
23592356
"""
2357+
23602358
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
23612359
return self._from_temp_dataset(ds)
23622360

xarray/core/dataset.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from .merge import (
2929
dataset_merge_method, dataset_update_method, merge_data_and_coords,
3030
merge_variables)
31-
from .options import OPTIONS
31+
from .options import OPTIONS, _get_keep_attrs
3232
from .pycompat import (
3333
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
3434
from .utils import (
@@ -2851,7 +2851,7 @@ def combine_first(self, other):
28512851
out = ops.fillna(self, other, join="outer", dataset_join="outer")
28522852
return out
28532853

2854-
def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
2854+
def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
28552855
allow_lazy=False, **kwargs):
28562856
"""Reduce this dataset by applying `func` along some dimension(s).
28572857
@@ -2893,6 +2893,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
28932893
raise ValueError('Dataset does not contain the dimensions: %s'
28942894
% missing_dimensions)
28952895

2896+
if keep_attrs is None:
2897+
keep_attrs = _get_keep_attrs(default=False)
2898+
28962899
variables = OrderedDict()
28972900
for name, var in iteritems(self._variables):
28982901
reduce_dims = [dim for dim in var.dims if dim in dims]
@@ -2921,7 +2924,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
29212924
attrs = self.attrs if keep_attrs else None
29222925
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
29232926

2924-
def apply(self, func, keep_attrs=False, args=(), **kwargs):
2927+
def apply(self, func, keep_attrs=None, args=(), **kwargs):
29252928
"""Apply a function over the data variables in this dataset.
29262929
29272930
Parameters
@@ -2966,6 +2969,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
29662969
variables = OrderedDict(
29672970
(k, maybe_wrap_array(v, func(v, *args, **kwargs)))
29682971
for k, v in iteritems(self.data_vars))
2972+
if keep_attrs is None:
2973+
keep_attrs = _get_keep_attrs(default=False)
29692974
attrs = self.attrs if keep_attrs else None
29702975
return type(self)(variables, attrs=attrs)
29712976

@@ -3630,7 +3635,7 @@ def sortby(self, variables, ascending=True):
36303635
return aligned_self.isel(**indices)
36313636

36323637
def quantile(self, q, dim=None, interpolation='linear',
3633-
numeric_only=False, keep_attrs=False):
3638+
numeric_only=False, keep_attrs=None):
36343639
"""Compute the qth quantile of the data along the specified dimension.
36353640
36363641
Returns the qth quantiles(s) of the array elements for each variable
@@ -3708,6 +3713,8 @@ def quantile(self, q, dim=None, interpolation='linear',
37083713

37093714
# construct the new dataset
37103715
coord_names = set(k for k in self.coords if k in variables)
3716+
if keep_attrs is None:
3717+
keep_attrs = _get_keep_attrs(default=False)
37113718
attrs = self.attrs if keep_attrs else None
37123719
new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
37133720
if 'quantile' in new.dims:
@@ -3716,7 +3723,7 @@ def quantile(self, q, dim=None, interpolation='linear',
37163723
new.coords['quantile'] = q
37173724
return new
37183725

3719-
def rank(self, dim, pct=False, keep_attrs=False):
3726+
def rank(self, dim, pct=False, keep_attrs=None):
37203727
"""Ranks the data.
37213728
37223729
Equal values are assigned a rank that is the average of the ranks that
@@ -3756,6 +3763,8 @@ def rank(self, dim, pct=False, keep_attrs=False):
37563763
variables[name] = var
37573764

37583765
coord_names = set(self.coords)
3766+
if keep_attrs is None:
3767+
keep_attrs = _get_keep_attrs(default=False)
37593768
attrs = self.attrs if keep_attrs else None
37603769
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
37613770

@@ -3819,11 +3828,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
38193828

38203829
@property
38213830
def real(self):
3822-
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
3831+
return self._unary_op(lambda x: x.real,
3832+
keep_attrs=True)(self)
38233833

38243834
@property
38253835
def imag(self):
3826-
return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
3836+
return self._unary_op(lambda x: x.imag,
3837+
keep_attrs=True)(self)
38273838

38283839
def filter_by_attrs(self, **kwargs):
38293840
"""Returns a ``Dataset`` with variables that match specific conditions.

0 commit comments

Comments
 (0)