Skip to content

Commit 6d55f99

Browse files
TomNicholas authored and shoyer committed
Global option to always keep/discard attrs on operations (#2482)
* Added a global option to always keep or discard attrs.
* Updated docs and options docstring to describe new keep_attrs global option
* Updated all default keep_attrs arguments to check global option
* New test to check attributes are retained properly
* Implemented shoyer's suggestion so attribute permanence test now passes for reduce methods
* Added tests to explicitly check that attrs are propagated correctly
* Updated what's new with global keep_attrs option
* Bugfix to stop failing tests in test_dataset
* Test class now inherits from object for python2 compatibility
* Fixes to documentation
* Removed some unnecessary checks of the global keep_attrs option
* Removed whitespace typo I just created
* Removed some more unnecessary checks of global keep_attrs option (pointed out by dcherian)
1 parent 3176d8a commit 6d55f99

File tree

10 files changed

+211
-40
lines changed

10 files changed

+211
-40
lines changed

doc/faq.rst

+2-1
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.)
119119

120120
An implication of this choice is that we do not propagate ``attrs`` through
121121
most operations unless explicitly flagged (some methods have a ``keep_attrs``
122-
option). Similarly, xarray does not check for conflicts between ``attrs`` when
122+
option, and there is a global flag for setting this to be always True or
123+
False). Similarly, xarray does not check for conflicts between ``attrs`` when
123124
combining arrays and datasets, unless explicitly requested with the option
124125
``compat='identical'``. The guiding principle is that metadata should not be
125126
allowed to get in the way.

doc/whats-new.rst

+6-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,12 @@ Enhancements
8282
:py:meth:`~xarray.Dataset.differentiate`,
8383
:py:meth:`~xarray.DataArray.interp`, and
8484
:py:meth:`~xarray.Dataset.interp`.
85-
By `Spencer Clark <https://github.com/spencerkclark>`_.
85+
By `Spencer Clark <https://github.com/spencerkclark>`_.
86+
- There is now a global option to either always keep or always discard
87+
dataset and dataarray attrs upon operations. The option is set with
88+
``xarray.set_options(keep_attrs=True)``, and the default is to use the old
89+
behaviour.
90+
By `Tom Nicholas <https://github.com/TomNicholas>`_.
8691
- Added a new backend for the GRIB file format based on ECMWF *cfgrib*
8792
python driver and *ecCodes* C-library. (:issue:`2475`)
8893
By `Alessandro Amici <https://github.com/alexamici>`_,

xarray/core/common.py

+13-9
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from .arithmetic import SupportsArithmetic
1212
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
1313
from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs
14+
from .options import _get_keep_attrs
1415

1516
# Used as a sentinel value to indicate all dimensions
1617
ALL_DIMS = ReprObject('<all-dims>')
@@ -21,13 +22,13 @@ class ImplementsArrayReduce(object):
2122
def _reduce_method(cls, func, include_skipna, numeric_only):
2223
if include_skipna:
2324
def wrapped_func(self, dim=None, axis=None, skipna=None,
24-
keep_attrs=False, **kwargs):
25-
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
25+
**kwargs):
26+
return self.reduce(func, dim, axis,
2627
skipna=skipna, allow_lazy=True, **kwargs)
2728
else:
28-
def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
29+
def wrapped_func(self, dim=None, axis=None,
2930
**kwargs):
30-
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
31+
return self.reduce(func, dim, axis,
3132
allow_lazy=True, **kwargs)
3233
return wrapped_func
3334

@@ -51,14 +52,14 @@ class ImplementsDatasetReduce(object):
5152
@classmethod
5253
def _reduce_method(cls, func, include_skipna, numeric_only):
5354
if include_skipna:
54-
def wrapped_func(self, dim=None, keep_attrs=False, skipna=None,
55+
def wrapped_func(self, dim=None, skipna=None,
5556
**kwargs):
56-
return self.reduce(func, dim, keep_attrs, skipna=skipna,
57+
return self.reduce(func, dim, skipna=skipna,
5758
numeric_only=numeric_only, allow_lazy=True,
5859
**kwargs)
5960
else:
60-
def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
61-
return self.reduce(func, dim, keep_attrs,
61+
def wrapped_func(self, dim=None, **kwargs):
62+
return self.reduce(func, dim,
6263
numeric_only=numeric_only, allow_lazy=True,
6364
**kwargs)
6465
return wrapped_func
@@ -591,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
591592
center=center)
592593

593594
def resample(self, freq=None, dim=None, how=None, skipna=None,
594-
closed=None, label=None, base=0, keep_attrs=False, **indexer):
595+
closed=None, label=None, base=0, keep_attrs=None, **indexer):
595596
"""Returns a Resample object for performing resampling operations.
596597
597598
Handles both downsampling and upsampling. If any intervals contain no
@@ -659,6 +660,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
659660
from .dataarray import DataArray
660661
from .resample import RESAMPLE_DIM
661662

663+
if keep_attrs is None:
664+
keep_attrs = _get_keep_attrs(default=False)
665+
662666
if dim is not None:
663667
if how is None:
664668
how = 'mean'

xarray/core/dataarray.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
assert_coordinate_consistent, remap_label_indexers)
1717
from .dataset import Dataset, merge_indexes, split_indexes
1818
from .formatting import format_item
19-
from .options import OPTIONS
19+
from .options import OPTIONS, _get_keep_attrs
2020
from .pycompat import OrderedDict, basestring, iteritems, range, zip
2121
from .utils import (
2222
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution)
@@ -1555,7 +1555,7 @@ def combine_first(self, other):
15551555
"""
15561556
return ops.fillna(self, other, join="outer")
15571557

1558-
def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
1558+
def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
15591559
"""Reduce this array by applying `func` along some dimension(s).
15601560
15611561
Parameters
@@ -1584,6 +1584,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
15841584
DataArray with this object's array replaced with an array with
15851585
summarized data and the indicated dimension(s) removed.
15861586
"""
1587+
15871588
var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
15881589
return self._replace_maybe_drop_dims(var)
15891590

@@ -2266,7 +2267,7 @@ def sortby(self, variables, ascending=True):
22662267
ds = self._to_temp_dataset().sortby(variables, ascending=ascending)
22672268
return self._from_temp_dataset(ds)
22682269

2269-
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
2270+
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
22702271
"""Compute the qth quantile of the data along the specified dimension.
22712272
22722273
Returns the qth quantiles(s) of the array elements.
@@ -2312,7 +2313,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
23122313
q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation)
23132314
return self._from_temp_dataset(ds)
23142315

2315-
def rank(self, dim, pct=False, keep_attrs=False):
2316+
def rank(self, dim, pct=False, keep_attrs=None):
23162317
"""Ranks the data.
23172318
23182319
Equal values are assigned a rank that is the average of the ranks that
@@ -2348,6 +2349,7 @@ def rank(self, dim, pct=False, keep_attrs=False):
23482349
array([ 1., 2., 3.])
23492350
Dimensions without coordinates: x
23502351
"""
2352+
23512353
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
23522354
return self._from_temp_dataset(ds)
23532355

xarray/core/dataset.py

+18-7
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
from .merge import (
2929
dataset_merge_method, dataset_update_method, merge_data_and_coords,
3030
merge_variables)
31-
from .options import OPTIONS
31+
from .options import OPTIONS, _get_keep_attrs
3232
from .pycompat import (
3333
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
3434
from .utils import (
@@ -2842,7 +2842,7 @@ def combine_first(self, other):
28422842
out = ops.fillna(self, other, join="outer", dataset_join="outer")
28432843
return out
28442844

2845-
def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
2845+
def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
28462846
allow_lazy=False, **kwargs):
28472847
"""Reduce this dataset by applying `func` along some dimension(s).
28482848
@@ -2884,6 +2884,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
28842884
raise ValueError('Dataset does not contain the dimensions: %s'
28852885
% missing_dimensions)
28862886

2887+
if keep_attrs is None:
2888+
keep_attrs = _get_keep_attrs(default=False)
2889+
28872890
variables = OrderedDict()
28882891
for name, var in iteritems(self._variables):
28892892
reduce_dims = [dim for dim in var.dims if dim in dims]
@@ -2912,7 +2915,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
29122915
attrs = self.attrs if keep_attrs else None
29132916
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
29142917

2915-
def apply(self, func, keep_attrs=False, args=(), **kwargs):
2918+
def apply(self, func, keep_attrs=None, args=(), **kwargs):
29162919
"""Apply a function over the data variables in this dataset.
29172920
29182921
Parameters
@@ -2957,6 +2960,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
29572960
variables = OrderedDict(
29582961
(k, maybe_wrap_array(v, func(v, *args, **kwargs)))
29592962
for k, v in iteritems(self.data_vars))
2963+
if keep_attrs is None:
2964+
keep_attrs = _get_keep_attrs(default=False)
29602965
attrs = self.attrs if keep_attrs else None
29612966
return type(self)(variables, attrs=attrs)
29622967

@@ -3621,7 +3626,7 @@ def sortby(self, variables, ascending=True):
36213626
return aligned_self.isel(**indices)
36223627

36233628
def quantile(self, q, dim=None, interpolation='linear',
3624-
numeric_only=False, keep_attrs=False):
3629+
numeric_only=False, keep_attrs=None):
36253630
"""Compute the qth quantile of the data along the specified dimension.
36263631
36273632
Returns the qth quantiles(s) of the array elements for each variable
@@ -3699,6 +3704,8 @@ def quantile(self, q, dim=None, interpolation='linear',
36993704

37003705
# construct the new dataset
37013706
coord_names = set(k for k in self.coords if k in variables)
3707+
if keep_attrs is None:
3708+
keep_attrs = _get_keep_attrs(default=False)
37023709
attrs = self.attrs if keep_attrs else None
37033710
new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
37043711
if 'quantile' in new.dims:
@@ -3707,7 +3714,7 @@ def quantile(self, q, dim=None, interpolation='linear',
37073714
new.coords['quantile'] = q
37083715
return new
37093716

3710-
def rank(self, dim, pct=False, keep_attrs=False):
3717+
def rank(self, dim, pct=False, keep_attrs=None):
37113718
"""Ranks the data.
37123719
37133720
Equal values are assigned a rank that is the average of the ranks that
@@ -3747,6 +3754,8 @@ def rank(self, dim, pct=False, keep_attrs=False):
37473754
variables[name] = var
37483755

37493756
coord_names = set(self.coords)
3757+
if keep_attrs is None:
3758+
keep_attrs = _get_keep_attrs(default=False)
37503759
attrs = self.attrs if keep_attrs else None
37513760
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
37523761

@@ -3810,11 +3819,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
38103819

38113820
@property
38123821
def real(self):
3813-
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
3822+
return self._unary_op(lambda x: x.real,
3823+
keep_attrs=True)(self)
38143824

38153825
@property
38163826
def imag(self):
3817-
return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
3827+
return self._unary_op(lambda x: x.imag,
3828+
keep_attrs=True)(self)
38183829

38193830
def filter_by_attrs(self, **kwargs):
38203831
"""Returns a ``Dataset`` with variables that match specific conditions.

xarray/core/groupby.py

+22-13
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from .pycompat import integer_types, range, zip
1414
from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index
1515
from .variable import IndexVariable, Variable, as_variable
16+
from .options import _get_keep_attrs
1617

1718

1819
def unique_value_groups(ar, sort=True):
@@ -404,15 +405,17 @@ def _first_or_last(self, op, skipna, keep_attrs):
404405
# NB. this is currently only used for reductions along an existing
405406
# dimension
406407
return self._obj
408+
if keep_attrs is None:
409+
keep_attrs = _get_keep_attrs(default=True)
407410
return self.reduce(op, self._group_dim, skipna=skipna,
408411
keep_attrs=keep_attrs, allow_lazy=True)
409412

410-
def first(self, skipna=None, keep_attrs=True):
413+
def first(self, skipna=None, keep_attrs=None):
411414
"""Return the first element of each group along the group dimension
412415
"""
413416
return self._first_or_last(duck_array_ops.first, skipna, keep_attrs)
414417

415-
def last(self, skipna=None, keep_attrs=True):
418+
def last(self, skipna=None, keep_attrs=None):
416419
"""Return the last element of each group along the group dimension
417420
"""
418421
return self._first_or_last(duck_array_ops.last, skipna, keep_attrs)
@@ -539,8 +542,8 @@ def _combine(self, applied, shortcut=False):
539542
combined = self._maybe_unstack(combined)
540543
return combined
541544

542-
def reduce(self, func, dim=None, axis=None, keep_attrs=False,
543-
shortcut=True, **kwargs):
545+
def reduce(self, func, dim=None, axis=None,
546+
keep_attrs=None, shortcut=True, **kwargs):
544547
"""Reduce the items in this group by applying `func` along some
545548
dimension(s).
546549
@@ -580,6 +583,9 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
580583
"warning, pass dim=xarray.ALL_DIMS explicitly.",
581584
FutureWarning, stacklevel=2)
582585

586+
if keep_attrs is None:
587+
keep_attrs = _get_keep_attrs(default=False)
588+
583589
def reduce_array(ar):
584590
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
585591
return self.apply(reduce_array, shortcut=shortcut)
@@ -590,12 +596,12 @@ def reduce_array(ar):
590596
def _reduce_method(cls, func, include_skipna, numeric_only):
591597
if include_skipna:
592598
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None,
593-
keep_attrs=False, **kwargs):
599+
keep_attrs=None, **kwargs):
594600
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
595601
skipna=skipna, allow_lazy=True, **kwargs)
596602
else:
597603
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None,
598-
keep_attrs=False, **kwargs):
604+
keep_attrs=None, **kwargs):
599605
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
600606
allow_lazy=True, **kwargs)
601607
return wrapped_func
@@ -651,7 +657,7 @@ def _combine(self, applied):
651657
combined = self._maybe_unstack(combined)
652658
return combined
653659

654-
def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
660+
def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
655661
"""Reduce the items in this group by applying `func` along some
656662
dimension(s).
657663
@@ -692,6 +698,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
692698
elif dim is None:
693699
dim = self._group_dim
694700

701+
if keep_attrs is None:
702+
keep_attrs = _get_keep_attrs(default=False)
703+
695704
def reduce_dataset(ds):
696705
return ds.reduce(func, dim, keep_attrs, **kwargs)
697706
return self.apply(reduce_dataset)
@@ -701,15 +710,15 @@ def reduce_dataset(ds):
701710
@classmethod
702711
def _reduce_method(cls, func, include_skipna, numeric_only):
703712
if include_skipna:
704-
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
713+
def wrapped_func(self, dim=DEFAULT_DIMS,
705714
skipna=None, **kwargs):
706-
return self.reduce(func, dim, keep_attrs, skipna=skipna,
707-
numeric_only=numeric_only, allow_lazy=True,
708-
**kwargs)
715+
return self.reduce(func, dim,
716+
skipna=skipna, numeric_only=numeric_only,
717+
allow_lazy=True, **kwargs)
709718
else:
710-
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
719+
def wrapped_func(self, dim=DEFAULT_DIMS,
711720
**kwargs):
712-
return self.reduce(func, dim, keep_attrs,
721+
return self.reduce(func, dim,
713722
numeric_only=numeric_only, allow_lazy=True,
714723
**kwargs)
715724
return wrapped_func

0 commit comments

Comments
 (0)