Skip to content

Commit d8bb620

Browse files
Control attrs of result in merge(), concat(), combine_by_coords() and combine_nested() (#3877)
* Optionally promote attrs from DataArray to Dataset in to_dataset Adds option 'promote_attrs' to DataArray.to_dataset(). By default promote_attrs=False, maintaining current behaviour. If promote_attrs=True, the attrs of the DataArray are shallow-copied to the Dataset returned by to_dataset(). * utils.ordered_dict_union returns the union of two compatible dicts If the values of any shared key are not equivalent, then raises an error. * combine_attrs argument for merge() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * combine_attrs argument for concat() Provides several options for how to combine the attributes of the passed objects and give them to the returned DataArray or Dataset. * combine_attrs argument for combine_by_coords() and combine_nested() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * Add combine_attrs changes to whats-new.rst * Update docstrings to note default values Apply suggestions from code review Co-Authored-By: Maximilian Roos <[email protected]> * First argument of update_safety_check and ordered_dict_union not mutable No need for these arguments to be MutableMapping rather than just Mapping. * Rename ordered_dict_union -> compat_dict_union Do not use OrderedDicts any more, so name did not make sense. * Move combine_attrs to v0.16.0 in whats-new.rst * Fix merge of whats-new.rst Co-authored-by: Maximilian Roos <[email protected]>
1 parent c10c992 commit d8bb620

12 files changed

+420
-25
lines changed

doc/whats-new.rst

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ Breaking changes
2424

2525
New Features
2626
~~~~~~~~~~~~
27+
- Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
28+
:py:func:`combine_by_coords` and :py:func:`combine_nested` using
29+
``combine_attrs`` keyword argument. (:issue:`3865`, :pull:`3877`)
30+
By `John Omotani <https://github.com/johnomotani>`_
2731

2832

2933
Bug fixes

xarray/core/combine.py

+47-3
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ def _combine_nd(
155155
compat="no_conflicts",
156156
fill_value=dtypes.NA,
157157
join="outer",
158+
combine_attrs="drop",
158159
):
159160
"""
160161
Combines an N-dimensional structure of datasets into one by applying a
@@ -202,13 +203,21 @@ def _combine_nd(
202203
compat=compat,
203204
fill_value=fill_value,
204205
join=join,
206+
combine_attrs=combine_attrs,
205207
)
206208
(combined_ds,) = combined_ids.values()
207209
return combined_ds
208210

209211

210212
def _combine_all_along_first_dim(
211-
combined_ids, dim, data_vars, coords, compat, fill_value=dtypes.NA, join="outer"
213+
combined_ids,
214+
dim,
215+
data_vars,
216+
coords,
217+
compat,
218+
fill_value=dtypes.NA,
219+
join="outer",
220+
combine_attrs="drop",
212221
):
213222

214223
# Group into lines of datasets which must be combined along dim
@@ -223,7 +232,7 @@ def _combine_all_along_first_dim(
223232
combined_ids = dict(sorted(group))
224233
datasets = combined_ids.values()
225234
new_combined_ids[new_id] = _combine_1d(
226-
datasets, dim, compat, data_vars, coords, fill_value, join
235+
datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs
227236
)
228237
return new_combined_ids
229238

@@ -236,6 +245,7 @@ def _combine_1d(
236245
coords="different",
237246
fill_value=dtypes.NA,
238247
join="outer",
248+
combine_attrs="drop",
239249
):
240250
"""
241251
Applies either concat or merge to 1D list of datasets depending on value
@@ -252,6 +262,7 @@ def _combine_1d(
252262
compat=compat,
253263
fill_value=fill_value,
254264
join=join,
265+
combine_attrs=combine_attrs,
255266
)
256267
except ValueError as err:
257268
if "encountered unexpected variable" in str(err):
@@ -265,7 +276,13 @@ def _combine_1d(
265276
else:
266277
raise
267278
else:
268-
combined = merge(datasets, compat=compat, fill_value=fill_value, join=join)
279+
combined = merge(
280+
datasets,
281+
compat=compat,
282+
fill_value=fill_value,
283+
join=join,
284+
combine_attrs=combine_attrs,
285+
)
269286

270287
return combined
271288

@@ -284,6 +301,7 @@ def _nested_combine(
284301
ids,
285302
fill_value=dtypes.NA,
286303
join="outer",
304+
combine_attrs="drop",
287305
):
288306

289307
if len(datasets) == 0:
@@ -311,6 +329,7 @@ def _nested_combine(
311329
coords=coords,
312330
fill_value=fill_value,
313331
join=join,
332+
combine_attrs=combine_attrs,
314333
)
315334
return combined
316335

@@ -323,6 +342,7 @@ def combine_nested(
323342
coords="different",
324343
fill_value=dtypes.NA,
325344
join="outer",
345+
combine_attrs="drop",
326346
):
327347
"""
328348
Explicitly combine an N-dimensional grid of datasets into one by using a
@@ -390,6 +410,16 @@ def combine_nested(
390410
- 'override': if indexes are of same size, rewrite indexes to be
391411
those of the first object with that dimension. Indexes for the same
392412
dimension must have the same size in all objects.
413+
combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
414+
default 'drop'
415+
String indicating how to combine attrs of the objects being merged:
416+
417+
- 'drop': empty attrs on returned Dataset.
418+
- 'identical': all attrs must be the same on every object.
419+
- 'no_conflicts': attrs from all objects are combined, any that have
420+
the same name must also have the same value.
421+
- 'override': skip comparing and copy attrs from the first dataset to
422+
the result.
393423
394424
Returns
395425
-------
@@ -468,6 +498,7 @@ def combine_nested(
468498
ids=False,
469499
fill_value=fill_value,
470500
join=join,
501+
combine_attrs=combine_attrs,
471502
)
472503

473504

@@ -482,6 +513,7 @@ def combine_by_coords(
482513
coords="different",
483514
fill_value=dtypes.NA,
484515
join="outer",
516+
combine_attrs="no_conflicts",
485517
):
486518
"""
487519
Attempt to auto-magically combine the given datasets into one by using
@@ -557,6 +589,16 @@ def combine_by_coords(
557589
- 'override': if indexes are of same size, rewrite indexes to be
558590
those of the first object with that dimension. Indexes for the same
559591
dimension must have the same size in all objects.
592+
combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
593+
default 'no_conflicts'
594+
String indicating how to combine attrs of the objects being merged:
595+
596+
- 'drop': empty attrs on returned Dataset.
597+
- 'identical': all attrs must be the same on every object.
598+
- 'no_conflicts': attrs from all objects are combined, any that have
599+
the same name must also have the same value.
600+
- 'override': skip comparing and copy attrs from the first dataset to
601+
the result.
560602
561603
Returns
562604
-------
@@ -700,6 +742,7 @@ def combine_by_coords(
700742
compat=compat,
701743
fill_value=fill_value,
702744
join=join,
745+
combine_attrs=combine_attrs,
703746
)
704747

705748
# Check the overall coordinates are monotonically increasing
@@ -717,6 +760,7 @@ def combine_by_coords(
717760
compat=compat,
718761
fill_value=fill_value,
719762
join=join,
763+
combine_attrs=combine_attrs,
720764
)
721765

722766

xarray/core/concat.py

+26-8
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from . import dtypes, utils
44
from .alignment import align
55
from .duck_array_ops import lazy_array_equiv
6-
from .merge import _VALID_COMPAT, unique_variable
6+
from .merge import _VALID_COMPAT, merge_attrs, unique_variable
77
from .variable import IndexVariable, Variable, as_variable
88
from .variable import concat as concat_vars
99

@@ -17,6 +17,7 @@ def concat(
1717
positions=None,
1818
fill_value=dtypes.NA,
1919
join="outer",
20+
combine_attrs="override",
2021
):
2122
"""Concatenate xarray objects along a new or existing dimension.
2223
@@ -92,15 +93,21 @@ def concat(
9293
- 'override': if indexes are of same size, rewrite indexes to be
9394
those of the first object with that dimension. Indexes for the same
9495
dimension must have the same size in all objects.
96+
combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
97+
default 'override'
98+
String indicating how to combine attrs of the objects being merged:
99+
100+
- 'drop': empty attrs on returned object.
101+
- 'identical': all attrs must be the same on every object.
102+
- 'no_conflicts': attrs from all objects are combined, any that have
103+
the same name must also have the same value.
104+
- 'override': skip comparing and copy attrs from the first dataset to
105+
the result.
95106
96107
Returns
97108
-------
98109
concatenated : type of objs
99110
100-
Notes
101-
-----
102-
Each concatenated Variable preserves corresponding ``attrs`` from the first element of ``objs``.
103-
104111
See also
105112
--------
106113
merge
@@ -132,7 +139,9 @@ def concat(
132139
"can only concatenate xarray Dataset and DataArray "
133140
"objects, got %s" % type(first_obj)
134141
)
135-
return f(objs, dim, data_vars, coords, compat, positions, fill_value, join)
142+
return f(
143+
objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs
144+
)
136145

137146

138147
def _calc_concat_dim_coord(dim):
@@ -306,6 +315,7 @@ def _dataset_concat(
306315
positions,
307316
fill_value=dtypes.NA,
308317
join="outer",
318+
combine_attrs="override",
309319
):
310320
"""
311321
Concatenate a sequence of datasets along a new or existing dimension
@@ -362,7 +372,7 @@ def _dataset_concat(
362372
result_vars.update(dim_coords)
363373

364374
# combine attrs of all datasets according to combine_attrs; take encoding from first dataset
365-
result_attrs = datasets[0].attrs
375+
result_attrs = merge_attrs([ds.attrs for ds in datasets], combine_attrs)
366376
result_encoding = datasets[0].encoding
367377

368378
# check that global attributes are fixed across all datasets if necessary
@@ -425,6 +435,7 @@ def _dataarray_concat(
425435
positions,
426436
fill_value=dtypes.NA,
427437
join="outer",
438+
combine_attrs="override",
428439
):
429440
arrays = list(arrays)
430441

@@ -453,5 +464,12 @@ def _dataarray_concat(
453464
positions,
454465
fill_value=fill_value,
455466
join=join,
467+
combine_attrs="drop",
456468
)
457-
return arrays[0]._from_temp_dataset(ds, name)
469+
470+
merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)
471+
472+
result = arrays[0]._from_temp_dataset(ds, name)
473+
result.attrs = merged_attrs
474+
475+
return result

xarray/core/dataarray.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,13 @@ def _to_dataset_whole(
475475
dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes)
476476
return dataset
477477

478-
def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset:
478+
def to_dataset(
479+
self,
480+
dim: Hashable = None,
481+
*,
482+
name: Hashable = None,
483+
promote_attrs: bool = False,
484+
) -> Dataset:
479485
"""Convert a DataArray to a Dataset.
480486
481487
Parameters
@@ -487,6 +493,8 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset:
487493
name : hashable, optional
488494
Name to substitute for this array's name. Only valid if ``dim`` is
489495
not provided.
496+
promote_attrs : bool, default False
497+
Set to True to shallow copy attrs of DataArray to returned Dataset.
490498
491499
Returns
492500
-------
@@ -500,9 +508,14 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset:
500508
if dim is not None:
501509
if name is not None:
502510
raise TypeError("cannot supply both dim and name arguments")
503-
return self._to_dataset_split(dim)
511+
result = self._to_dataset_split(dim)
504512
else:
505-
return self._to_dataset_whole(name)
513+
result = self._to_dataset_whole(name)
514+
515+
if promote_attrs:
516+
result.attrs = dict(self.attrs)
517+
518+
return result
506519

507520
@property
508521
def name(self) -> Optional[Hashable]:

xarray/core/dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ def __init__(
536536
if isinstance(coords, Dataset):
537537
coords = coords.variables
538538

539-
variables, coord_names, dims, indexes = merge_data_and_coords(
539+
variables, coord_names, dims, indexes, _ = merge_data_and_coords(
540540
data_vars, coords, compat="broadcast_equals"
541541
)
542542

0 commit comments

Comments
 (0)