Skip to content

Commit 5e80189

Browse files
authored
Fix in vectorized item assignment (#1746)
* Fix in NumpyVindexAdapter.__setitem__ and DataArray.__setitem__ * Update what's new * Broadcasting in setitem * Small clean up. Revert unintended change. * Check coordinate consistency for DataArray.__setitem__ * Only pass a dict of variables to `assert_coordinate_consistent` * Update docs. * still working * Coordinate validation in .loc.__setitem__ * remove extra parenthesis * Refactoring. remap_label_indexers is moved to coordinates.py * Added an exception in doc for coordinate confliction * Added a TODO for unused indexing part in Loc.__setitem__
1 parent ea72303 commit 5e80189

9 files changed

+286
-53
lines changed

doc/indexing.rst

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ Like numpy and pandas, xarray supports indexing many array elements at once in a
277277
If you only provide integers, slices, or unlabeled arrays (array without
278278
dimension names, such as ``np.ndarray``, ``list``, but not
279279
:py:meth:`~xarray.DataArray` or :py:meth:`~xarray.Variable`) indexing can be
280-
understand as orthogonally. Each indexer component selects independently along
280+
understood as orthogonal. Each indexer component selects independently along
281281
the corresponding dimension, similar to how vector indexing works in Fortran or
282282
MATLAB, or after using the :py:func:`numpy.ix_` helper:
283283

@@ -357,6 +357,14 @@ These methods may and also be applied to ``Dataset`` objects
357357
``isel_points`` and ``sel_points`` are now deprecated.
358358
See :ref:`more_advanced_indexing` for their alternative.
359359

360+
.. note::
361+
362+
If an indexer is a :py:class:`~xarray.DataArray`, its coordinates should not
363+
conflict with the selected subpart of the target array (except for the
364+
explicitly indexed dimensions with ``.loc``/``.sel``).
365+
Otherwise, ``IndexError`` will be raised.
366+
367+
360368
.. _assigning_values:
361369

362370
Assigning values with indexing
@@ -401,6 +409,11 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t
401409
Dask array does not support value assignment
402410
(see :ref:`dask` for the details).
403411

412+
.. note::
413+
414+
Coordinates in both the left- and right-hand-side arrays should not
415+
conflict with each other.
416+
Otherwise, ``IndexError`` will be raised.
404417

405418
.. warning::
406419

@@ -457,6 +470,7 @@ method:
457470
arr.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
458471
time=times)
459472
473+
460474
.. _align and reindex:
461475

462476
Align and reindex

doc/whats-new.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Enhancements
3333
Bug fixes
3434
~~~~~~~~~
3535

36+
- Bug fix in vectorized assignment (:issue:`1743`, :issue:`1744`).
37+
Now item assignment to :py:meth:`~DataArray.__setitem__` checks
38+
coordinates of target, destination and keys. If there is any conflict among
39+
these coordinates, ``IndexError`` will be raised.
40+
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
41+
42+
3643
.. _whats-new.0.10.0:
3744

3845
- Properly point DataArray.__dask_scheduler__ to dask.threaded.get

xarray/core/coordinates.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
from contextlib import contextmanager
66
import pandas as pd
77

8-
from . import formatting
8+
from . import formatting, indexing
99
from .utils import Frozen
1010
from .merge import (
1111
merge_coords, expand_and_merge_variables, merge_coords_for_inplace_math)
1212
from .pycompat import OrderedDict
13+
from .variable import Variable
1314

1415

1516
class AbstractCoordinates(Mapping, formatting.ReprMixin):
@@ -301,3 +302,54 @@ def __getitem__(self, key):
301302

302303
def __unicode__(self):
303304
return formatting.indexes_repr(self)
305+
306+
307+
def assert_coordinate_consistent(obj, coords):
    """Make sure the dimension coordinates of obj are consistent with coords.

    Parameters
    ----------
    obj : DataArray or Dataset
        Object whose dimension coordinates are checked.
    coords : dict-like of variables
        Candidate coordinate variables, keyed by name.

    Raises
    ------
    IndexError
        If a dimension of ``obj`` has an entry in both ``obj.coords`` and
        ``coords`` and the two variables are not equal.
    """
    for dim in obj.dims:
        # A conflict is only possible when the dimension carries a
        # coordinate on both sides; otherwise there is nothing to compare.
        if dim not in coords or dim not in obj.coords:
            continue
        if coords[dim].equals(obj[dim].variable):
            continue
        raise IndexError(
            'dimension coordinate {!r} conflicts between '
            'indexed and indexing objects:\n{}\nvs.\n{}'
            .format(dim, obj[dim], coords[dim]))
322+
323+
324+
def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
    """Remap label-based ``**indexers`` to positional ones using obj.coords.

    If an indexer is a DataArray that has coordinates, those coordinates
    (except the ones named after a key of ``indexers``) are attached to the
    corresponding positional indexer.

    Parameters
    ----------
    obj : DataArray or Dataset
        Object whose coordinates are used to look up the labels.
    method, tolerance : optional
        Forwarded unchanged to ``indexing.remap_label_indexers``.
    **indexers : {dim: indexer, ...}
        Label-based indexers keyed by dimension name.

    Returns
    -------
    pos_indexers : same type as indexers
        Each value is an np.ndarray, Variable or DataArray.
    new_indexes : mapping of new dimensional coordinates.
    """
    from .dataarray import DataArray

    # The low-level remapping works on plain data, so unwrap DataArrays.
    raw_indexers = {}
    for name, indexer in indexers.items():
        if isinstance(indexer, DataArray):
            raw_indexers[name] = indexer.variable.data
        else:
            raw_indexers[name] = indexer

    pos_indexers, new_indexes = indexing.remap_label_indexers(
        obj, raw_indexers, method=method, tolerance=tolerance)

    # Re-wrap each positional indexer in the container type of its label
    # indexer so that dimension names (and coordinates) are preserved.
    for name, indexer in indexers.items():
        if isinstance(indexer, Variable):
            pos_indexers[name] = Variable(indexer.dims, pos_indexers[name])
        elif isinstance(indexer, DataArray):
            # drop coordinates found in indexers since .sel() already
            # ensures alignments
            kept_coords = OrderedDict()
            for coord_name, coord_var in indexer._coords.items():
                if coord_name not in indexers:
                    kept_coords[coord_name] = coord_var
            pos_indexers[name] = DataArray(
                pos_indexers[name], coords=kept_coords, dims=indexer.dims)
    return pos_indexers, new_indexes

xarray/core/dataarray.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
from .alignment import align, reindex_like_indexers
2121
from .common import AbstractArray, BaseDataObject
2222
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
23-
Indexes)
23+
Indexes, assert_coordinate_consistent,
24+
remap_label_indexers)
2425
from .dataset import Dataset, merge_indexes, split_indexes
2526
from .pycompat import iteritems, basestring, OrderedDict, zip, range
2627
from .variable import (as_variable, Variable, as_compatible_data,
@@ -102,22 +103,20 @@ class _LocIndexer(object):
102103
def __init__(self, data_array):
103104
self.data_array = data_array
104105

105-
def _remap_key(self, key):
106+
def __getitem__(self, key):
106107
if not utils.is_dict_like(key):
107108
# expand the indexer so we can handle Ellipsis
108109
labels = indexing.expanded_indexer(key, self.data_array.ndim)
109110
key = dict(zip(self.data_array.dims, labels))
110-
return indexing.remap_label_indexers(self.data_array, key)
111+
return self.data_array.sel(**key)
111112

112-
def __getitem__(self, key):
113+
def __setitem__(self, key, value):
113114
if not utils.is_dict_like(key):
114115
# expand the indexer so we can handle Ellipsis
115116
labels = indexing.expanded_indexer(key, self.data_array.ndim)
116117
key = dict(zip(self.data_array.dims, labels))
117-
return self.data_array.sel(**key)
118118

119-
def __setitem__(self, key, value):
120-
pos_indexers, _ = self._remap_key(key)
119+
pos_indexers, _ = remap_label_indexers(self.data_array, **key)
121120
self.data_array[pos_indexers] = value
122121

123122

@@ -484,7 +483,15 @@ def __setitem__(self, key, value):
484483
if isinstance(key, basestring):
485484
self.coords[key] = value
486485
else:
487-
# xarray-style array indexing
486+
# Coordinates in key, value and self[key] should be consistent.
487+
# TODO Coordinate consistency in key is checked here, but it
488+
# causes unnecessary indexing. It should be optimized.
489+
obj = self[key]
490+
if isinstance(value, DataArray):
491+
assert_coordinate_consistent(value, obj.coords.variables)
492+
# DataArray key -> Variable key
493+
key = {k: v.variable if isinstance(v, DataArray) else v
494+
for k, v in self._item_key_to_dict(key).items()}
488495
self.variable[key] = value
489496

490497
def __delitem__(self, key):

xarray/core/dataset.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
from . import duck_array_ops
2424
from .. import conventions
2525
from .alignment import align
26-
from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes
26+
from .coordinates import (DatasetCoordinates, LevelCoordinatesSource, Indexes,
27+
assert_coordinate_consistent, remap_label_indexers)
2728
from .common import ImplementsDatasetReduce, BaseDataObject
2829
from .dtypes import is_datetime_like
2930
from .merge import (dataset_update_method, dataset_merge_method,
@@ -1305,15 +1306,7 @@ def _get_indexers_coordinates(self, indexers):
13051306
# we don't need to call align() explicitly, because merge_variables
13061307
# already checks for exact alignment between dimension coordinates
13071308
coords = merge_variables(coord_list)
1308-
1309-
for k in self.dims:
1310-
# make sure there are not conflict in dimension coordinates
1311-
if (k in coords and k in self._variables and
1312-
not coords[k].equals(self._variables[k])):
1313-
raise IndexError(
1314-
'dimension coordinate {!r} conflicts between '
1315-
'indexed and indexing objects:\n{}\nvs.\n{}'
1316-
.format(k, self._variables[k], coords[k]))
1309+
assert_coordinate_consistent(self, coords)
13171310

13181311
attached_coords = OrderedDict()
13191312
for k, v in coords.items(): # silently drop the conflicted variables.
@@ -1437,25 +1430,8 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
14371430
Dataset.isel
14381431
DataArray.sel
14391432
"""
1440-
from .dataarray import DataArray
1441-
1442-
v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
1443-
for k, v in indexers.items()}
1444-
1445-
pos_indexers, new_indexes = indexing.remap_label_indexers(
1446-
self, v_indexers, method=method, tolerance=tolerance
1447-
)
1448-
# attach indexer's coordinate to pos_indexers
1449-
for k, v in indexers.items():
1450-
if isinstance(v, Variable):
1451-
pos_indexers[k] = Variable(v.dims, pos_indexers[k])
1452-
elif isinstance(v, DataArray):
1453-
# drop coordinates found in indexers since .sel() already
1454-
# ensures alignments
1455-
coords = OrderedDict((k, v) for k, v in v._coords.items()
1456-
if k not in indexers)
1457-
pos_indexers[k] = DataArray(pos_indexers[k],
1458-
coords=coords, dims=v.dims)
1433+
pos_indexers, new_indexes = remap_label_indexers(self, method,
1434+
tolerance, **indexers)
14591435
result = self.isel(drop=drop, **pos_indexers)
14601436
return result._replace_indexes(new_indexes)
14611437

xarray/core/variable.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -637,17 +637,22 @@ def __setitem__(self, key, value):
637637
"""
638638
dims, index_tuple, new_order = self._broadcast_indexes(key)
639639

640-
if isinstance(value, Variable):
641-
value = value.set_dims(dims).data
642-
643-
if new_order:
644-
value = duck_array_ops.asarray(value)
640+
if not isinstance(value, Variable):
641+
value = as_compatible_data(value)
645642
if value.ndim > len(dims):
646643
raise ValueError(
647644
'shape mismatch: value array of shape %s could not be'
648645
'broadcast to indexing result with %s dimensions'
649646
% (value.shape, len(dims)))
647+
if value.ndim == 0:
648+
value = Variable((), value)
649+
else:
650+
value = Variable(dims[-value.ndim:], value)
651+
# broadcast to become assignable
652+
value = value.set_dims(dims).data
650653

654+
if new_order:
655+
value = duck_array_ops.asarray(value)
651656
value = value[(len(dims) - value.ndim) * (np.newaxis,) +
652657
(Ellipsis,)]
653658
value = np.moveaxis(value, new_order, range(len(new_order)))

0 commit comments

Comments
 (0)