Skip to content

Commit 9b5140e

Browse files
authored
Add missing_dims argument allowing isel() to ignore missing dimensions (#3923)
* Add missing_dims argument allowing isel() to ignore missing dimensions * Add missing_dims to whats-new.rst * Fix typos in TestVariable.test_isel() * Change values for missing_dims argument to {'raise', 'warn', 'ignore'} Matches the possible values used elsewhere for drop_vars arguments. * Add missing_dims argument Dataset.isel() * Mention Dataset.isel in whats-new.rst description of missing_dims
1 parent 6bccbff commit 9b5140e

File tree

8 files changed

+151
-18
lines changed

8 files changed

+151
-18
lines changed

doc/whats-new.rst

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ New Features
3535
:py:func:`combine_by_coords` and :py:func:`combine_nested` using
3636
combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
3737
By `John Omotani <https://github.com/johnomotani>`_
38+
- 'missing_dims' argument to :py:meth:`Dataset.isel`,
39+
:py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing
40+
the exception when a dimension passed to ``isel`` is not present with a
41+
warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`)
42+
By `John Omotani <https://github.com/johnomotani>`_
3843
- Limited the length of array items with long string reprs to a
3944
reasonable width (:pull:`3900`)
4045
By `Maximilian Roos <https://github.com/max-sixty>`_

xarray/core/dataarray.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -1007,25 +1007,51 @@ def isel(
10071007
self,
10081008
indexers: Mapping[Hashable, Any] = None,
10091009
drop: bool = False,
1010+
missing_dims: str = "raise",
10101011
**indexers_kwargs: Any,
10111012
) -> "DataArray":
10121013
"""Return a new DataArray whose data is given by integer indexing
10131014
along the specified dimension(s).
10141015
1016+
Parameters
1017+
----------
1018+
indexers : dict, optional
1019+
A dict with keys matching dimensions and values given
1020+
by integers, slice objects or arrays.
1021+
indexer can be an integer, slice, array-like or DataArray.
1022+
If DataArrays are passed as indexers, xarray-style indexing will be
1023+
carried out. See :ref:`indexing` for the details.
1024+
One of indexers or indexers_kwargs must be provided.
1025+
drop : bool, optional
1026+
If ``drop=True``, drop coordinates variables indexed by integers
1027+
instead of making them scalar.
1028+
missing_dims : {"raise", "warn", "ignore"}, default "raise"
1029+
What to do if dimensions that should be selected from are not present in the
1030+
DataArray:
1031+
- "raise": raise an exception
1032+
- "warn": raise a warning, and ignore the missing dimensions
1033+
- "ignore": ignore the missing dimensions
1034+
**indexers_kwargs : {dim: indexer, ...}, optional
1035+
The keyword arguments form of ``indexers``.
1036+
10151037
See Also
10161038
--------
10171039
Dataset.isel
10181040
DataArray.sel
10191041
"""
1042+
10201043
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
1044+
10211045
if any(is_fancy_indexer(idx) for idx in indexers.values()):
1022-
ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
1046+
ds = self._to_temp_dataset()._isel_fancy(
1047+
indexers, drop=drop, missing_dims=missing_dims
1048+
)
10231049
return self._from_temp_dataset(ds)
10241050

10251051
# Much faster algorithm for when all indexers are ints, slices, one-dimensional
10261052
# lists, or zero or one-dimensional np.ndarray's
10271053

1028-
variable = self._variable.isel(indexers)
1054+
variable = self._variable.isel(indexers, missing_dims=missing_dims)
10291055

10301056
coords = {}
10311057
for coord_name, coord_value in self._coords.items():

xarray/core/dataset.py

+20-10
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
_check_inplace,
8888
_default,
8989
decode_numpy_dict_values,
90+
drop_dims_from_indexers,
9091
either_dict_or_kwargs,
9192
hashable,
9293
infix_dims,
@@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks):
17671768
return self._replace(variables)
17681769

17691770
def _validate_indexers(
1770-
self, indexers: Mapping[Hashable, Any]
1771+
self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise",
17711772
) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]:
17721773
""" Here we make sure
17731774
+ indexer has valid keys
@@ -1777,9 +1778,7 @@ def _validate_indexers(
17771778
"""
17781779
from .dataarray import DataArray
17791780

1780-
invalid = indexers.keys() - self.dims.keys()
1781-
if invalid:
1782-
raise ValueError("dimensions %r do not exist" % invalid)
1781+
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
17831782

17841783
# all indexers should be int, slice, np.ndarrays, or Variable
17851784
for k, v in indexers.items():
@@ -1875,6 +1874,7 @@ def isel(
18751874
self,
18761875
indexers: Mapping[Hashable, Any] = None,
18771876
drop: bool = False,
1877+
missing_dims: str = "raise",
18781878
**indexers_kwargs: Any,
18791879
) -> "Dataset":
18801880
"""Returns a new dataset with each array indexed along the specified
@@ -1896,6 +1896,12 @@ def isel(
18961896
drop : bool, optional
18971897
If ``drop=True``, drop coordinates variables indexed by integers
18981898
instead of making them scalar.
1899+
missing_dims : {"raise", "warn", "ignore"}, default "raise"
1900+
What to do if dimensions that should be selected from are not present in the
1901+
Dataset:
1902+
- "raise": raise an exception
1903+
- "warn": raise a warning, and ignore the missing dimensions
1904+
- "ignore": ignore the missing dimensions
18991905
**indexers_kwargs : {dim: indexer, ...}, optional
19001906
The keyword arguments form of ``indexers``.
19011907
One of indexers or indexers_kwargs must be provided.
@@ -1918,13 +1924,11 @@ def isel(
19181924
"""
19191925
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
19201926
if any(is_fancy_indexer(idx) for idx in indexers.values()):
1921-
return self._isel_fancy(indexers, drop=drop)
1927+
return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims)
19221928

19231929
# Much faster algorithm for when all indexers are ints, slices, one-dimensional
19241930
# lists, or zero or one-dimensional np.ndarray's
1925-
invalid = indexers.keys() - self.dims.keys()
1926-
if invalid:
1927-
raise ValueError("dimensions %r do not exist" % invalid)
1931+
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
19281932

19291933
variables = {}
19301934
dims: Dict[Hashable, Tuple[int, ...]] = {}
@@ -1958,10 +1962,16 @@ def isel(
19581962
file_obj=self._file_obj,
19591963
)
19601964

1961-
def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
1965+
def _isel_fancy(
1966+
self,
1967+
indexers: Mapping[Hashable, Any],
1968+
*,
1969+
drop: bool,
1970+
missing_dims: str = "raise",
1971+
) -> "Dataset":
19621972
# Note: we need to preserve the original indexers variable in order to merge the
19631973
# coords below
1964-
indexers_list = list(self._validate_indexers(indexers))
1974+
indexers_list = list(self._validate_indexers(indexers, missing_dims))
19651975

19661976
variables: Dict[Hashable, Variable] = {}
19671977
indexes: Dict[Hashable, pd.Index] = {}

xarray/core/utils.py

+49
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
Sequence,
2525
Tuple,
2626
TypeVar,
27+
Union,
2728
cast,
2829
)
2930

@@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable:
738739
return new_dim
739740

740741

742+
def drop_dims_from_indexers(
    indexers: Mapping[Hashable, Any],
    dims: Union[Sequence[Hashable], Mapping[Hashable, int]],
    missing_dims: str,
) -> Mapping[Hashable, Any]:
    """Depending on the setting of missing_dims, drop any dimensions from
    indexers that are not present in dims.

    Parameters
    ----------
    indexers : dict
        Mapping from dimension name to indexer. It is never modified in place.
    dims : sequence or mapping
        The dimension names that are considered valid. Only membership of the
        names is used; when a mapping is passed its values are ignored.
    missing_dims : {"raise", "warn", "ignore"}
        What to do with keys of ``indexers`` that are not in ``dims``:

        - "raise": raise a ValueError
        - "warn": emit a warning and drop the offending keys
        - "ignore": silently drop the offending keys

    Returns
    -------
    Mapping
        With "raise", ``indexers`` itself (unchanged). With "warn" or
        "ignore", a new dict holding only the entries whose key is in
        ``dims``, in their original order.

    Raises
    ------
    ValueError
        If ``missing_dims`` is "raise" and some key of ``indexers`` is not in
        ``dims``, or if ``missing_dims`` is not one of the recognised options.
    """
    if missing_dims not in ("raise", "warn", "ignore"):
        raise ValueError(
            f"Unrecognised option {missing_dims} for missing_dims argument"
        )

    # Computed once and shared by every mode, so membership is always tested
    # against a set rather than scanning a list/tuple of dims per key.
    invalid = indexers.keys() - set(dims)

    if missing_dims == "raise":
        if invalid:
            raise ValueError(
                f"dimensions {invalid} do not exist. Expected one or more of {dims}"
            )
        return indexers

    if invalid and missing_dims == "warn":
        warnings.warn(
            f"dimensions {invalid} do not exist. Expected one or more of {dims}"
        )

    # Build a fresh dict so the caller's mapping is left untouched.
    return {key: val for key, val in indexers.items() if key not in invalid}
788+
789+
741790
# Singleton type, as per https://github.com/python/typing/pull/240
742791
class Default(Enum):
743792
token = 0

xarray/core/variable.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
OrderedSet,
2929
_default,
3030
decode_numpy_dict_values,
31+
drop_dims_from_indexers,
3132
either_dict_or_kwargs,
3233
ensure_us_time_resolution,
3334
infix_dims,
@@ -1030,6 +1031,7 @@ def _to_dense(self):
10301031
def isel(
10311032
self: VariableType,
10321033
indexers: Mapping[Hashable, Any] = None,
1034+
missing_dims: str = "raise",
10331035
**indexers_kwargs: Any,
10341036
) -> VariableType:
10351037
"""Return a new array indexed along the specified dimension(s).
@@ -1039,6 +1041,12 @@ def isel(
10391041
**indexers : {dim: indexer, ...}
10401042
Keyword arguments with names matching dimensions and values given
10411043
by integers, slice objects or arrays.
1044+
missing_dims : {"raise", "warn", "ignore"}, default "raise"
1045+
What to do if dimensions that should be selected from are not present in the
1046+
Variable:
1047+
- "raise": raise an exception
1048+
- "warn": raise a warning, and ignore the missing dimensions
1049+
- "ignore": ignore the missing dimensions
10421050
10431051
Returns
10441052
-------
@@ -1050,11 +1058,7 @@ def isel(
10501058
"""
10511059
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
10521060

1053-
invalid = indexers.keys() - set(self.dims)
1054-
if invalid:
1055-
raise ValueError(
1056-
f"dimensions {invalid} do not exist. Expected one or more of {self.dims}"
1057-
)
1061+
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
10581062

10591063
key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
10601064
return self[key]

xarray/tests/test_dataarray.py

+13
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,19 @@ def test_isel(self):
781781
assert_identical(self.dv, self.dv.isel(x=slice(None)))
782782
assert_identical(self.dv[:3], self.dv.isel(x=slice(3)))
783783
assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5)))
784+
with raises_regex(
785+
ValueError,
786+
r"dimensions {'not_a_dim'} do not exist. Expected "
787+
r"one or more of \('x', 'y'\)",
788+
):
789+
self.dv.isel(not_a_dim=0)
790+
with pytest.warns(
791+
UserWarning,
792+
match=r"dimensions {'not_a_dim'} do not exist. "
793+
r"Expected one or more of \('x', 'y'\)",
794+
):
795+
self.dv.isel(not_a_dim=0, missing_dims="warn")
796+
assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore"))
784797

785798
def test_isel_types(self):
786799
# regression test for #1405

xarray/tests/test_dataset.py

+15
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,21 @@ def test_isel(self):
10231023

10241024
with pytest.raises(ValueError):
10251025
data.isel(not_a_dim=slice(0, 2))
1026+
with raises_regex(
1027+
ValueError,
1028+
r"dimensions {'not_a_dim'} do not exist. Expected "
1029+
r"one or more of "
1030+
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
1031+
):
1032+
data.isel(not_a_dim=slice(0, 2))
1033+
with pytest.warns(
1034+
UserWarning,
1035+
match=r"dimensions {'not_a_dim'} do not exist. "
1036+
r"Expected one or more of "
1037+
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
1038+
):
1039+
data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
1040+
assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))
10261041

10271042
ret = data.isel(dim1=0)
10281043
assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims

xarray/tests/test_variable.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1254,8 +1254,19 @@ def test_isel(self):
12541254
assert_identical(v.isel(x=0), v[:, 0])
12551255
assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
12561256
assert_identical(v.isel(time=[]), v[[]])
1257-
with raises_regex(ValueError, "do not exist"):
1257+
with raises_regex(
1258+
ValueError,
1259+
r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
1260+
r"\('time', 'x'\)",
1261+
):
12581262
v.isel(not_a_dim=0)
1263+
with pytest.warns(
1264+
UserWarning,
1265+
match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
1266+
r"\('time', 'x'\)",
1267+
):
1268+
v.isel(not_a_dim=0, missing_dims="warn")
1269+
assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore"))
12591270

12601271
def test_index_0d_numpy_string(self):
12611272
# regression test to verify our work around for indexing 0d strings

0 commit comments

Comments
 (0)