Add missing_dims argument allowing isel() to ignore missing dimensions (#3923)

johnomotani · web-flow · commit 9b5140e07112 · 2020-04-03T15:47:07.000-04:00
* Add missing_dims argument allowing isel() to ignore missing dimensions

* Add missing_dims to whats-new.rst

* Fix typos in TestVariable.test_isel()

* Change values for missing_dims argument to {'raise', 'warn', 'ignore'}

Matches the possible values used elsewhere for drop_vars arguments.

* Add missing_dims argument to Dataset.isel()

* Mention Dataset.isel in whats-new.rst description of missing_dims
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -35,6 +35,11 @@ New Features
   :py:func:`combine_by_coords` and :py:func:`combine_nested` using
   combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
   By `John Omotani <https://github.com/johnomotani>`_
+- 'missing_dims' argument to :py:meth:`Dataset.isel`,
+  :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing
+  the exception when a dimension passed to ``isel`` is not present with a
+  warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`)
+  By `John Omotani <https://github.com/johnomotani>`_
 - Limited the length of array items with long string reprs to a
   reasonable width (:pull:`3900`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -1007,25 +1007,51 @@ def isel(
         self,
         indexers: Mapping[Hashable, Any] = None,
         drop: bool = False,
+        missing_dims: str = "raise",
         **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by integer indexing
         along the specified dimension(s).
 
+        Parameters
+        ----------
+        indexers : dict, optional
+            A dict with keys matching dimensions and values given
+            by integers, slice objects or arrays.
+            Each indexer can be an integer, slice, array-like or DataArray.
+            If DataArrays are passed as indexers, xarray-style indexing will be
+            carried out. See :ref:`indexing` for the details.
+            One of indexers or indexers_kwargs must be provided.
+        drop : bool, optional
+            If ``drop=True``, drop coordinates variables indexed by integers
+            instead of making them scalar.
+        missing_dims : {"raise", "warn", "ignore"}, default "raise"
+            What to do if dimensions that should be selected from are not present in the
+            DataArray:
+            - "raise": raise an exception
+            - "warn": raise a warning, and ignore the missing dimensions
+            - "ignore": ignore the missing dimensions
+        **indexers_kwargs : {dim: indexer, ...}, optional
+            The keyword arguments form of ``indexers``.
+
         See Also
         --------
         Dataset.isel
         DataArray.sel
         """
+
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
+
         if any(is_fancy_indexer(idx) for idx in indexers.values()):
-            ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
+            ds = self._to_temp_dataset()._isel_fancy(
+                indexers, drop=drop, missing_dims=missing_dims
+            )
             return self._from_temp_dataset(ds)
 
         # Much faster algorithm for when all indexers are ints, slices, one-dimensional
         # lists, or zero or one-dimensional np.ndarray's
 
-        variable = self._variable.isel(indexers)
+        variable = self._variable.isel(indexers, missing_dims=missing_dims)
 
         coords = {}
         for coord_name, coord_value in self._coords.items():
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -87,6 +87,7 @@
     _check_inplace,
     _default,
     decode_numpy_dict_values,
+    drop_dims_from_indexers,
     either_dict_or_kwargs,
     hashable,
     infix_dims,
@@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks):
         return self._replace(variables)
 
     def _validate_indexers(
-        self, indexers: Mapping[Hashable, Any]
+        self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise",
     ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]:
         """ Here we make sure
         + indexer has a valid keys
@@ -1777,9 +1778,7 @@ def _validate_indexers(
         """
         from .dataarray import DataArray
 
-        invalid = indexers.keys() - self.dims.keys()
-        if invalid:
-            raise ValueError("dimensions %r do not exist" % invalid)
+        indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
 
         # all indexers should be int, slice, np.ndarrays, or Variable
         for k, v in indexers.items():
@@ -1875,6 +1874,7 @@ def isel(
         self,
         indexers: Mapping[Hashable, Any] = None,
         drop: bool = False,
+        missing_dims: str = "raise",
         **indexers_kwargs: Any,
     ) -> "Dataset":
         """Returns a new dataset with each array indexed along the specified
@@ -1896,6 +1896,12 @@ def isel(
         drop : bool, optional
             If ``drop=True``, drop coordinates variables indexed by integers
             instead of making them scalar.
+        missing_dims : {"raise", "warn", "ignore"}, default "raise"
+            What to do if dimensions that should be selected from are not present in the
+            Dataset:
+            - "raise": raise an exception
+            - "warn": raise a warning, and ignore the missing dimensions
+            - "ignore": ignore the missing dimensions
         **indexers_kwargs : {dim: indexer, ...}, optional
             The keyword arguments form of ``indexers``.
             One of indexers or indexers_kwargs must be provided.
@@ -1918,13 +1924,11 @@ def isel(
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
         if any(is_fancy_indexer(idx) for idx in indexers.values()):
-            return self._isel_fancy(indexers, drop=drop)
+            return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims)
 
         # Much faster algorithm for when all indexers are ints, slices, one-dimensional
         # lists, or zero or one-dimensional np.ndarray's
-        invalid = indexers.keys() - self.dims.keys()
-        if invalid:
-            raise ValueError("dimensions %r do not exist" % invalid)
+        indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
 
         variables = {}
         dims: Dict[Hashable, Tuple[int, ...]] = {}
@@ -1958,10 +1962,16 @@ def isel(
             file_obj=self._file_obj,
         )
 
-    def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
+    def _isel_fancy(
+        self,
+        indexers: Mapping[Hashable, Any],
+        *,
+        drop: bool,
+        missing_dims: str = "raise",
+    ) -> "Dataset":
         # Note: we need to preserve the original indexers variable in order to merge the
         # coords below
-        indexers_list = list(self._validate_indexers(indexers))
+        indexers_list = list(self._validate_indexers(indexers, missing_dims))
 
         variables: Dict[Hashable, Variable] = {}
         indexes: Dict[Hashable, pd.Index] = {}
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
@@ -24,6 +24,7 @@
     Sequence,
     Tuple,
     TypeVar,
+    Union,
     cast,
 )
 
@@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable:
     return new_dim
 
 
+def drop_dims_from_indexers(
+    indexers: Mapping[Hashable, Any],
+    dims: Union[list, Mapping[Hashable, int]],
+    missing_dims: str,
+) -> Mapping[Hashable, Any]:
+    """ Depending on the setting of missing_dims, drop any dimensions from indexers that
+    are not present in dims.
+
+    Parameters
+    ----------
+    indexers : dict
+    dims : sequence or mapping
+    missing_dims : {"raise", "warn", "ignore"}
+    """
+
+    if missing_dims == "raise":
+        invalid = indexers.keys() - set(dims)
+        if invalid:
+            raise ValueError(
+                f"dimensions {invalid} do not exist. Expected one or more of {dims}"
+            )
+
+        return indexers
+
+    elif missing_dims == "warn":
+
+        # don't modify input
+        indexers = dict(indexers)
+
+        invalid = indexers.keys() - set(dims)
+        if invalid:
+            warnings.warn(
+                f"dimensions {invalid} do not exist. Expected one or more of {dims}"
+            )
+        for key in invalid:
+            indexers.pop(key)
+
+        return indexers
+
+    elif missing_dims == "ignore":
+        return {key: val for key, val in indexers.items() if key in dims}
+
+    else:
+        raise ValueError(
+            f"Unrecognised option {missing_dims} for missing_dims argument"
+        )
+
+
 # Singleton type, as per https://github.com/python/typing/pull/240
 class Default(Enum):
     token = 0
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -28,6 +28,7 @@
     OrderedSet,
     _default,
     decode_numpy_dict_values,
+    drop_dims_from_indexers,
     either_dict_or_kwargs,
     ensure_us_time_resolution,
     infix_dims,
@@ -1030,6 +1031,7 @@ def _to_dense(self):
     def isel(
         self: VariableType,
         indexers: Mapping[Hashable, Any] = None,
+        missing_dims: str = "raise",
         **indexers_kwargs: Any,
     ) -> VariableType:
         """Return a new array indexed along the specified dimension(s).
@@ -1039,6 +1041,12 @@ def isel(
         **indexers : {dim: indexer, ...}
             Keyword arguments with names matching dimensions and values given
             by integers, slice objects or arrays.
+        missing_dims : {"raise", "warn", "ignore"}, default "raise"
+            What to do if dimensions that should be selected from are not present in the
+            Variable:
+            - "raise": raise an exception
+            - "warn": raise a warning, and ignore the missing dimensions
+            - "ignore": ignore the missing dimensions
 
         Returns
         -------
@@ -1050,11 +1058,7 @@ def isel(
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
 
-        invalid = indexers.keys() - set(self.dims)
-        if invalid:
-            raise ValueError(
-                f"dimensions {invalid} do not exist. Expected one or more of {self.dims}"
-            )
+        indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
 
         key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
         return self[key]
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -781,6 +781,19 @@ def test_isel(self):
         assert_identical(self.dv, self.dv.isel(x=slice(None)))
         assert_identical(self.dv[:3], self.dv.isel(x=slice(3)))
         assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5)))
+        with raises_regex(
+            ValueError,
+            r"dimensions {'not_a_dim'} do not exist. Expected "
+            r"one or more of \('x', 'y'\)",
+        ):
+            self.dv.isel(not_a_dim=0)
+        with pytest.warns(
+            UserWarning,
+            match=r"dimensions {'not_a_dim'} do not exist. "
+            r"Expected one or more of \('x', 'y'\)",
+        ):
+            self.dv.isel(not_a_dim=0, missing_dims="warn")
+        assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore"))
 
     def test_isel_types(self):
         # regression test for #1405
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -1023,6 +1023,21 @@ def test_isel(self):
 
         with pytest.raises(ValueError):
             data.isel(not_a_dim=slice(0, 2))
+        with raises_regex(
+            ValueError,
+            r"dimensions {'not_a_dim'} do not exist. Expected "
+            r"one or more of "
+            r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
+        ):
+            data.isel(not_a_dim=slice(0, 2))
+        with pytest.warns(
+            UserWarning,
+            match=r"dimensions {'not_a_dim'} do not exist. "
+            r"Expected one or more of "
+            r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
+        ):
+            data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
+        assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))
 
         ret = data.isel(dim1=0)
         assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
@@ -1254,8 +1254,19 @@ def test_isel(self):
         assert_identical(v.isel(x=0), v[:, 0])
         assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
         assert_identical(v.isel(time=[]), v[[]])
-        with raises_regex(ValueError, "do not exist"):
+        with raises_regex(
+            ValueError,
+            r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
+            r"\('time', 'x'\)",
+        ):
             v.isel(not_a_dim=0)
+        with pytest.warns(
+            UserWarning,
+            match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
+            r"\('time', 'x'\)",
+        ):
+            v.isel(not_a_dim=0, missing_dims="warn")
+        assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore"))
 
     def test_index_0d_numpy_string(self):
         # regression test to verify our work around for indexing 0d strings