Improved duck array wrapping #9798

Merged
merged 30 commits into main from more-array-api-compat on Nov 26, 2024

Changes from 21 commits

Commits (30)
fd6b339
lots more duck array compat, plus tests
slevang Nov 18, 2024
893408c
Merge branch 'main' into more-array-api-compat
slevang Nov 18, 2024
f7866ce
merge sliding_window_view
slevang Nov 18, 2024
90037fe
namespaces constant
slevang Nov 18, 2024
5ba1a2f
revert dask allowed
slevang Nov 18, 2024
6225ae3
fix up some tests
slevang Nov 19, 2024
e2911c2
backwards compat sparse mask
slevang Nov 19, 2024
2ac37f9
add as_array methods
slevang Nov 21, 2024
1cc344b
to_like_array helper
slevang Nov 21, 2024
69080a5
Merge branch 'main' into more-array-api-compat
slevang Nov 21, 2024
372439c
only cast non-numpy
slevang Nov 21, 2024
0eef2cb
better idxminmax approach
slevang Nov 21, 2024
6739504
fix mypy
slevang Nov 21, 2024
9e6d6f8
naming, add is_array_type
slevang Nov 21, 2024
e721011
add public doc and whats new
slevang Nov 21, 2024
1fe4131
update comments
slevang Nov 21, 2024
205c199
add support for chunked arrays in as_array_type
slevang Nov 21, 2024
7752088
Merge branch 'main' into more-array-api-compat
slevang Nov 21, 2024
c8d4e5e
revert array_type methods
slevang Nov 22, 2024
e67a819
Merge branch 'main' into more-array-api-compat
slevang Nov 22, 2024
f306768
fix up whats new
slevang Nov 22, 2024
18ebdcd
comment about bool_
slevang Nov 22, 2024
f51e3fb
Merge branch 'main' into more-array-api-compat
slevang Nov 22, 2024
121af9e
add jax to complete ci envs
slevang Nov 23, 2024
472ae7e
add pint and sparse to tests
slevang Nov 23, 2024
5aa4a39
remove from windows
slevang Nov 23, 2024
390df6f
mypy, xfail one more sparse
slevang Nov 23, 2024
f6074d2
add dask and a few other methods
slevang Nov 25, 2024
561f21b
Merge branch 'main' into more-array-api-compat
slevang Nov 25, 2024
bfd6aeb
move whats new
slevang Nov 25, 2024
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -66,6 +66,9 @@ New Features
By `Sam Levang <https://github.com/slevang>`_.
- Speed up loading of large zarr stores using dask arrays. (:issue:`8902`)
By `Deepak Cherian <https://github.com/dcherian>`_.
- Better support wrapping additional array types (e.g. ``cupy`` or ``jax``) by calling generalized
duck array operations throughout more xarray methods. (:issue:`7848`, :pull:`9798`).
By `Sam Levang <https://github.com/slevang>`_.
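
As a rough illustration of what this entry describes (a sketch, not an excerpt from the docs; it assumes jax is installed and its arrays expose the array API):

import jax.numpy as jnp
import xarray as xr

# wrap a jax array directly in a DataArray
da = xr.DataArray(jnp.arange(6.0).reshape(2, 3), dims=("x", "y"))

# with the generalized duck array ops in this PR, methods like clip dispatch
# through the wrapped array's namespace, so the result should stay a jax
# array instead of being cast to numpy
clipped = da.clip(min=1.0, max=4.0)
print(type(clipped.data))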

Breaking changes
~~~~~~~~~~~~~~~~
38 changes: 38 additions & 0 deletions xarray/core/array_api_compat.py
@@ -1,5 +1,7 @@
import numpy as np

from xarray.namedarray.pycompat import array_type


def is_weak_scalar_type(t):
return isinstance(t, bool | int | float | complex | str | bytes)
@@ -42,3 +44,39 @@ def result_type(*arrays_and_dtypes, xp) -> np.dtype:
return xp.result_type(*arrays_and_dtypes)
else:
return _future_array_api_result_type(*arrays_and_dtypes, xp=xp)


def get_array_namespace(*values):
def _get_single_namespace(x):
if hasattr(x, "__array_namespace__"):
return x.__array_namespace__()
elif isinstance(x, array_type("cupy")):
# cupy is fully compliant from xarray's perspective, but will not expose
# __array_namespace__ until at least v14. Special case it for now
import cupy as cp

return cp
else:
return np

namespaces = {_get_single_namespace(t) for t in values}
non_numpy = namespaces - {np}

if len(non_numpy) > 1:
names = [module.__name__ for module in non_numpy]
raise TypeError(f"Mixed array types {names} are not supported.")
elif non_numpy:
[xp] = non_numpy
else:
xp = np

return xp


def to_like_array(array, like):
# Mostly for cupy compatibility, because cupy binary ops require all cupy arrays
xp = get_array_namespace(like)
if xp is not np:
return xp.asarray(array)
# avoid casting things like pint quantities to numpy arrays
return array
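
A quick sketch of how these two helpers are meant to behave (illustrative only; the cupy branch assumes cupy is installed):

import numpy as np
from xarray.core.array_api_compat import get_array_namespace, to_like_array

x = np.arange(4)
assert get_array_namespace(x) is np  # plain numpy input resolves to the numpy namespace

# with a cupy array, the namespace resolves to cupy and to_like_array converts
# the numpy operand so both sides of a binary op live on the device
# import cupy as cp
# y = cp.arange(4)
# assert get_array_namespace(y) is cp
# assert isinstance(to_like_array(x, like=y), cp.ndarray)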
5 changes: 3 additions & 2 deletions xarray/core/common.py
@@ -496,7 +496,7 @@ def clip(
keep_attrs = _get_keep_attrs(default=True)

return apply_ufunc(
np.clip, self, min, max, keep_attrs=keep_attrs, dask="allowed"
duck_array_ops.clip, self, min, max, keep_attrs=keep_attrs, dask="allowed"
)

def get_index(self, key: Hashable) -> pd.Index:
@@ -1760,7 +1760,8 @@ def _full_like_variable(
**from_array_kwargs,
)
else:
data = np.full_like(other.data, fill_value, dtype=dtype)
xp = duck_array_ops.get_array_namespace(other.data)
data = xp.full_like(other.data, fill_value, dtype=dtype)

return Variable(dims=other.dims, data=data, attrs=other.attrs)

16 changes: 9 additions & 7 deletions xarray/core/computation.py
@@ -24,6 +24,7 @@

from xarray.core import dtypes, duck_array_ops, utils
from xarray.core.alignment import align, deep_align
from xarray.core.array_api_compat import to_like_array
from xarray.core.common import zeros_like
from xarray.core.duck_array_ops import datetime_to_numeric
from xarray.core.formatting import limit_lines
@@ -1702,7 +1703,7 @@ def cross(
)

c = apply_ufunc(
np.cross,
duck_array_ops.cross,
a,
b,
input_core_dims=[[dim], [dim]],
@@ -2170,13 +2171,14 @@ def _calc_idxminmax(
chunks = dict(zip(array.dims, array.chunks, strict=True))
dask_coord = chunkmanager.from_array(array[dim].data, chunks=chunks[dim])
data = dask_coord[duck_array_ops.ravel(indx.data)]
res = indx.copy(data=duck_array_ops.reshape(data, indx.shape))
# we need to attach back the dim name
res.name = dim
else:
res = array[dim][(indx,)]
# The dim is gone but we need to remove the corresponding coordinate.
del res.coords[dim]
arr_coord = to_like_array(array[dim].data, array.data)
data = arr_coord[duck_array_ops.ravel(indx.data)]

# rebuild like the argmin/max output, and rename as the dim name
data = duck_array_ops.reshape(data, indx.shape)
res = indx.copy(data=data)
res.name = dim

if skipna or (skipna is None and array.dtype.kind in na_dtypes):
# Put the NaN values back in after removing them
10 changes: 6 additions & 4 deletions xarray/core/dataset.py
@@ -55,6 +55,7 @@
align,
)
from xarray.core.arithmetic import DatasetArithmetic
from xarray.core.array_api_compat import to_like_array
from xarray.core.common import (
DataWithCoords,
_contains_datetime_like_objects,
@@ -127,7 +128,7 @@
calculate_dimensions,
)
from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager
from xarray.namedarray.pycompat import array_type, is_chunked_array
from xarray.namedarray.pycompat import array_type, is_chunked_array, to_numpy
from xarray.plot.accessor import DatasetPlotAccessor
from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims

@@ -6622,7 +6623,7 @@ def dropna(
array = self._variables[k]
if dim in array.dims:
dims = [d for d in array.dims if d != dim]
count += np.asarray(array.count(dims))
count += to_numpy(array.count(dims).data)
size += math.prod([self.sizes[d] for d in dims])

if thresh is not None:
@@ -8736,16 +8737,17 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False):
coord_names.add(k)
else:
if k in self.data_vars and dim in v.dims:
coord_data = to_like_array(coord_var.data, like=v.data)
if _contains_datetime_like_objects(v):
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
if cumulative:
integ = duck_array_ops.cumulative_trapezoid(
v.data, coord_var.data, axis=v.get_axis_num(dim)
v.data, coord_data, axis=v.get_axis_num(dim)
)
v_dims = v.dims
else:
integ = duck_array_ops.trapz(
v.data, coord_var.data, axis=v.get_axis_num(dim)
v.data, coord_data, axis=v.get_axis_num(dim)
)
v_dims = list(v.dims)
v_dims.remove(dim)
139 changes: 91 additions & 48 deletions xarray/core/duck_array_ops.py
@@ -18,21 +18,16 @@
import pandas as pd
from numpy import all as array_all # noqa: F401
from numpy import any as array_any # noqa: F401
from numpy import concatenate as _concatenate
from numpy import ( # noqa: F401
full_like,
gradient,
isclose,
isin,
isnat,
take,
tensordot,
transpose,
unravel_index,
)
from pandas.api.types import is_extension_array_dtype

from xarray.core import dask_array_compat, dask_array_ops, dtypes, nputils
from xarray.core.array_api_compat import get_array_namespace
from xarray.core.options import OPTIONS
from xarray.core.utils import is_duck_array, is_duck_dask_array, module_available
from xarray.namedarray.parallelcompat import get_chunked_array_type
@@ -52,28 +47,6 @@
dask_available = module_available("dask")


def get_array_namespace(*values):
def _get_array_namespace(x):
if hasattr(x, "__array_namespace__"):
return x.__array_namespace__()
else:
return np

namespaces = {_get_array_namespace(t) for t in values}
non_numpy = namespaces - {np}

if len(non_numpy) > 1:
raise TypeError(
"cannot deal with more than one type supporting the array API at the same time"
)
elif non_numpy:
[xp] = non_numpy
else:
xp = np

return xp


def einsum(*args, **kwargs):
from xarray.core.options import OPTIONS

@@ -82,7 +55,23 @@ def einsum(*args, **kwargs):

return opt_einsum.contract(*args, **kwargs)
else:
return np.einsum(*args, **kwargs)
xp = get_array_namespace(*args)
return xp.einsum(*args, **kwargs)


def tensordot(*args, **kwargs):
xp = get_array_namespace(*args)
return xp.tensordot(*args, **kwargs)


def cross(*args, **kwargs):
xp = get_array_namespace(*args)
return xp.cross(*args, **kwargs)


def gradient(f, *varargs, axis=None, edge_order=1):
xp = get_array_namespace(f)
return xp.gradient(f, *varargs, axis=axis, edge_order=edge_order)


def _dask_or_eager_func(
@@ -131,15 +120,20 @@ def fail_on_dask_array_input(values, msg=None, func_name=None):
"masked_invalid", eager_module=np.ma, dask_module="dask.array.ma"
)

# sliding_window_view will not dispatch arbitrary kwargs (automatic_rechunk),
# so we need to hand-code this.
sliding_window_view = _dask_or_eager_func(
"sliding_window_view",
eager_module=np.lib.stride_tricks,
dask_module=dask_array_compat,
dask_only_kwargs=("automatic_rechunk",),
numpy_only_kwargs=("subok", "writeable"),
)

def sliding_window_view(array, window_shape, axis=None, **kwargs):
# TODO: some libraries (e.g. jax) don't have this, implement an alternative?
Contributor Author (slevang):

This is one of the biggest outstanding bummers of wrapping jax arrays. There is apparently openness to adding this as an API (even though it would not offer any performance benefit in XLA). But given that this is way outside the array API standard, I'm not sure it makes sense to implement a general version within xarray that doesn't rely on stride tricks.

Contributor:

You could implement a version using "summed area tables" (basically run a single accumulator and then compute differences between the window edges), or convolutions I guess.

Contributor Author (slevang):

I have something that works pretty well with this style of gather operation, but only in a jit context where XLA can work its magic. So I guess this is better left to the specific library, or the user, to implement.
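
For reference, a minimal sketch of the summed-area-table idea from the comment above (a hypothetical helper, not part of this PR) that computes sliding-window sums from a cumulative sum instead of stride tricks:

import numpy as np

def sliding_window_sum(a, window, axis=-1):
    # prepend a zero so differences of the cumulative sum give the sum over
    # each length-`window` slice along `axis`
    a = np.moveaxis(a, axis, -1)
    c = np.cumsum(a, axis=-1)
    c = np.concatenate([np.zeros(c.shape[:-1] + (1,), dtype=c.dtype), c], axis=-1)
    out = c[..., window:] - c[..., :-window]
    return np.moveaxis(out, -1, axis)

# sliding_window_sum(np.arange(5), window=2) -> array([1, 3, 5, 7])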

xp = get_array_namespace(array)
# sliding_window_view will not dispatch arbitrary kwargs (automatic_rechunk),
# so we need to hand-code this.
func = _dask_or_eager_func(
"sliding_window_view",
eager_module=xp.lib.stride_tricks,
dask_module=dask_array_compat,
dask_only_kwargs=("automatic_rechunk",),
numpy_only_kwargs=("subok", "writeable"),
)
return func(array, window_shape, axis=axis, **kwargs)


def round(array):
@@ -172,7 +166,8 @@ def isnull(data):
)
):
# these types cannot represent missing values
return full_like(data, dtype=bool, fill_value=False)
dtype = xp.bool_ if hasattr(xp, "bool_") else xp.bool
Member:

I believe xp.bool_ is only needed for compatibility with NumPy 1.x releases. Can we make that clearer here, e.g. by using np.bool_ and adding a comment about NumPy versions?

Contributor Author (slevang):

Will do, but cupy also only has bool_ at the moment, and passing the built-in bool here raised a cupy error when I tried. There was a weird window from roughly numpy 1.2x until 2.0 where bool was deprecated and only bool_ existed, then it was brought back in 2.0 🤷

return full_like(data, dtype=dtype, fill_value=False)
else:
# at this point, array should have dtype=object
if isinstance(data, np.ndarray) or is_extension_array_dtype(data):
@@ -213,11 +208,23 @@ def cumulative_trapezoid(y, x, axis):

# Pad so that 'axis' has same length in result as it did in y
pads = [(1, 0) if i == axis else (0, 0) for i in range(y.ndim)]
integrand = np.pad(integrand, pads, mode="constant", constant_values=0.0)

xp = get_array_namespace(y, x)
integrand = xp.pad(integrand, pads, mode="constant", constant_values=0.0)

return cumsum(integrand, axis=axis, skipna=False)


def full_like(a, fill_value, **kwargs):
xp = get_array_namespace(a)
return xp.full_like(a, fill_value, **kwargs)


def empty_like(a, **kwargs):
xp = get_array_namespace(a)
return xp.empty_like(a, **kwargs)


def astype(data, dtype, **kwargs):
if hasattr(data, "__array_namespace__"):
xp = get_array_namespace(data)
@@ -348,7 +355,8 @@ def array_notnull_equiv(arr1, arr2):

def count(data, axis=None):
"""Count the number of non-NA in this array along the given axis or axes"""
return np.sum(np.logical_not(isnull(data)), axis=axis)
xp = get_array_namespace(data)
return xp.sum(xp.logical_not(isnull(data)), axis=axis)


def sum_where(data, axis=None, dtype=None, where=None):
@@ -363,7 +371,7 @@ def sum_where(data, axis=None, dtype=None, where=None):

def where(condition, x, y):
"""Three argument where() with better dtype promotion rules."""
xp = get_array_namespace(condition)
xp = get_array_namespace(condition, x, y)
return xp.where(condition, *as_shared_dtype([x, y], xp=xp))


Expand All @@ -380,15 +388,25 @@ def fillna(data, other):
return where(notnull(data), data, other)


def logical_not(data):
xp = get_array_namespace(data)
return xp.logical_not(data)


def clip(data, min=None, max=None):
xp = get_array_namespace(data)
return xp.clip(data, min, max)


def concatenate(arrays, axis=0):
"""concatenate() with better dtype promotion rules."""
# TODO: remove the additional check once `numpy` adds `concat` to its array namespace
if hasattr(arrays[0], "__array_namespace__") and not isinstance(
arrays[0], np.ndarray
):
xp = get_array_namespace(arrays[0])
# TODO: `concat` is the xp compliant name, but fallback to concatenate for
# older numpy and for cupy
xp = get_array_namespace(*arrays)
if hasattr(xp, "concat"):
return xp.concat(as_shared_dtype(arrays, xp=xp), axis=axis)
return _concatenate(as_shared_dtype(arrays), axis=axis)
else:
return xp.concatenate(as_shared_dtype(arrays, xp=xp), axis=axis)


def stack(arrays, axis=0):
@@ -406,6 +424,26 @@ def ravel(array):
return reshape(array, (-1,))


def transpose(array, axes=None):
xp = get_array_namespace(array)
return xp.transpose(array, axes)


def moveaxis(array, source, destination):
xp = get_array_namespace(array)
return xp.moveaxis(array, source, destination)


def pad(array, pad_width, **kwargs):
xp = get_array_namespace(array)
return xp.pad(array, pad_width, **kwargs)


def quantile(array, q, axis=None, **kwargs):
xp = get_array_namespace(array)
return xp.quantile(array, q, axis=axis, **kwargs)


@contextlib.contextmanager
def _ignore_warnings_if(condition):
if condition:
@@ -747,6 +785,11 @@ def last(values, axis, skipna=None):
return take(values, -1, axis=axis)


def isin(element, test_elements, **kwargs):
xp = get_array_namespace(element, test_elements)
return xp.isin(element, test_elements, **kwargs)


def least_squares(lhs, rhs, rcond=None, skipna=False):
"""Return the coefficients and residuals of a least-squares fit."""
if is_duck_dask_array(rhs):