pydata · Illviljan · Mar 27, 2022 · Dec 10, 2021 · Dec 10, 2021 · Dec 10, 2021
diff --git a/doc/api.rst b/doc/api.rst
@@ -943,6 +943,7 @@ Dataset
 
    DatasetWeighted
    DatasetWeighted.mean
+   DatasetWeighted.quantile
    DatasetWeighted.sum
    DatasetWeighted.std
    DatasetWeighted.var
@@ -957,6 +958,7 @@ DataArray
 
    DataArrayWeighted
    DataArrayWeighted.mean
+   DataArrayWeighted.quantile
    DataArrayWeighted.sum
    DataArrayWeighted.std
    DataArrayWeighted.var

diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
@@ -263,7 +263,7 @@ Weighted array reductions
 
 :py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted`
 and :py:meth:`Dataset.weighted` array reduction methods. They currently
-support weighted ``sum``, ``mean``, ``std`` and ``var``.
+support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``.
 
 .. ipython:: python
 
@@ -291,6 +291,12 @@ Calculate the weighted mean:
 
     weighted_prec.mean(dim="month")
 
+Calculate the weighted quantile:
+
+.. ipython:: python
+
+    weighted_prec.quantile(q=0.5, dim="month")
+
 The weighted sum corresponds to:
 
 .. ipython:: python

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -77,6 +77,8 @@ New Features
 ~~~~~~~~~~~~
 - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`).
   By `Jimmy Westling <https://github.com/illviljan>`_.
+- Add ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`).
+  By `Christian Jauvin <https://github.com/cjauvin>`_ and `David Huard <https://github.com/huard>`_.
 - ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`).
   By `Justus Magin <https://github.com/keewis>`_.
 - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`)

diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
@@ -1,12 +1,33 @@
-from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Generic,
+    Hashable,
+    Iterable,
+    Literal,
+    Optional,
+    Sequence,
+    Union,
+    cast,
+)
 
 import numpy as np
 
-from . import duck_array_ops
-from .computation import dot
+from . import duck_array_ops, utils
+from .alignment import align
+from .computation import apply_ufunc, dot
 from .pycompat import is_duck_dask_array
 from .types import T_Xarray
 
+# Weighted quantile methods are a subset of the numpy supported quantile methods.
+QUANTILE_METHODS = Literal[
+    "linear",
+    "interpolated_inverted_cdf",
+    "hazen",
+    "weibull",
+    "median_unbiased",
+    "normal_unbiased",
+]
+
 _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """
     Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s).
 
@@ -54,6 +75,61 @@
         New {cls} object with the sum of the weights over the given dimension.
     """
 
+_WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """
+    Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s).
+
+    Weights are interpreted as *sampling weights* (or probability weights) and
+    describe how a sample is scaled to the whole population [1]_. Although there are
+    other possible interpretations for weights, *precision weights* describing the
+    precision of observations, or *frequency weights* counting the number of identical
+    observations, they are not implemented here.
+
+    For compatibility with NumPy's non-weighted ``quantile`` (which is used by
+    ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation
+    method supported by this weighted version corresponds to the default "linear"
+    option of ``numpy.quantile``. This is "Type 7" option, described in Hyndman
+    and Fan (1996) [2]_. The implementation is largely inspired from A. Akinshin's [3]_.
+
+    Parameters
+    ----------
+    q : float or sequence of float
+        Quantile to compute, which must be between 0 and 1 inclusive.
+    dim : str or sequence of str, optional
+        Dimension(s) over which to apply the weighted ``quantile``.
+    skipna : bool, optional
+        If True, skip missing values (as marked by NaN). By default, only
+        skips missing values for float dtypes; other dtypes either do not
+        have a sentinel missing value (int) or skipna=True has not been
+        implemented (object, datetime64 or timedelta64).
+    keep_attrs : bool, optional
+        If True, the attributes (``attrs``) will be copied from the original
+        object to the new one.  If False (default), the new object will be
+        returned without attributes.
+
+    Returns
+    -------
+    quantiles : {cls}
+        New {cls} object with weighted ``quantile`` applied to its data and
+        the indicated dimension(s) removed.
+
+    See Also
+    --------
+    numpy.nanquantile, pandas.Series.quantile, Dataset.quantile
+    DataArray.quantile
+
+    Notes
+    -----
+    Returns NaN if the ``weights`` sum to 0.0 along the reduced
+    dimension(s).
+
+    References
+    ----------
+    .. [1] https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/
+    .. [2] Hyndman, R. J. & Fan, Y. (1996). Sample Quantiles in Statistical Packages.
+           The American Statistician, 50(4), 361–365. https://doi.org/10.2307/2684934
+    .. [3] https://aakinshin.net/posts/weighted-quantiles
+    """
+
 
 if TYPE_CHECKING:
     from .dataarray import DataArray
@@ -239,6 +315,123 @@ def _weighted_std(
 
         return cast("DataArray", np.sqrt(self._weighted_var(da, dim, skipna)))
 
+    def _weighted_quantile(
+        self,
+        da: "DataArray",
+        q: Union[float, Iterable[float]],
+        dim: Union[Hashable, Iterable[Hashable]] = None,
+        skipna: Optional[bool] = None,
+        keep_attrs: Optional[bool] = None,
+    ) -> "DataArray":
+        """Apply a weighted ``quantile`` to a DataArray along some dimension(s)."""
+
+        def _get_h(n: float, q: np.ndarray, method: QUANTILE_METHODS) -> np.ndarray:
+            """Return the interpolation parameter."""
+            # Note that options are not yet exposed in the public API.
+            if method == "linear":
+                h = (n - 1) * q + 1
+            elif method == "interpolated_inverted_cdf":
+                h = n * q
+            elif method == "hazen":
+                h = n * q + 0.5
+            elif method == "weibull":
+                h = (n + 1) * q
+            elif method == "median_unbiased":
+                h = (n + 1 / 3) * q + 1 / 3
+            elif method == "normal_unbiased":
+                h = (n + 1 / 4) * q + 3 / 8
+            else:
+                raise ValueError(f"Invalid method: {method}.")
+            return h.clip(1, n)
+
+        def _weighted_quantile_1d(
+            data: np.ndarray,
+            weights: np.ndarray,
+            q: np.ndarray,
+            skipna: bool,
+            method: QUANTILE_METHODS = "linear",
+        ) -> np.ndarray:
+            # This algorithm has been adapted from:
+            #   https://aakinshin.net/posts/weighted-quantiles/#reference-implementation
+            is_nan = np.isnan(data)
+            if skipna:
+                # Remove nans from data and weights
+                not_nan = ~is_nan
+                data = data[not_nan]
+                weights = weights[not_nan]
+            elif is_nan.any():
+                # Return nan if data contains any nan
+                return np.full(q.size, np.nan)
+
+            # Filter out data (and weights) associated with zero weights, which also flattens them
+            nonzero_weights = weights != 0
+            data = data[nonzero_weights]
+            weights = weights[nonzero_weights]
+            n = data.size
+
+            if n == 0:
+                # Possibly empty after nan or zero weight filtering above
+                return np.full(q.size, np.nan)
+
+            # Kish's effective sample size
+            nw = weights.sum() ** 2 / (weights**2).sum()
+
+            # Sort data and weights
+            sorter = np.argsort(data)
+            data = data[sorter]
+            weights = weights[sorter]
+
+            # Normalize and sum the weights
+            weights = weights / weights.sum()
+            weights_cum = np.append(0, weights.cumsum())
+
+            # Vectorize the computation for q
+            q = np.atleast_2d(q).T
+            h = _get_h(nw, q, method)
+            u = np.maximum((h - 1) / nw, np.minimum(h / nw, weights_cum))
+            v = u * nw - h + 1
+            w = np.diff(v)
+            return (data * w).sum(axis=1)
+
+        if da.shape != self.weights.shape:
+            raise ValueError("da and weights must have the same shape")
+
+        q = np.atleast_1d(np.asarray(q, dtype=np.float64))
+
+        if q.ndim > 1:
+            raise ValueError("q must be a scalar or 1d")
+
+        if np.any((q < 0) | (q > 1)):
+            raise ValueError("q values must be between 0 and 1")
+
+        if dim is None:
+            dim = da.dims
+
+        if utils.is_scalar(dim):
+            dim = [dim]
+
+        dim = cast(Sequence, dim)
+
+        da, w = align(da, self.weights)
+        result = apply_ufunc(
+            _weighted_quantile_1d,
+            da,
+            w,
+            input_core_dims=[dim, dim],
+            output_core_dims=[["quantile"]],
+            output_dtypes=[np.float64],
+            join="override",
+            dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}),
+            dask="parallelized",
+            vectorize=True,
+            kwargs={"q": q, "skipna": skipna},
+        )
+
+        result = result.transpose("quantile", ...)
+        result = result.assign_coords(quantile=q).squeeze()
+
+        return result
+
     def _implementation(self, func, dim, **kwargs):
 
         raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`")
@@ -308,6 +501,19 @@ def std(
             self._weighted_std, dim=dim, skipna=skipna, keep_attrs=keep_attrs
         )
 
+    def quantile(
+        self,
+        q: Union[float, Iterable[float]],
+        *,
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+        keep_attrs: Optional[bool] = None,
+    ) -> T_Xarray:
+
+        return self._implementation(
+            self._weighted_quantile, q=q, dim=dim, skipna=skipna, keep_attrs=keep_attrs
+        )
+
     def __repr__(self):
         """provide a nice str repr of our Weighted object"""
 
@@ -358,6 +564,8 @@ def _inject_docstring(cls, cls_name):
         cls=cls_name, fcn="std", on_zero="NaN"
     )
 
+    cls.quantile.__doc__ = _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE.format(cls=cls_name)
+
 
 _inject_docstring(DataArrayWeighted, "DataArray")
 _inject_docstring(DatasetWeighted, "Dataset")