From f36f8f268b0035df091a1dc478d4106b801654de Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sun, 17 Jun 2018 20:00:05 +0900 Subject: [PATCH 01/25] Inhouse nanops --- xarray/core/duck_array_ops.py | 122 +----- xarray/core/nanops.py | 575 ++++++++++++++++++++++++++++ xarray/tests/test_duck_array_ops.py | 14 +- 3 files changed, 592 insertions(+), 119 deletions(-) create mode 100644 xarray/core/nanops.py diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 065ac165a0d..d4c56472e48 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -16,6 +16,7 @@ from . import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type +from . import nanops try: import bottleneck as bn @@ -213,79 +214,6 @@ def _ignore_warnings_if(condition): yield -def _nansum_object(value, axis=None, **kwargs): - """ In house nansum for object array """ - value = fillna(value, 0) - return _dask_or_eager_func('sum')(value, axis=axis, **kwargs) - - -def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs): - """ In house nanmin and nanmax for object array """ - fill_value = get_fill_value(value.dtype) - valid_count = count(value, axis=axis) - filled_value = fillna(value, fill_value) - data = _dask_or_eager_func(func)(filled_value, axis=axis, **kwargs) - if not hasattr(data, 'dtype'): # scalar case - data = dtypes.fill_value(value.dtype) if valid_count == 0 else data - return np.array(data, dtype=value.dtype) - return where_method(data, valid_count != 0) - - -def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): - """ In house nanargmin, nanargmax for object arrays. Always return integer - type """ - fill_value = get_fill_value(value.dtype) - valid_count = count(value, axis=axis) - value = fillna(value, fill_value) - data = _dask_or_eager_func(func)(value, axis=axis, **kwargs) - # dask seems return non-integer type - if isinstance(value, dask_array_type): - data = data.astype(int) - - if (valid_count == 0).any(): - raise ValueError('All-NaN slice encountered') - - return np.array(data, dtype=int) - - -def _nanmean_ddof_object(ddof, value, axis=None, **kwargs): - """ In house nanmean. ddof argument will be used in _nanvar method """ - valid_count = count(value, axis=axis) - value = fillna(value, 0) - # As dtype inference is impossible for object dtype, we assume float - # https://github.com/dask/dask/issues/3162 - dtype = kwargs.pop('dtype', None) - if dtype is None and value.dtype.kind == 'O': - dtype = value.dtype if value.dtype.kind in ['cf'] else float - - data = _dask_or_eager_func('sum')(value, axis=axis, dtype=dtype, **kwargs) - data = data / (valid_count - ddof) - return where_method(data, valid_count != 0) - - -def _nanvar_object(value, axis=None, **kwargs): - ddof = kwargs.pop('ddof', 0) - kwargs_mean = kwargs.copy() - kwargs_mean.pop('keepdims', None) - value_mean = _nanmean_ddof_object(ddof=0, value=value, axis=axis, - keepdims=True, **kwargs_mean) - squared = (value.astype(value_mean.dtype) - value_mean)**2 - return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs) - - -_nan_object_funcs = { - 'sum': _nansum_object, - 'min': partial(_nan_minmax_object, 'min', dtypes.get_pos_infinity), - 'max': partial(_nan_minmax_object, 'max', dtypes.get_neg_infinity), - 'argmin': partial(_nan_argminmax_object, 'argmin', - dtypes.get_pos_infinity), - 'argmax': partial(_nan_argminmax_object, 'argmax', - dtypes.get_neg_infinity), - 'mean': partial(_nanmean_ddof_object, 0), - 'var': _nanvar_object, -} - - def _create_nan_agg_method(name, numeric_only=False, np_compat=False, no_bottleneck=False, coerce_strings=False): def f(values, axis=None, skipna=None, **kwargs): @@ -296,57 +224,15 @@ def f(values, axis=None, skipna=None, **kwargs): dtype = kwargs.get('dtype', None) values = asarray(values) - # dask requires dtype argument for object dtype - if (values.dtype == 'object' and name in ['sum', ]): - kwargs['dtype'] = values.dtype if dtype is None else dtype - if coerce_strings and values.dtype.kind in 'SU': values = values.astype(object) if skipna or (skipna is None and values.dtype.kind in 'cfO'): - if values.dtype.kind not in ['u', 'i', 'f', 'c']: - func = _nan_object_funcs.get(name, None) - using_numpy_nan_func = True - if func is None or values.dtype.kind not in 'Ob': - raise NotImplementedError( - 'skipna=True not yet implemented for %s with dtype %s' - % (name, values.dtype)) - else: - nanname = 'nan' + name - if (isinstance(axis, tuple) or not values.dtype.isnative or - no_bottleneck or (dtype is not None and - np.dtype(dtype) != values.dtype)): - # bottleneck can't handle multiple axis arguments or - # non-native endianness - if np_compat: - eager_module = npcompat - else: - eager_module = np - else: - kwargs.pop('dtype', None) - eager_module = bn - func = _dask_or_eager_func(nanname, eager_module) - using_numpy_nan_func = (eager_module is np or - eager_module is npcompat) + nanname = 'nan' + name + func = getattr(nanops, nanname) else: func = _dask_or_eager_func(name) - using_numpy_nan_func = False - with _ignore_warnings_if(using_numpy_nan_func): - try: - return func(values, axis=axis, **kwargs) - except AttributeError: - if isinstance(values, dask_array_type): - try: # dask/dask#3133 dask sometimes needs dtype argument - return func(values, axis=axis, dtype=values.dtype, - **kwargs) - except AttributeError: - msg = '%s is not yet implemented on dask arrays' % name - else: - assert using_numpy_nan_func - msg = ('%s is not available with skipna=False with the ' - 'installed version of numpy; upgrade to numpy 1.12 ' - 'or newer to use skipna=True or skipna=None' % name) - raise NotImplementedError(msg) + return func(values, axis=axis, **kwargs) f.numeric_only = numeric_only f.__name__ = name return f diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py new file mode 100644 index 00000000000..684f67c9cb2 --- /dev/null +++ b/xarray/core/nanops.py @@ -0,0 +1,575 @@ +from __future__ import absolute_import, division, print_function + +import contextlib +import inspect +import warnings +import functools + +import numpy as np +import pandas as pd +from pandas.core.nanops import disallow + +from . import dask_array_ops, dtypes, npcompat, nputils +from .nputils import nanfirst, nanlast +from .pycompat import dask_array_type + + +try: + import bottleneck as bn + _USE_BOTTLENECK = True +except ImportError: + # use numpy methods instead + bn = np + _USE_BOTTLENECK = False + + +def _bn_ok_dtype(dt, name): + # This function is taken from pandas.core.nanops + # Bottleneck chokes on datetime64 + if (not is_object_dtype(dt) and not is_datetime_or_timedelta_dtype(dt)): + + # GH 15507 + # bottleneck does not properly upcast during the sum + # so can overflow + + # GH 9422 + # further we also want to preserve NaN when all elements + # are NaN, unlinke bottleneck/numpy which consider this + # to be 0 + if name in ['nansum', 'nanprod']: + return False + + return True + return False + + +class bottleneck_switch(object): + # This function is taken from pandas.core.nanops + + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, alt): + bn_name = alt.__name__ + + try: + bn_func = getattr(bn, bn_name) + except (AttributeError, NameError): # pragma: no cover + bn_func = None + + @functools.wraps(alt) + def f(values, axis=None, **kwds): + if len(self.kwargs) > 0: + for k, v in compat.iteritems(self.kwargs): + if k not in kwds: + kwds[k] = v + try: + if values.size == 0 and kwds.get('min_count') is None: + # We are empty, returning NA for our type + # Only applies for the default `min_count` of None + # since that affects how empty arrays are handled. + # TODO(GH-18976) update all the nanops methods to + # correctly handle empty inputs and remove this check. + # It *may* just be `var` + return _na_for_min_count(values, axis) + + if (_USE_BOTTLENECK and not isinstance(value, dask_array_type) + and _bn_ok_dtype(values.dtype, bn_name)): + result = bn_func(values, axis=axis, **kwds) + + # prefer to treat inf/-inf as NA, but must compute the func + # twice :( + if _has_infs(result): + result = alt(values, axis=axis, **kwds) + else: + result = alt(values, axis=axis, **kwds) + except Exception: + result = alt(values, axis=axis, **kwds) + + return result + + return f + + +def _replace_nan(a, val): + """ + If `a` is of inexact type, make a copy of `a`, replace NaNs with + the `val` value, and return the copy together with a boolean mask + marking the locations where NaNs were present. If `a` is not of + inexact type, do nothing and return `a` together with a mask of None. + Note that scalars will end up as array scalars, which is important + for using the result as the value of the out argument in some + operations. + Parameters + ---------- + a : array-like + Input array. + val : float + NaN values are set to val before doing the operation. + Returns + ------- + y : ndarray + If `a` is of inexact type, return a copy of `a` with the NaNs + replaced by the fill value, otherwise return `a`. + mask: {bool, None} + If `a` is of inexact type, return a boolean mask marking locations of + NaNs, otherwise return None. + + This function is taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + a = np.array(a, subok=True, copy=True) + + if a.dtype == np.object_: + # object arrays do not support `isnan` (gh-9009), so make a guess + mask = a != a + elif issubclass(a.dtype.type, np.inexact): + mask = np.isnan(a) + else: + mask = None + + if mask is not None: + np.copyto(a, val, where=mask) + + return a, mask + + +def _copyto(a, val, mask): + """ + Replace values in `a` with NaN where `mask` is True. This differs from + copyto in that it will deal with the case where `a` is a numpy scalar. + Parameters + ---------- + a : ndarray or numpy scalar + Array or numpy scalar some of whose values are to be replaced + by val. + val : numpy scalar + Value used a replacement. + mask : ndarray, scalar + Boolean array. Where True the corresponding element of `a` is + replaced by `val`. Broadcasts. + Returns + ------- + res : ndarray, scalar + Array with elements replaced or scalar `val`. + + This function is taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + if isinstance(a, np.ndarray): + np.copyto(a, val, where=mask, casting='unsafe') + else: + a = a.dtype.type(val) + return a + + +def _divide_by_count(a, b, out=None): + """ + Compute a/b ignoring invalid results. If `a` is an array the division + is done in place. If `a` is a scalar, then its type is preserved in the + output. If out is None, then then a is used instead so that the + division is in place. Note that this is only called with `a` an inexact + type. + Parameters + ---------- + a : {ndarray, numpy scalar} + Numerator. Expected to be of inexact type but not checked. + b : {ndarray, numpy scalar} + Denominator. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + Returns + ------- + ret : {ndarray, numpy scalar} + The return value is a/b. If `a` was an ndarray the division is done + in place. If `a` is a numpy scalar, the division preserves its type. + + This function is taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + with np.errstate(invalid='ignore', divide='ignore'): + if isinstance(a, np.ndarray): + if out is None: + return np.divide(a, b, out=a, casting='unsafe') + else: + return np.divide(a, b, out=out, casting='unsafe') + else: + if out is None: + return a.dtype.type(a / b) + else: + # This is questionable, but currently a numpy scalar can + # be output to a zero dimensional array. + return np.divide(a, b, out=out, casting='unsafe') + + +@bottleneck_switch() +def nanmin(a, axis=None, out=None, keepdims=np._NoValue): + """ + taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + if a.dtype.kind == 'O': + return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis) + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if type(a) is np.ndarray and a.dtype != np.object_: + # Fast, but not safe for subclasses of ndarray, or object arrays, + # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) + res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) + if np.isnan(res).any(): + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, +np.inf) + res = np.amin(a, axis=axis, out=out, **kwargs) + if mask is None: + return res + + # Check for all-NaN axis + mask = np.all(mask, axis=axis, **kwargs) + if np.any(mask): + res = _copyto(res, np.nan, mask) + warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + return res + + +@bottleneck_switch() +def nanmax(a, axis=None, out=None, keepdims=np._NoValue): + """ + taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + if a.dtype.kind == 'O': + return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis) + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if type(a) is np.ndarray and a.dtype != np.object_: + # Fast, but not safe for subclasses of ndarray, or object arrays, + # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) + res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) + if np.isnan(res).any(): + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, -np.inf) + res = np.amax(a, axis=axis, out=out, **kwargs) + if mask is None: + return res + + # Check for all-NaN axis + mask = np.all(mask, axis=axis, **kwargs) + if np.any(mask): + res = _copyto(res, np.nan, mask) + warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + return res + + +def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): + """ In house nanargmin, nanargmax for object arrays. Always return integer + type """ + from .duck_array_ops import isnull, count, fillna + + fill_value = get_fill_value(value.dtype) + valid_count = count(value, axis=axis) + value = fillna(value, fill_value) + data = getattr(np, func)(value, axis=axis, **kwargs) + # dask seems return non-integer type + if isinstance(value, dask_array_type): + data = data.astype(int) + + if (valid_count == 0).any(): + raise ValueError('All-NaN slice encountered') + + return np.array(data, dtype=int) + + +def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs): + """ In house nanmin and nanmax for object array """ + from .duck_array_ops import isnull, count, fillna, where_method + + fill_value = get_fill_value(value.dtype) + valid_count = count(value, axis=axis) + filled_value = fillna(value, fill_value) + data = getattr(np, func)(filled_value, axis=axis, **kwargs) + if not hasattr(data, 'dtype'): # scalar case + data = dtypes.fill_value(value.dtype) if valid_count == 0 else data + return np.array(data, dtype=value.dtype) + return where_method(data, valid_count != 0) + + +@bottleneck_switch() +def nanargmin(a, axis=None): + if a.dtype.kind == 'O': + return _nan_argminmax_object('argmin', dtypes.get_pos_infinity, + a, axis=axis) + a, mask = _replace_nan(a, np.inf) + res = np.argmin(a, axis=axis) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + return res + + +@bottleneck_switch() +def nanargmax(a, axis=None): + """ + taken from + https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + """ + if a.dtype.kind == 'O': + return _nan_argminmax_object('argmax', dtypes.get_neg_infinity, + a, axis=axis) + a, mask = _replace_nan(a, -np.inf) + res = np.argmax(a, axis=axis) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + return res + + +@bottleneck_switch() +def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): + a, mask = _replace_nan(a, 0) + return np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims) + + +@bottleneck_switch() +def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): + a, mask = _replace_nan(a, 1) + return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + +@bottleneck_switch() +def nancumsum(a, axis=None, dtype=None, out=None): + a, mask = _replace_nan(a, 0) + return np.cumsum(a, axis=axis, dtype=dtype, out=out) + + +@bottleneck_switch() +def nancumprod(a, axis=None, dtype=None, out=None): + a, mask = _replace_nan(a, 1) + return np.cumprod(a, axis=axis, dtype=dtype, out=out) + + +@bottleneck_switch() +def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): + arr, mask = _replace_nan(a, 0) + if mask is None: + return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + if dtype is not None: + dtype = np.dtype(dtype) + if dtype is not None and not issubclass(dtype.type, np.inexact): + raise TypeError("If a is inexact, then dtype must be inexact") + if out is not None and not issubclass(out.dtype.type, np.inexact): + raise TypeError("If a is inexact, then out must be inexact") + + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) + tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + avg = _divide_by_count(tot, cnt, out=out) + + isbad = (cnt == 0) + if isbad.any(): + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) + # NaN is the only possible bad value, so no further + # action is needed to handle bad results. + return avg + + +@bottleneck_switch() +def _nanmedian1d(arr1d, overwrite_input=False): + """ + Private function for rank 1 arrays. Compute the median ignoring NaNs. + See nanmedian for parameter usage + """ + arr1d, overwrite_input = _remove_nan_1d(arr1d, + overwrite_input=overwrite_input) + if arr1d.size == 0: + return np.nan + + return np.median(arr1d, overwrite_input=overwrite_input) + + +@bottleneck_switch() +def _nanmedian(a, axis=None, out=None, overwrite_input=False): + """ + Private function that doesn't support extended axis or keepdims. + These methods are extended to this function using _ureduce + See nanmedian for parameter usage + """ + if axis is None or a.ndim == 1: + part = a.ravel() + if out is None: + return _nanmedian1d(part, overwrite_input) + else: + out[...] = _nanmedian1d(part, overwrite_input) + return out + else: + # for small medians use sort + indexing which is still faster than + # apply_along_axis + # benchmarked with shuffled (50, 50, x) containing a few NaN + if a.shape[axis] < 600: + return _nanmedian_small(a, axis, out, overwrite_input) + result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) + if out is not None: + out[...] = result + return result + + +def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): + """ + sort + indexing median, faster for small medians along multiple + dimensions due to the high overhead of apply_along_axis + see nanmedian for parameter usage + """ + a = np.ma.masked_array(a, np.isnan(a)) + m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) + for i in range(np.count_nonzero(m.mask.ravel())): + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) + if out is not None: + out[...] = m.filled(np.nan) + return out + return m.filled(np.nan) + + +@bottleneck_switch() +def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): + a = np.asanyarray(a) + # apply_along_axis in _nanmedian doesn't handle empty arrays well, + # so deal them upfront + if a.size == 0: + return np.nanmean(a, axis, out=out, keepdims=keepdims) + + r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims and keepdims is not np._NoValue: + return r.reshape(k) + else: + return r + + +@bottleneck_switch() +def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + a = np.asanyarray(a) + q = np.asanyarray(q) + # apply_along_axis in _nanpercentile doesn't handle empty arrays well, + # so deal them upfront + if a.size == 0: + return np.nanmean(a, axis, out=out, keepdims=keepdims) + + r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out, + overwrite_input=overwrite_input, + interpolation=interpolation) + if keepdims and keepdims is not np._NoValue: + return r.reshape(q.shape + k) + else: + return r + + +def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear'): + """ + Private function that doesn't support extended axis or keepdims. + These methods are extended to this function using _ureduce + See nanpercentile for parameter usage + """ + if axis is None or a.ndim == 1: + part = a.ravel() + result = _nanpercentile1d(part, q, overwrite_input, interpolation) + else: + result = np.apply_along_axis(_nanpercentile1d, axis, a, q, + overwrite_input, interpolation) + # apply_along_axis fills in collapsed axis with results. + # Move that axis to the beginning to match percentile's + # convention. + if q.ndim != 0: + result = np.moveaxis(result, axis, 0) + + if out is not None: + out[...] = result + return result + + +def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): + """ + Private function for rank 1 arrays. Compute percentile ignoring NaNs. + See nanpercentile for parameter usage + """ + arr1d, overwrite_input = _remove_nan_1d(arr1d, + overwrite_input=overwrite_input) + if arr1d.size == 0: + return np.full(q.shape, np.nan)[()] # convert to scalar + + return np.percentile(arr1d, q, overwrite_input=overwrite_input, + interpolation=interpolation) + + +@bottleneck_switch() +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): + arr, mask = _replace_nan(a, 0) + if mask is None: + return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + + if dtype is not None: + dtype = np.dtype(dtype) + if dtype is not None and not issubclass(dtype.type, np.inexact): + raise TypeError("If a is inexact, then dtype must be inexact") + if out is not None and not issubclass(out.dtype.type, np.inexact): + raise TypeError("If a is inexact, then out must be inexact") + + # Compute mean + if type(arr) is np.matrix: + _keepdims = np._NoValue + else: + _keepdims = True + # we need to special case matrix for reverse compatibility + # in order for this to work, these sums need to be called with + # keepdims=True, however matrix now raises an error in this case, but + # the reason that it drops the keepdims kwarg is to force keepdims=True + # so this used to work by serendipity. + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims) + avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims) + avg = _divide_by_count(avg, cnt) + + # Compute squared deviation from mean. + np.subtract(arr, avg, out=arr, casting='unsafe') + arr = _copyto(arr, 0, mask) + if issubclass(arr.dtype.type, np.complexfloating): + sqr = np.multiply(arr, arr.conj(), out=arr).real + else: + sqr = np.multiply(arr, arr, out=arr) + + # Compute variance. + var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + if var.ndim < cnt.ndim: + # Subclasses of ndarray may ignore keepdims, so check here. + cnt = cnt.squeeze(axis) + dof = cnt - ddof + var = _divide_by_count(var, dof) + + isbad = (dof <= 0) + if np.any(isbad): + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2) + # NaN, inf, or negative numbers are all possible bad + # values, so explicitly replace them with NaN. + var = _copyto(var, np.nan, isbad) + return var + + +@bottleneck_switch() +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): + var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + if isinstance(var, np.ndarray): + std = np.sqrt(var, out=var) + else: + std = var.dtype.type(np.sqrt(var)) + return std diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 3f4adee6713..8938d4b8c6c 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -101,7 +101,10 @@ def test_concatenate_type_promotion(self): assert_array_equal(result, np.array([1, 'b'], dtype=object)) def test_all_nan_arrays(self): - assert np.isnan(mean([np.nan, np.nan])) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN slice') + warnings.filterwarnings('ignore', 'Mean of empty slice') + assert np.isnan(mean([np.nan, np.nan])) def test_cumsum_1d(): @@ -260,6 +263,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): # TODO: remove these after resolving # https://github.com/dask/dask/issues/3245 with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Mean of empty slice') warnings.filterwarnings('ignore', 'All-NaN slice') warnings.filterwarnings('ignore', 'invalid value encountered in') @@ -274,6 +278,8 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): expected = getattr(np, func)(da.values, axis=axis) actual = getattr(da, func)(skipna=skipna, dim=aggdim) + if dask: + isinstance(da.data, dask_array_type) assert np.allclose(actual.values, np.array(expected), rtol=1.0e-4, equal_nan=True) except (TypeError, AttributeError, ZeroDivisionError): @@ -289,6 +295,8 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): assert_allclose(actual, expected, rtol=rtol) # also check ddof!=0 case actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5) + if dask: + isinstance(da.data, dask_array_type) expected = series_reduce(da, func, skipna=skipna, dim=aggdim, ddof=5) assert_allclose(actual, expected, rtol=rtol) @@ -299,11 +307,15 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): # make sure the dtype argument if func not in ['max', 'min']: actual = getattr(da, func)(skipna=skipna, dim=aggdim, dtype=float) + if dask: + isinstance(da.data, dask_array_type) assert actual.dtype == float # without nan da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask) actual = getattr(da, func)(skipna=skipna) + if dask: + isinstance(da.data, dask_array_type) expected = getattr(np, 'nan{}'.format(func))(da.values) if actual.dtype == object: assert actual.values == np.array(expected) From 76218b2540939a3292d4f27271402da6b9b876b7 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sun, 17 Jun 2018 22:13:10 +0900 Subject: [PATCH 02/25] Cleanup nanops --- xarray/core/dtypes.py | 3 + xarray/core/duck_array_ops.py | 38 ++- xarray/core/nanops.py | 484 +++++----------------------- xarray/tests/test_duck_array_ops.py | 35 +- 4 files changed, 146 insertions(+), 414 deletions(-) diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 7326b936e2e..7ad44472f06 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -98,6 +98,9 @@ def maybe_promote(dtype): return np.dtype(dtype), fill_value +NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT')) + + def get_fill_value(dtype): """Return an appropriate fill value for this dtype. diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d4c56472e48..cf78ca66b13 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -176,7 +176,7 @@ def array_notnull_equiv(arr1, arr2): def count(data, axis=None): """Count the number of non-NA in this array along the given axis or axes """ - return sum(~isnull(data), axis=axis) + return np.sum(~isnull(data), axis=axis) def where(condition, x, y): @@ -227,12 +227,34 @@ def f(values, axis=None, skipna=None, **kwargs): if coerce_strings and values.dtype.kind in 'SU': values = values.astype(object) + np_module = npcompat if np_compat else np + func = None if skipna or (skipna is None and values.dtype.kind in 'cfO'): nanname = 'nan' + name - func = getattr(nanops, nanname) - else: - func = _dask_or_eager_func(name) - return func(values, axis=axis, **kwargs) + func = getattr( + nanops, nanname, _dask_or_eager_func( + nanname, eager_module=np_module)) + if func is None: + if dtype is None: + func = _dask_or_eager_func(name) + else: + func = getattr(np, name) + + try: + return func(values, axis=axis, **kwargs) + except AttributeError: + if isinstance(values, dask_array_type): + try: # dask/dask#3133 dask sometimes needs dtype argument + return func(values, axis=axis, dtype=values.dtype, + **kwargs) + except AttributeError: + msg = '%s is not yet implemented on dask arrays' % name + else: + msg = ('%s is not available with skipna=False with the ' + 'installed version of numpy; upgrade to numpy 1.12 ' + 'or newer to use skipna=True or skipna=None' % name) + raise NotImplementedError(msg) + f.numeric_only = numeric_only f.__name__ = name return f @@ -247,11 +269,11 @@ def f(values, axis=None, skipna=None, **kwargs): std = _create_nan_agg_method('std', numeric_only=True) var = _create_nan_agg_method('var', numeric_only=True) median = _create_nan_agg_method('median', numeric_only=True) -prod = _create_nan_agg_method('prod', numeric_only=True, no_bottleneck=True) +prod = _create_nan_agg_method('prod', numeric_only=True) cumprod_1d = _create_nan_agg_method( - 'cumprod', numeric_only=True, np_compat=True, no_bottleneck=True) + 'cumprod', numeric_only=True, np_compat=True) cumsum_1d = _create_nan_agg_method( - 'cumsum', numeric_only=True, np_compat=True, no_bottleneck=True) + 'cumsum', numeric_only=True, np_compat=True) def _nd_cum_func(cum_func, array, axis, **kwargs): diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 684f67c9cb2..cc4cc0d0c62 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -1,16 +1,10 @@ from __future__ import absolute_import, division, print_function -import contextlib -import inspect -import warnings import functools import numpy as np -import pandas as pd -from pandas.core.nanops import disallow -from . import dask_array_ops, dtypes, npcompat, nputils -from .nputils import nanfirst, nanlast +from . import dtypes from .pycompat import dask_array_type @@ -22,33 +16,16 @@ bn = np _USE_BOTTLENECK = False - -def _bn_ok_dtype(dt, name): - # This function is taken from pandas.core.nanops - # Bottleneck chokes on datetime64 - if (not is_object_dtype(dt) and not is_datetime_or_timedelta_dtype(dt)): - - # GH 15507 - # bottleneck does not properly upcast during the sum - # so can overflow - - # GH 9422 - # further we also want to preserve NaN when all elements - # are NaN, unlinke bottleneck/numpy which consider this - # to be 0 - if name in ['nansum', 'nanprod']: - return False - - return True - return False +try: + import dask.array as dask_array + from . import dask_array_compat +except ImportError: + dask_array = None + dask_array_compat = None class bottleneck_switch(object): - # This function is taken from pandas.core.nanops - - def __init__(self, **kwargs): - self.kwargs = kwargs - + """ xarray-version of pandas.core.nanops.bottleneck_switch """ def __call__(self, alt): bn_name = alt.__name__ @@ -59,31 +36,20 @@ def __call__(self, alt): @functools.wraps(alt) def f(values, axis=None, **kwds): - if len(self.kwargs) > 0: - for k, v in compat.iteritems(self.kwargs): - if k not in kwds: - kwds[k] = v - try: - if values.size == 0 and kwds.get('min_count') is None: - # We are empty, returning NA for our type - # Only applies for the default `min_count` of None - # since that affects how empty arrays are handled. - # TODO(GH-18976) update all the nanops methods to - # correctly handle empty inputs and remove this check. - # It *may* just be `var` - return _na_for_min_count(values, axis) - - if (_USE_BOTTLENECK and not isinstance(value, dask_array_type) - and _bn_ok_dtype(values.dtype, bn_name)): - result = bn_func(values, axis=axis, **kwds) - - # prefer to treat inf/-inf as NA, but must compute the func - # twice :( - if _has_infs(result): - result = alt(values, axis=axis, **kwds) - else: - result = alt(values, axis=axis, **kwds) - except Exception: + dtype = kwds.get('dtype', None) + min_count = kwds.get('min_count', 1) + + if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK + and not isinstance(axis, tuple) + and values.dtype.kind in 'uifc' + and values.dtype.isnative + and (dtype is None or np.dtype(dtype) == values.dtype) + and min_count != 1): + # bottleneck does not take care dtype, min_count + kwds.pop('dtype', None) + kwds.pop('min_count', 1) + result = bn_func(values, axis=axis, **kwds) + else: result = alt(values, axis=axis, **kwds) return result @@ -118,8 +84,6 @@ def _replace_nan(a, val): This function is taken from https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py """ - a = np.array(a, subok=True, copy=True) - if a.dtype == np.object_: # object arrays do not support `isnan` (gh-9009), so make a guess mask = a != a @@ -129,149 +93,57 @@ def _replace_nan(a, val): mask = None if mask is not None: - np.copyto(a, val, where=mask) + if isinstance(a, dask_array_type): + return dask_array.where(mask, val, a), mask + return np.where(mask, val, a), mask return a, mask -def _copyto(a, val, mask): +def _maybe_null_out(result, axis, mask, min_count=1): """ - Replace values in `a` with NaN where `mask` is True. This differs from - copyto in that it will deal with the case where `a` is a numpy scalar. - Parameters - ---------- - a : ndarray or numpy scalar - Array or numpy scalar some of whose values are to be replaced - by val. - val : numpy scalar - Value used a replacement. - mask : ndarray, scalar - Boolean array. Where True the corresponding element of `a` is - replaced by `val`. Broadcasts. - Returns - ------- - res : ndarray, scalar - Array with elements replaced or scalar `val`. - - This function is taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + xarray version of pandas.core.nanops._maybe_null_out """ - if isinstance(a, np.ndarray): - np.copyto(a, val, where=mask, casting='unsafe') - else: - a = a.dtype.type(val) - return a + if axis is not None and getattr(result, 'ndim', False): + null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 + if np.any(null_mask): + dtype, fill_value = dtypes.maybe_promote(result.dtype) + result = result.astype(dtype) + result[null_mask] = fill_value + elif (not isinstance(result, dask_array_type) and + result not in dtypes.NAT_TYPES): + null_mask = mask.size - mask.sum() + if null_mask < min_count: + result = np.nan -def _divide_by_count(a, b, out=None): - """ - Compute a/b ignoring invalid results. If `a` is an array the division - is done in place. If `a` is a scalar, then its type is preserved in the - output. If out is None, then then a is used instead so that the - division is in place. Note that this is only called with `a` an inexact - type. - Parameters - ---------- - a : {ndarray, numpy scalar} - Numerator. Expected to be of inexact type but not checked. - b : {ndarray, numpy scalar} - Denominator. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - Returns - ------- - ret : {ndarray, numpy scalar} - The return value is a/b. If `a` was an ndarray the division is done - in place. If `a` is a numpy scalar, the division preserves its type. - - This function is taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py - """ - with np.errstate(invalid='ignore', divide='ignore'): - if isinstance(a, np.ndarray): - if out is None: - return np.divide(a, b, out=a, casting='unsafe') - else: - return np.divide(a, b, out=out, casting='unsafe') - else: - if out is None: - return a.dtype.type(a / b) - else: - # This is questionable, but currently a numpy scalar can - # be output to a zero dimensional array. - return np.divide(a, b, out=out, casting='unsafe') + return result @bottleneck_switch() def nanmin(a, axis=None, out=None, keepdims=np._NoValue): - """ - taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py - """ if a.dtype.kind == 'O': return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis) - kwargs = {} - if keepdims is not np._NoValue: - kwargs['keepdims'] = keepdims - if type(a) is np.ndarray and a.dtype != np.object_: - # Fast, but not safe for subclasses of ndarray, or object arrays, - # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) - res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) - if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) - else: - # Slow, but safe for subclasses of ndarray - a, mask = _replace_nan(a, +np.inf) - res = np.amin(a, axis=axis, out=out, **kwargs) - if mask is None: - return res - - # Check for all-NaN axis - mask = np.all(mask, axis=axis, **kwargs) - if np.any(mask): - res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) - return res + + if isinstance(a, dask_array_type): + return dask_array.nanmin(a, axis=axis) + return np.nanmin(a, axis=axis) @bottleneck_switch() def nanmax(a, axis=None, out=None, keepdims=np._NoValue): - """ - taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py - """ if a.dtype.kind == 'O': return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis) - kwargs = {} - if keepdims is not np._NoValue: - kwargs['keepdims'] = keepdims - if type(a) is np.ndarray and a.dtype != np.object_: - # Fast, but not safe for subclasses of ndarray, or object arrays, - # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) - res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) - if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) - else: - # Slow, but safe for subclasses of ndarray - a, mask = _replace_nan(a, -np.inf) - res = np.amax(a, axis=axis, out=out, **kwargs) - if mask is None: - return res - - # Check for all-NaN axis - mask = np.all(mask, axis=axis, **kwargs) - if np.any(mask): - res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) - return res + + if isinstance(a, dask_array_type): + return dask_array.nanmax(a, axis=axis) + return np.nanmax(a, axis=axis) def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): """ In house nanargmin, nanargmax for object arrays. Always return integer type """ - from .duck_array_ops import isnull, count, fillna + from .duck_array_ops import count, fillna fill_value = get_fill_value(value.dtype) valid_count = count(value, axis=axis) @@ -289,7 +161,7 @@ def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs): """ In house nanmin and nanmax for object array """ - from .duck_array_ops import isnull, count, fillna, where_method + from .duck_array_ops import count, fillna, where_method fill_value = get_fill_value(value.dtype) valid_count = count(value, axis=axis) @@ -334,242 +206,50 @@ def nanargmax(a, axis=None): @bottleneck_switch() -def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + min_count=None): a, mask = _replace_nan(a, 0) - return np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims) - - -@bottleneck_switch() -def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): - a, mask = _replace_nan(a, 1) - return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - - -@bottleneck_switch() -def nancumsum(a, axis=None, dtype=None, out=None): - a, mask = _replace_nan(a, 0) - return np.cumsum(a, axis=axis, dtype=dtype, out=out) - - -@bottleneck_switch() -def nancumprod(a, axis=None, dtype=None, out=None): - a, mask = _replace_nan(a, 1) - return np.cumprod(a, axis=axis, dtype=dtype, out=out) - - -@bottleneck_switch() -def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): - arr, mask = _replace_nan(a, 0) - if mask is None: - return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - - if dtype is not None: - dtype = np.dtype(dtype) - if dtype is not None and not issubclass(dtype.type, np.inexact): - raise TypeError("If a is inexact, then dtype must be inexact") - if out is not None and not issubclass(out.dtype.type, np.inexact): - raise TypeError("If a is inexact, then out must be inexact") - - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) - tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - avg = _divide_by_count(tot, cnt, out=out) - - isbad = (cnt == 0) - if isbad.any(): - warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) - # NaN is the only possible bad value, so no further - # action is needed to handle bad results. - return avg - - -@bottleneck_switch() -def _nanmedian1d(arr1d, overwrite_input=False): - """ - Private function for rank 1 arrays. Compute the median ignoring NaNs. - See nanmedian for parameter usage - """ - arr1d, overwrite_input = _remove_nan_1d(arr1d, - overwrite_input=overwrite_input) - if arr1d.size == 0: - return np.nan - - return np.median(arr1d, overwrite_input=overwrite_input) - - -@bottleneck_switch() -def _nanmedian(a, axis=None, out=None, overwrite_input=False): - """ - Private function that doesn't support extended axis or keepdims. - These methods are extended to this function using _ureduce - See nanmedian for parameter usage - """ - if axis is None or a.ndim == 1: - part = a.ravel() - if out is None: - return _nanmedian1d(part, overwrite_input) - else: - out[...] = _nanmedian1d(part, overwrite_input) - return out + result = np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims) + if min_count is not None: + return _maybe_null_out(result, axis, mask, min_count) else: - # for small medians use sort + indexing which is still faster than - # apply_along_axis - # benchmarked with shuffled (50, 50, x) containing a few NaN - if a.shape[axis] < 600: - return _nanmedian_small(a, axis, out, overwrite_input) - result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) - if out is not None: - out[...] = result return result -def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): - """ - sort + indexing median, faster for small medians along multiple - dimensions due to the high overhead of apply_along_axis - see nanmedian for parameter usage - """ - a = np.ma.masked_array(a, np.isnan(a)) - m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) - for i in range(np.count_nonzero(m.mask.ravel())): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) - if out is not None: - out[...] = m.filled(np.nan) - return out - return m.filled(np.nan) - - -@bottleneck_switch() -def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): - a = np.asanyarray(a) - # apply_along_axis in _nanmedian doesn't handle empty arrays well, - # so deal them upfront - if a.size == 0: - return np.nanmean(a, axis, out=out, keepdims=keepdims) - - r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out, - overwrite_input=overwrite_input) - if keepdims and keepdims is not np._NoValue: - return r.reshape(k) - else: - return r - - -@bottleneck_switch() -def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): - a = np.asanyarray(a) - q = np.asanyarray(q) - # apply_along_axis in _nanpercentile doesn't handle empty arrays well, - # so deal them upfront - if a.size == 0: - return np.nanmean(a, axis, out=out, keepdims=keepdims) - - r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out, - overwrite_input=overwrite_input, - interpolation=interpolation) - if keepdims and keepdims is not np._NoValue: - return r.reshape(q.shape + k) - else: - return r - - -def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): - """ - Private function that doesn't support extended axis or keepdims. - These methods are extended to this function using _ureduce - See nanpercentile for parameter usage - """ - if axis is None or a.ndim == 1: - part = a.ravel() - result = _nanpercentile1d(part, q, overwrite_input, interpolation) - else: - result = np.apply_along_axis(_nanpercentile1d, axis, a, q, - overwrite_input, interpolation) - # apply_along_axis fills in collapsed axis with results. - # Move that axis to the beginning to match percentile's - # convention. - if q.ndim != 0: - result = np.moveaxis(result, axis, 0) - - if out is not None: - out[...] = result - return result - - -def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): - """ - Private function for rank 1 arrays. Compute percentile ignoring NaNs. - See nanpercentile for parameter usage - """ - arr1d, overwrite_input = _remove_nan_1d(arr1d, - overwrite_input=overwrite_input) - if arr1d.size == 0: - return np.full(q.shape, np.nan)[()] # convert to scalar +def _nanmean_ddof_object(ddof, value, axis=None, **kwargs): + """ In house nanmean. ddof argument will be used in _nanvar method """ + from .duck_array_ops import (count, fillna, _dask_or_eager_func, + where_method) - return np.percentile(arr1d, q, overwrite_input=overwrite_input, - interpolation=interpolation) + valid_count = count(value, axis=axis) + value = fillna(value, 0) + # As dtype inference is impossible for object dtype, we assume float + # https://github.com/dask/dask/issues/3162 + dtype = kwargs.pop('dtype', None) + if dtype is None and value.dtype.kind == 'O': + dtype = value.dtype if value.dtype.kind in ['cf'] else float + + data = _dask_or_eager_func('sum')(value, axis=axis, dtype=dtype, **kwargs) + data = data / (valid_count - ddof) + return where_method(data, valid_count != 0) @bottleneck_switch() -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): - arr, mask = _replace_nan(a, 0) - if mask is None: - return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - - if dtype is not None: - dtype = np.dtype(dtype) - if dtype is not None and not issubclass(dtype.type, np.inexact): - raise TypeError("If a is inexact, then dtype must be inexact") - if out is not None and not issubclass(out.dtype.type, np.inexact): - raise TypeError("If a is inexact, then out must be inexact") - - # Compute mean - if type(arr) is np.matrix: - _keepdims = np._NoValue - else: - _keepdims = True - # we need to special case matrix for reverse compatibility - # in order for this to work, these sums need to be called with - # keepdims=True, however matrix now raises an error in this case, but - # the reason that it drops the keepdims kwarg is to force keepdims=True - # so this used to work by serendipity. - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims) - avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims) - avg = _divide_by_count(avg, cnt) - - # Compute squared deviation from mean. - np.subtract(arr, avg, out=arr, casting='unsafe') - arr = _copyto(arr, 0, mask) - if issubclass(arr.dtype.type, np.complexfloating): - sqr = np.multiply(arr, arr.conj(), out=arr).real - else: - sqr = np.multiply(arr, arr, out=arr) +def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): + if a.dtype.kind == 'O': + return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype) - # Compute variance. - var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - if var.ndim < cnt.ndim: - # Subclasses of ndarray may ignore keepdims, so check here. - cnt = cnt.squeeze(axis) - dof = cnt - ddof - var = _divide_by_count(var, dof) + if isinstance(a, dask_array_type): + return dask_array.nanmean(a, axis=axis, dtype=dtype) - isbad = (dof <= 0) - if np.any(isbad): - warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2) - # NaN, inf, or negative numbers are all possible bad - # values, so explicitly replace them with NaN. - var = _copyto(var, np.nan, isbad) - return var + return np.nanmean(a, axis=axis, dtype=dtype) -@bottleneck_switch() -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): - var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - if isinstance(var, np.ndarray): - std = np.sqrt(var, out=var) +def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + min_count=None): + a, mask = _replace_nan(a, 1) + result = np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + if min_count is not None: + return _maybe_null_out(result, axis, mask, min_count) else: - std = var.dtype.type(np.sqrt(var)) - return std + return result diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 8938d4b8c6c..4a5300419e9 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -3,6 +3,7 @@ from distutils.version import LooseVersion import numpy as np +import pandas as pd import pytest from numpy import array, nan import warnings @@ -238,6 +239,11 @@ def series_reduce(da, func, dim, **kwargs): return concat(da1, dim=d) +def assert_dask_array(da, dask): + if dask and da.ndim > 0: + assert isinstance(da.data, dask_array_type) + + @pytest.mark.parametrize('dim_num', [1, 2]) @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True]) @@ -278,8 +284,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): expected = getattr(np, func)(da.values, axis=axis) actual = getattr(da, func)(skipna=skipna, dim=aggdim) - if dask: - isinstance(da.data, dask_array_type) + assert_dask_array(actual, dask) assert np.allclose(actual.values, np.array(expected), rtol=1.0e-4, equal_nan=True) except (TypeError, AttributeError, ZeroDivisionError): @@ -307,8 +312,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): # make sure the dtype argument if func not in ['max', 'min']: actual = getattr(da, func)(skipna=skipna, dim=aggdim, dtype=float) - if dask: - isinstance(da.data, dask_array_type) + assert_dask_array(actual, dask) assert actual.dtype == float # without nan @@ -402,3 +406,26 @@ def test_dask_rolling(axis, window, center): with pytest.raises(ValueError): rolling_window(dx, axis=axis, window=100, center=center, fill_value=np.nan) + + +@pytest.mark.parametrize('dim_num', [1, 2]) +@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) +@pytest.mark.parametrize('dask', [False, True]) +@pytest.mark.parametrize('func', ['sum', 'prod']) +@pytest.mark.parametrize('aggdim', [None, 'x']) +def test_min_count(dim_num, dtype, dask, func, aggdim): + if dask and not has_dask: + pytest.skip('requires dask') + + da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask) + min_count = 3 + + actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count) + + if LooseVersion(pd.__version__) >= LooseVersion('0.22.0'): + # min_count has pandas > 0.22 + expected = series_reduce(da, func, skipna=True, dim=aggdim, + min_count=min_count) + assert_allclose(actual, expected) + + assert_dask_array(actual, dask) From 943e2b111c4b0cb2e5edf67c54818510365aa3fe Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Mon, 18 Jun 2018 21:23:30 +0900 Subject: [PATCH 03/25] remove NAT_TYPES --- xarray/core/dtypes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 7ad44472f06..7326b936e2e 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -98,9 +98,6 @@ def maybe_promote(dtype): return np.dtype(dtype), fill_value -NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT')) - - def get_fill_value(dtype): """Return an appropriate fill value for this dtype. From 84fc69ec807349338a953fbb17f6ac7213c0f8f0 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Mon, 18 Jun 2018 21:37:24 +0900 Subject: [PATCH 04/25] flake8. --- xarray/core/nanops.py | 12 ++++++------ xarray/tests/test_variable.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index cc4cc0d0c62..080a7a39f68 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -39,12 +39,12 @@ def f(values, axis=None, **kwds): dtype = kwds.get('dtype', None) min_count = kwds.get('min_count', 1) - if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK - and not isinstance(axis, tuple) - and values.dtype.kind in 'uifc' - and values.dtype.isnative - and (dtype is None or np.dtype(dtype) == values.dtype) - and min_count != 1): + if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and + not isinstance(axis, tuple) and + values.dtype.kind in 'uifc' and + values.dtype.isnative and + (dtype is None or np.dtype(dtype) == values.dtype) and + min_count != 1): # bottleneck does not take care dtype, min_count kwds.pop('dtype', None) kwds.pop('min_count', 1) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c486a394ae6..9800273cb60 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1508,8 +1508,8 @@ def test_reduce_funcs(self): assert_identical(v.all(dim='x'), Variable([], False)) v = Variable('t', pd.date_range('2000-01-01', periods=3)) - with pytest.raises(NotImplementedError): - v.argmax(skipna=True) + v.argmax(skipna=True) + assert_identical( v.max(), Variable([], pd.Timestamp('2000-01-03'))) From 11d735fdc0b7824b199a3aad1c710d035dc85f12 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Mon, 18 Jun 2018 21:40:00 +0900 Subject: [PATCH 05/25] another flake8 --- xarray/tests/test_variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 9800273cb60..2efa940510a 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1509,7 +1509,7 @@ def test_reduce_funcs(self): v = Variable('t', pd.date_range('2000-01-01', periods=3)) v.argmax(skipna=True) - + assert_identical( v.max(), Variable([], pd.Timestamp('2000-01-03'))) From 7a079f603e467ef3d20d24551597e58a7648b685 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Tue, 19 Jun 2018 07:07:02 +0900 Subject: [PATCH 06/25] recover nat types --- xarray/core/dtypes.py | 3 +++ xarray/core/nanops.py | 1 + 2 files changed, 4 insertions(+) diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 7326b936e2e..7ad44472f06 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -98,6 +98,9 @@ def maybe_promote(dtype): return np.dtype(dtype), fill_value +NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT')) + + def get_fill_value(dtype): """Return an appropriate fill value for this dtype. diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 080a7a39f68..bb84604e36d 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -83,6 +83,7 @@ def _replace_nan(a, val): This function is taken from https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py + but slightly modified to take care of dask.array """ if a.dtype == np.object_: # object arrays do not support `isnan` (gh-9009), so make a guess From 441be59d7ef2331a75bcb06efa276b09bf20bb8a Mon Sep 17 00:00:00 2001 From: keisukefujii Date: Tue, 19 Jun 2018 10:34:33 +0900 Subject: [PATCH 07/25] remove keep_dims option from nanops (to make them compatible with numpy==1.11). --- xarray/core/nanops.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index bb84604e36d..78b8822e4ea 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -50,6 +50,7 @@ def f(values, axis=None, **kwds): kwds.pop('min_count', 1) result = bn_func(values, axis=axis, **kwds) else: + print(kwds) result = alt(values, axis=axis, **kwds) return result @@ -122,7 +123,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): @bottleneck_switch() -def nanmin(a, axis=None, out=None, keepdims=np._NoValue): +def nanmin(a, axis=None, out=None): if a.dtype.kind == 'O': return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis) @@ -132,7 +133,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): @bottleneck_switch() -def nanmax(a, axis=None, out=None, keepdims=np._NoValue): +def nanmax(a, axis=None, out=None): if a.dtype.kind == 'O': return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis) @@ -207,10 +208,9 @@ def nanargmax(a, axis=None): @bottleneck_switch() -def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, - min_count=None): +def nansum(a, axis=None, dtype=None, out=None, min_count=None): a, mask = _replace_nan(a, 0) - result = np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims) + result = np.sum(a, axis=axis, dtype=dtype) if min_count is not None: return _maybe_null_out(result, axis, mask, min_count) else: @@ -236,7 +236,7 @@ def _nanmean_ddof_object(ddof, value, axis=None, **kwargs): @bottleneck_switch() -def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def nanmean(a, axis=None, dtype=None, out=None): if a.dtype.kind == 'O': return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype) @@ -246,10 +246,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): return np.nanmean(a, axis=axis, dtype=dtype) -def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, - min_count=None): +def nanprod(a, axis=None, dtype=None, out=None, min_count=None): a, mask = _replace_nan(a, 1) - result = np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + result = np.prod(a, axis=axis, dtype=dtype, out=out) if min_count is not None: return _maybe_null_out(result, axis, mask, min_count) else: From f95054b902915321bd94674e9175a5d26e063340 Mon Sep 17 00:00:00 2001 From: keisukefujii Date: Tue, 19 Jun 2018 10:45:37 +0900 Subject: [PATCH 08/25] Test aggregation over multiple dimensions --- xarray/tests/test_duck_array_ops.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 4a5300419e9..cc4d61f0fff 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -248,6 +248,7 @@ def assert_dask_array(da, dask): @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True]) @pytest.mark.parametrize('func', ['sum', 'min', 'max', 'mean', 'var']) +# TODO test cumsum, cumprod @pytest.mark.parametrize('skipna', [False, True]) @pytest.mark.parametrize('aggdim', [None, 'x']) def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): @@ -429,3 +430,16 @@ def test_min_count(dim_num, dtype, dask, func, aggdim): assert_allclose(actual, expected) assert_dask_array(actual, dask) + + +@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) +@pytest.mark.parametrize('dask', [False, True]) +@pytest.mark.parametrize('func', ['sum', 'prod']) +def test_multiple_dims(dtype, dask, func): + if dask and not has_dask: + pytest.skip('requires dask') + da = construct_dataarray(3, dtype, contains_nan=True, dask=dask) + + actual = getattr(da, func)(('x', 'y')) + expected = getattr(getattr(da, func)('x'), func)('y') + assert_allclose(actual, expected) From 9211b64f24f3d66193d37b455159bb3f445b3d6d Mon Sep 17 00:00:00 2001 From: keisukefujii Date: Tue, 19 Jun 2018 15:28:07 +0900 Subject: [PATCH 09/25] Remove print. --- xarray/core/nanops.py | 1 - xarray/tests/test_variable.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 78b8822e4ea..728a7b369ab 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -50,7 +50,6 @@ def f(values, axis=None, **kwds): kwds.pop('min_count', 1) result = bn_func(values, axis=axis, **kwds) else: - print(kwds) result = alt(values, axis=axis, **kwds) return result diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 2efa940510a..2306240857c 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1508,7 +1508,7 @@ def test_reduce_funcs(self): assert_identical(v.all(dim='x'), Variable([], False)) v = Variable('t', pd.date_range('2000-01-01', periods=3)) - v.argmax(skipna=True) + assert v.argmax(skipna=True) == 2 assert_identical( v.max(), Variable([], pd.Timestamp('2000-01-03'))) From 491ce2fcd35c20efdc6663349fe66d763c56495c Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Wed, 20 Jun 2018 19:17:56 +0900 Subject: [PATCH 10/25] Docs. More cleanup. --- doc/whats-new.rst | 5 ++ xarray/core/common.py | 41 ++++++++-------- xarray/core/duck_array_ops.py | 34 ++++++++----- xarray/core/nanops.py | 4 +- xarray/core/ops.py | 14 +++++- xarray/tests/test_duck_array_ops.py | 75 +++++++++++++++++++++++++++++ 6 files changed, 136 insertions(+), 37 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5871b8bb0a3..4a48c4e9972 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,11 @@ Documentation Enhancements ~~~~~~~~~~~~ +- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and + :py:meth:`~xarray.Dataset.mean`. + (:issue:`2230`) + By `Keisuke Fujii `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index d69c60eed56..a7a13ab3871 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -2,6 +2,7 @@ import warnings from distutils.version import LooseVersion +from textwrap import dedent import numpy as np import pandas as pd @@ -27,20 +28,20 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False, allow_lazy=True, **kwargs) return wrapped_func - _reduce_extra_args_docstring = \ - """dim : str or sequence of str, optional + _reduce_extra_args_docstring = dedent("""\ + dim : str or sequence of str, optional Dimension(s) over which to apply `{name}`. axis : int or sequence of int, optional Axis(es) over which to apply `{name}`. Only one of the 'dim' and 'axis' arguments can be supplied. If neither are supplied, then - `{name}` is calculated over axes.""" + `{name}` is calculated over axes.""") - _cum_extra_args_docstring = \ - """dim : str or sequence of str, optional + _cum_extra_args_docstring = dedent("""\ + dim : str or sequence of str, optional Dimension over which to apply `{name}`. axis : int or sequence of int, optional Axis over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied.""" + and 'axis' arguments can be supplied.""") class ImplementsDatasetReduce(object): @@ -308,12 +309,12 @@ def assign_coords(self, **kwargs): assigned : same type as caller A new object with the new coordinates in addition to the existing data. - + Examples -------- - + Convert longitude coordinates from 0-359 to -180-179: - + >>> da = xr.DataArray(np.random.rand(4), ... coords=[np.array([358, 359, 0, 1])], ... dims='lon') @@ -445,11 +446,11 @@ def groupby(self, group, squeeze=True): grouped : GroupBy A `GroupBy` object patterned after `pandas.GroupBy` that can be iterated over in the form of `(unique_value, grouped_array)` pairs. - + Examples -------- Calculate daily anomalies for daily data: - + >>> da = xr.DataArray(np.linspace(0, 1826, num=1827), ... coords=[pd.date_range('1/1/2000', '31/12/2004', ... freq='D')], @@ -465,7 +466,7 @@ def groupby(self, group, squeeze=True): Coordinates: * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... dayofyear (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ... - + See Also -------- core.groupby.DataArrayGroupBy @@ -589,7 +590,7 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, closed=None, label=None, base=0, keep_attrs=False, **indexer): """Returns a Resample object for performing resampling operations. - Handles both downsampling and upsampling. If any intervals contain no + Handles both downsampling and upsampling. If any intervals contain no values from the original object, they will be given the value ``NaN``. Parameters @@ -616,11 +617,11 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, ------- resampled : same type as caller This object resampled. - + Examples -------- Downsample monthly time-series data to seasonal data: - + >>> da = xr.DataArray(np.linspace(0, 11, num=12), ... coords=[pd.date_range('15/12/1999', ... periods=12, freq=pd.DateOffset(months=1))], @@ -635,15 +636,15 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, array([ 1., 4., 7., 10.]) Coordinates: * time (time) datetime64[ns] 2000-02-29 2000-05-31 2000-08-31 2000-11-30 - + Upsample monthly time-series data to daily data: - + >>> da.resample(time='1D').interpolate('linear') array([ 0. , 0.032258, 0.064516, ..., 10.935484, 10.967742, 11. ]) Coordinates: * time (time) datetime64[ns] 1999-12-15 1999-12-16 1999-12-17 ... - + References ---------- @@ -957,8 +958,8 @@ def contains_cftime_datetimes(var): sample = sample.item() return isinstance(sample, cftime_datetime) else: - return False - + return False + def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cf78ca66b13..a231963c446 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -214,8 +214,7 @@ def _ignore_warnings_if(condition): yield -def _create_nan_agg_method(name, numeric_only=False, np_compat=False, - no_bottleneck=False, coerce_strings=False): +def _create_nan_agg_method(name, np_compat=False, coerce_strings=False): def f(values, axis=None, skipna=None, **kwargs): if kwargs.pop('out', None) is not None: raise TypeError('`out` is not valid for {}'.format(name)) @@ -255,25 +254,34 @@ def f(values, axis=None, skipna=None, **kwargs): 'or newer to use skipna=True or skipna=None' % name) raise NotImplementedError(msg) - f.numeric_only = numeric_only f.__name__ = name return f +# Attributes `numeric_only`, `available_min_count` is used for docs. +# See ops.inject_reduce_methods argmax = _create_nan_agg_method('argmax', coerce_strings=True) argmin = _create_nan_agg_method('argmin', coerce_strings=True) max = _create_nan_agg_method('max', coerce_strings=True) min = _create_nan_agg_method('min', coerce_strings=True) -sum = _create_nan_agg_method('sum', numeric_only=True) -mean = _create_nan_agg_method('mean', numeric_only=True) -std = _create_nan_agg_method('std', numeric_only=True) -var = _create_nan_agg_method('var', numeric_only=True) -median = _create_nan_agg_method('median', numeric_only=True) -prod = _create_nan_agg_method('prod', numeric_only=True) -cumprod_1d = _create_nan_agg_method( - 'cumprod', numeric_only=True, np_compat=True) -cumsum_1d = _create_nan_agg_method( - 'cumsum', numeric_only=True, np_compat=True) +sum = _create_nan_agg_method('sum') +sum.numeric_only = True +sum.available_min_count = True +mean = _create_nan_agg_method('mean') +mean.numeric_only = True +std = _create_nan_agg_method('std') +std.numeric_only = True +var = _create_nan_agg_method('var') +var.numeric_only = True +median = _create_nan_agg_method('median') +median.numeric_only = True +prod = _create_nan_agg_method('prod') +prod.numeric_only = True +sum.available_min_count = True +cumprod_1d = _create_nan_agg_method('cumprod', np_compat=True) +cumprod_1d.numeric_only = True +cumsum_1d = _create_nan_agg_method('cumsum', np_compat=True) +cumsum_1d.numeric_only = True def _nd_cum_func(cum_func, array, axis, **kwargs): diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 728a7b369ab..531878f4686 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -37,14 +37,14 @@ def __call__(self, alt): @functools.wraps(alt) def f(values, axis=None, **kwds): dtype = kwds.get('dtype', None) - min_count = kwds.get('min_count', 1) + min_count = kwds.get('min_count', None) if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and not isinstance(axis, tuple) and values.dtype.kind in 'uifc' and values.dtype.isnative and (dtype is None or np.dtype(dtype) == values.dtype) and - min_count != 1): + min_count is None): # bottleneck does not take care dtype, min_count kwds.pop('dtype', None) kwds.pop('min_count', 1) diff --git a/xarray/core/ops.py b/xarray/core/ops.py index d9e8ceb65d5..2bf8682f357 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -86,7 +86,7 @@ If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). + implemented (object, datetime64 or timedelta64).{min_count_docs} keep_attrs : bool, optional If True, the attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be @@ -102,6 +102,12 @@ indicated dimension(s) removed. """ +_MINCOUNT_DOCSTRING = """ +min_count : int, default None + The required number of valid values to perform the operation. If fewer than + min_count non-NA values are present the result will be NA. + New in version 0.10.8: Added with the default being None.""" + _ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\ Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension. @@ -236,11 +242,15 @@ def inject_reduce_methods(cls): [('count', duck_array_ops.count, False)]) for name, f, include_skipna in methods: numeric_only = getattr(f, 'numeric_only', False) + available_min_count = getattr(f, 'available_min_count', False) + min_count_docs = _MINCOUNT_DOCSTRING if available_min_count else '' + func = cls._reduce_method(f, include_skipna, numeric_only) func.__name__ = name func.__doc__ = _REDUCE_DOCSTRING_TEMPLATE.format( name=name, cls=cls.__name__, - extra_args=cls._reduce_extra_args_docstring.format(name=name)) + extra_args=cls._reduce_extra_args_docstring.format(name=name), + min_count_docs=min_count_docs) setattr(cls, name, func) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index cc4d61f0fff..c1e103c366f 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest +from textwrap import dedent from numpy import array, nan import warnings @@ -443,3 +444,77 @@ def test_multiple_dims(dtype, dask, func): actual = getattr(da, func)(('x', 'y')) expected = getattr(getattr(da, func)('x'), func)('y') assert_allclose(actual, expected) + + +def test_docs(): + # with min_count + actual = DataArray.sum.__doc__ + expected = dedent("""\ + Reduce this DataArray's data by applying `sum` along some dimension(s). + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to apply `sum`. + axis : int or sequence of int, optional + Axis(es) over which to apply `sum`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + `sum` is calculated over axes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default None + The required number of valid values to perform the operation. If fewer than + min_count non-NA values are present the result will be NA. + New in version 0.10.8: Added with the default being None. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `sum` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray object with `sum` applied to its data and the + indicated dimension(s) removed. + """) + assert actual == expected + + # without min_count + actual = DataArray.mean.__doc__ + expected = dedent("""\ + Reduce this DataArray's data by applying `mean` along some dimension(s). + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to apply `mean`. + axis : int or sequence of int, optional + Axis(es) over which to apply `mean`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + `mean` is calculated over axes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `mean` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray object with `mean` applied to its data and the + indicated dimension(s) removed. + """) + assert actual == expected From 5dda53586eae9eba65ed828f84f65a7b4d949980 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Wed, 20 Jun 2018 21:31:33 +0900 Subject: [PATCH 11/25] flake8 --- xarray/core/ops.py | 6 +++--- xarray/tests/test_duck_array_ops.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 2bf8682f357..a0dd2212a8f 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -104,9 +104,9 @@ _MINCOUNT_DOCSTRING = """ min_count : int, default None - The required number of valid values to perform the operation. If fewer than - min_count non-NA values are present the result will be NA. - New in version 0.10.8: Added with the default being None.""" + The required number of valid values to perform the operation. + If fewer than min_count non-NA values are present the result will + be NA. New in version 0.10.8: Added with the default being None.""" _ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\ Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension. diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c1e103c366f..2930bad6c9f 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -466,9 +466,9 @@ def test_docs(): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default None - The required number of valid values to perform the operation. If fewer than - min_count non-NA values are present the result will be NA. - New in version 0.10.8: Added with the default being None. + The required number of valid values to perform the operation. + If fewer than min_count non-NA values are present the result will + be NA. New in version 0.10.8: Added with the default being None. keep_attrs : bool, optional If True, the attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be @@ -486,18 +486,18 @@ def test_docs(): assert actual == expected # without min_count - actual = DataArray.mean.__doc__ + actual = DataArray.std.__doc__ expected = dedent("""\ - Reduce this DataArray's data by applying `mean` along some dimension(s). + Reduce this DataArray's data by applying `std` along some dimension(s). Parameters ---------- dim : str or sequence of str, optional - Dimension(s) over which to apply `mean`. + Dimension(s) over which to apply `std`. axis : int or sequence of int, optional - Axis(es) over which to apply `mean`. Only one of the 'dim' + Axis(es) over which to apply `std`. Only one of the 'dim' and 'axis' arguments can be supplied. If neither are supplied, then - `mean` is calculated over axes. + `std` is calculated over axes. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -509,12 +509,12 @@ def test_docs(): returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `mean` on this object's data. + function for calculating `std` on this object's data. Returns ------- reduced : DataArray - New DataArray object with `mean` applied to its data and the + New DataArray object with `std` applied to its data and the indicated dimension(s) removed. """) assert actual == expected From 5ddc4ebacd047d96c774440aca02440a87286e09 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Wed, 20 Jun 2018 22:49:05 +0900 Subject: [PATCH 12/25] Bug fix. Better test coverage. --- doc/whats-new.rst | 2 +- xarray/core/duck_array_ops.py | 4 ++-- xarray/core/nanops.py | 28 ++++++++++++++++++++++++-- xarray/tests/test_dataarray.py | 10 +++++++--- xarray/tests/test_duck_array_ops.py | 31 ++++++++++++++++++----------- 5 files changed, 55 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 50b6f5c6ea5..4f3f87a5883 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,7 +37,7 @@ Enhancements ~~~~~~~~~~~~ - min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and - :py:meth:`~xarray.Dataset.mean`. + :py:meth:`~xarray.Dataset.prod`. (:issue:`2230`) By `Keisuke Fujii `_. diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a231963c446..2e8efbc67aa 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -233,11 +233,11 @@ def f(values, axis=None, skipna=None, **kwargs): func = getattr( nanops, nanname, _dask_or_eager_func( nanname, eager_module=np_module)) - if func is None: + else: if dtype is None: func = _dask_or_eager_func(name) else: - func = getattr(np, name) + func = getattr(np_module, name) try: return func(values, axis=axis, **kwargs) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 531878f4686..8f8248599b4 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -105,6 +105,10 @@ def _maybe_null_out(result, axis, mask, min_count=1): """ xarray version of pandas.core.nanops._maybe_null_out """ + if hasattr(axis, '__len__'): # if tuple or list + raise ValueError('min_count is not available for reduction ' + 'with more than one dimensions.') + if axis is not None and getattr(result, 'ndim', False): null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 if np.any(null_mask): @@ -112,8 +116,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): result = result.astype(dtype) result[null_mask] = fill_value - elif (not isinstance(result, dask_array_type) and - result not in dtypes.NAT_TYPES): + elif getattr(result, 'dtype', None) not in dtypes.NAT_TYPES: null_mask = mask.size - mask.sum() if null_mask < min_count: result = np.nan @@ -245,6 +248,27 @@ def nanmean(a, axis=None, dtype=None, out=None): return np.nanmean(a, axis=axis, dtype=dtype) +def _nanvar_object(value, axis=None, **kwargs): + ddof = kwargs.pop('ddof', 0) + kwargs_mean = kwargs.copy() + kwargs_mean.pop('keepdims', None) + value_mean = _nanmean_ddof_object(ddof=0, value=value, axis=axis, + keepdims=True, **kwargs_mean) + squared = (value.astype(value_mean.dtype) - value_mean)**2 + return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs) + + +@bottleneck_switch() +def nanvar(a, axis=None, dtype=None, out=None, ddof=0): + if a.dtype.kind == 'O': + return _nanvar_object(a, axis=axis, dtype=dtype, ddof=ddof) + + if isinstance(a, dask_array_type): + return dask_array.nanvar(a, axis=axis, dtype=dtype, ddof=ddof) + + return np.nanvar(a, axis=axis, dtype=dtype, ddof=ddof) + + def nanprod(a, axis=None, dtype=None, out=None, min_count=None): a, mask = _replace_nan(a, 1) result = np.prod(a, axis=axis, dtype=dtype, out=out) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d339e6402b6..153b276f1ac 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3416,7 +3416,9 @@ def test_isin(da): def test_rolling_iter(da): rolling_obj = da.rolling(time=7) - rolling_obj_mean = rolling_obj.mean() + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Mean of empty slice') + rolling_obj_mean = rolling_obj.mean() assert len(rolling_obj.window_labels) == len(da['time']) assert_identical(rolling_obj.window_labels, da['time']) @@ -3424,8 +3426,10 @@ def test_rolling_iter(da): for i, (label, window_da) in enumerate(rolling_obj): assert label == da['time'].isel(time=i) - actual = rolling_obj_mean.isel(time=i) - expected = window_da.mean('time') + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Mean of empty slice') + actual = rolling_obj_mean.isel(time=i) + expected = window_da.mean('time') # TODO add assert_allclose_with_nan, which compares nan position # as well as the closeness of the values. diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 2930bad6c9f..30447d76ada 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -10,7 +10,7 @@ import warnings from xarray import DataArray, concat -from xarray.core import duck_array_ops +from xarray.core import duck_array_ops, dtypes from xarray.core.duck_array_ops import ( array_notnull_equiv, concatenate, count, first, last, mean, rolling_window, stack, where) @@ -203,10 +203,15 @@ def construct_dataarray(dim_num, dtype, contains_nan, dask): array = rng.choice(['a', 'b', 'c', 'd'], size=shapes) else: raise ValueError - da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da') if contains_nan: - da = da.reindex(x=np.arange(20)) + inds = rng.choice(range(array.size), int(array.size * 0.2)) + dtype, fill_value = dtypes.maybe_promote(array.dtype) + array = array.astype(dtype) + array.flat[inds] = fill_value + + da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da') + if dask and has_dask: chunks = {d: 4 for d in dims} da = da.chunk(chunks) @@ -263,6 +268,9 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): if dask and not has_dask: pytest.skip('requires dask') + if dask and skipna is False and dtype in [np.bool_]: + pytest.skip('dask does not compute object-typed array') + rtol = 1e-04 if dtype == np.float32 else 1e-05 da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask) @@ -294,8 +302,13 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): # nanmean for object dtype pass - # make sure the compatiblility with pandas' results. actual = getattr(da, func)(skipna=skipna, dim=aggdim) + + # for dask case, make sure the result is the same for numpy backend + expected = getattr(da.compute(), func)(skipna=skipna, dim=aggdim) + assert_allclose(actual, expected, rtol=rtol) + + # make sure the compatiblility with pandas' results. if func == 'var': expected = series_reduce(da, func, skipna=skipna, dim=aggdim, ddof=0) @@ -358,13 +371,6 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'All-NaN slice') - if aggdim == 'y' and contains_nan and skipna: - with pytest.raises(ValueError): - actual = da.isel(**{ - aggdim: getattr(da, 'arg' + func)( - dim=aggdim, skipna=skipna).compute()}) - return - actual = da.isel(**{aggdim: getattr(da, 'arg' + func) (dim=aggdim, skipna=skipna).compute()}) expected = getattr(da, func)(dim=aggdim, skipna=skipna) @@ -374,6 +380,7 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): def test_argmin_max_error(): da = construct_dataarray(2, np.bool_, contains_nan=True, dask=False) + da[0] = np.nan with pytest.raises(ValueError): da.argmin(dim='y') @@ -425,7 +432,7 @@ def test_min_count(dim_num, dtype, dask, func, aggdim): actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count) if LooseVersion(pd.__version__) >= LooseVersion('0.22.0'): - # min_count has pandas > 0.22 + # min_count is only implenented in pandas > 0.22 expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count) assert_allclose(actual, expected) From c37de0e13ddc7a65b640daf86b14bd2c8e2ef656 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 21 Jun 2018 08:11:06 +0900 Subject: [PATCH 13/25] using isnull, where_method. Remove unnecessary conditional branching. --- xarray/core/duck_array_ops.py | 5 +---- xarray/core/nanops.py | 16 +++------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 2e8efbc67aa..a56b7c5563f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -234,10 +234,7 @@ def f(values, axis=None, skipna=None, **kwargs): nanops, nanname, _dask_or_eager_func( nanname, eager_module=np_module)) else: - if dtype is None: - func = _dask_or_eager_func(name) - else: - func = getattr(np_module, name) + func = _dask_or_eager_func(name) try: return func(values, axis=axis, **kwargs) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 8f8248599b4..97df3609d51 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -85,20 +85,10 @@ def _replace_nan(a, val): https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py but slightly modified to take care of dask.array """ - if a.dtype == np.object_: - # object arrays do not support `isnan` (gh-9009), so make a guess - mask = a != a - elif issubclass(a.dtype.type, np.inexact): - mask = np.isnan(a) - else: - mask = None - - if mask is not None: - if isinstance(a, dask_array_type): - return dask_array.where(mask, val, a), mask - return np.where(mask, val, a), mask + from .duck_array_ops import isnull, where_method - return a, mask + mask = isnull(a) + return where_method(val, mask, a), mask def _maybe_null_out(result, axis, mask, min_count=1): From 7aedd02d71def177ead31348ed4ace3ec2ee4b95 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 21 Jun 2018 18:38:42 +0900 Subject: [PATCH 14/25] More refactoring based on the comments --- xarray/core/duck_array_ops.py | 24 ++--- xarray/core/nanops.py | 190 ++++++++++++---------------------- xarray/core/nputils.py | 41 ++++++++ xarray/tests/test_dataset.py | 2 - 4 files changed, 114 insertions(+), 143 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a56b7c5563f..cefc5df08b9 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -16,15 +16,6 @@ from . import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type -from . import nanops - -try: - import bottleneck as bn - has_bottleneck = True -except ImportError: - # use numpy methods instead - bn = np - has_bottleneck = False try: import dask.array as dask_array @@ -214,25 +205,22 @@ def _ignore_warnings_if(condition): yield -def _create_nan_agg_method(name, np_compat=False, coerce_strings=False): +def _create_nan_agg_method(name, coerce_strings=False): + from . import nanops + def f(values, axis=None, skipna=None, **kwargs): if kwargs.pop('out', None) is not None: raise TypeError('`out` is not valid for {}'.format(name)) - # If dtype is supplied, we use numpy's method. - dtype = kwargs.get('dtype', None) values = asarray(values) if coerce_strings and values.dtype.kind in 'SU': values = values.astype(object) - np_module = npcompat if np_compat else np func = None if skipna or (skipna is None and values.dtype.kind in 'cfO'): nanname = 'nan' + name - func = getattr( - nanops, nanname, _dask_or_eager_func( - nanname, eager_module=np_module)) + func = getattr(nanops, nanname) else: func = _dask_or_eager_func(name) @@ -275,9 +263,9 @@ def f(values, axis=None, skipna=None, **kwargs): prod = _create_nan_agg_method('prod') prod.numeric_only = True sum.available_min_count = True -cumprod_1d = _create_nan_agg_method('cumprod', np_compat=True) +cumprod_1d = _create_nan_agg_method('cumprod') cumprod_1d.numeric_only = True -cumsum_1d = _create_nan_agg_method('cumsum', np_compat=True) +cumsum_1d = _create_nan_agg_method('cumsum') cumsum_1d.numeric_only = True diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 97df3609d51..ebc99217c84 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -1,92 +1,24 @@ from __future__ import absolute_import, division, print_function -import functools - import numpy as np from . import dtypes from .pycompat import dask_array_type - - -try: - import bottleneck as bn - _USE_BOTTLENECK = True -except ImportError: - # use numpy methods instead - bn = np - _USE_BOTTLENECK = False +from . duck_array_ops import (count, isnull, fillna, where_method, + _dask_or_eager_func) +from . import nputils try: import dask.array as dask_array - from . import dask_array_compat except ImportError: dask_array = None - dask_array_compat = None - - -class bottleneck_switch(object): - """ xarray-version of pandas.core.nanops.bottleneck_switch """ - def __call__(self, alt): - bn_name = alt.__name__ - - try: - bn_func = getattr(bn, bn_name) - except (AttributeError, NameError): # pragma: no cover - bn_func = None - - @functools.wraps(alt) - def f(values, axis=None, **kwds): - dtype = kwds.get('dtype', None) - min_count = kwds.get('min_count', None) - - if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and - not isinstance(axis, tuple) and - values.dtype.kind in 'uifc' and - values.dtype.isnative and - (dtype is None or np.dtype(dtype) == values.dtype) and - min_count is None): - # bottleneck does not take care dtype, min_count - kwds.pop('dtype', None) - kwds.pop('min_count', 1) - result = bn_func(values, axis=axis, **kwds) - else: - result = alt(values, axis=axis, **kwds) - - return result - - return f def _replace_nan(a, val): """ - If `a` is of inexact type, make a copy of `a`, replace NaNs with - the `val` value, and return the copy together with a boolean mask - marking the locations where NaNs were present. If `a` is not of - inexact type, do nothing and return `a` together with a mask of None. - Note that scalars will end up as array scalars, which is important - for using the result as the value of the out argument in some - operations. - Parameters - ---------- - a : array-like - Input array. - val : float - NaN values are set to val before doing the operation. - Returns - ------- - y : ndarray - If `a` is of inexact type, return a copy of `a` with the NaNs - replaced by the fill value, otherwise return `a`. - mask: {bool, None} - If `a` is of inexact type, return a boolean mask marking locations of - NaNs, otherwise return None. - - This function is taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py - but slightly modified to take care of dask.array + replace nan in a by val, and returns the replaced array and the nan + position """ - from .duck_array_ops import isnull, where_method - mask = isnull(a) return where_method(val, mask, a), mask @@ -114,32 +46,9 @@ def _maybe_null_out(result, axis, mask, min_count=1): return result -@bottleneck_switch() -def nanmin(a, axis=None, out=None): - if a.dtype.kind == 'O': - return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis) - - if isinstance(a, dask_array_type): - return dask_array.nanmin(a, axis=axis) - return np.nanmin(a, axis=axis) - - -@bottleneck_switch() -def nanmax(a, axis=None, out=None): - if a.dtype.kind == 'O': - return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis) - - if isinstance(a, dask_array_type): - return dask_array.nanmax(a, axis=axis) - return np.nanmax(a, axis=axis) - - -def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): +def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs): """ In house nanargmin, nanargmax for object arrays. Always return integer type """ - from .duck_array_ops import count, fillna - - fill_value = get_fill_value(value.dtype) valid_count = count(value, axis=axis) value = fillna(value, fill_value) data = getattr(np, func)(value, axis=axis, **kwargs) @@ -153,11 +62,8 @@ def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs): return np.array(data, dtype=int) -def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs): +def _nan_minmax_object(func, fill_value, value, axis=None, **kwargs): """ In house nanmin and nanmax for object array """ - from .duck_array_ops import count, fillna, where_method - - fill_value = get_fill_value(value.dtype) valid_count = count(value, axis=axis) filled_value = fillna(value, fill_value) data = getattr(np, func)(filled_value, axis=axis, **kwargs) @@ -167,13 +73,36 @@ def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs): return where_method(data, valid_count != 0) -@bottleneck_switch() +def nanmin(a, axis=None, out=None): + if a.dtype.kind == 'O': + return _nan_minmax_object( + 'min', dtypes.get_pos_infinity(a.dtype), a, axis) + + if isinstance(a, dask_array_type): + return dask_array.nanmin(a, axis=axis) + return nputils.nanmin(a, axis=axis) + + +def nanmax(a, axis=None, out=None): + if a.dtype.kind == 'O': + return _nan_minmax_object( + 'max', dtypes.get_neg_infinity(a.dtype), a, axis) + + if isinstance(a, dask_array_type): + return dask_array.nanmax(a, axis=axis) + return nputils.nanmax(a, axis=axis) + + def nanargmin(a, axis=None): + fill_value = dtypes.get_pos_infinity(a.dtype) if a.dtype.kind == 'O': - return _nan_argminmax_object('argmin', dtypes.get_pos_infinity, - a, axis=axis) - a, mask = _replace_nan(a, np.inf) - res = np.argmin(a, axis=axis) + return _nan_argminmax_object('argmin', fill_value, a, axis=axis) + a, mask = _replace_nan(a, fill_value) + if isinstance(a, dask_array_type): + res = dask_array.argmin(a, axis=axis) + else: + res = np.argmin(a, axis=axis) + if mask is not None: mask = np.all(mask, axis=axis) if np.any(mask): @@ -181,17 +110,17 @@ def nanargmin(a, axis=None): return res -@bottleneck_switch() def nanargmax(a, axis=None): - """ - taken from - https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py - """ + fill_value = dtypes.get_neg_infinity(a.dtype) if a.dtype.kind == 'O': - return _nan_argminmax_object('argmax', dtypes.get_neg_infinity, - a, axis=axis) - a, mask = _replace_nan(a, -np.inf) - res = np.argmax(a, axis=axis) + return _nan_argminmax_object('argmax', fill_value, a, axis=axis) + + a, mask = _replace_nan(a, fill_value) + if isinstance(a, dask_array_type): + res = dask_array.argmax(a, axis=axis) + else: + res = np.argmax(a, axis=axis) + if mask is not None: mask = np.all(mask, axis=axis) if np.any(mask): @@ -199,10 +128,9 @@ def nanargmax(a, axis=None): return res -@bottleneck_switch() def nansum(a, axis=None, dtype=None, out=None, min_count=None): a, mask = _replace_nan(a, 0) - result = np.sum(a, axis=axis, dtype=dtype) + result = _dask_or_eager_func('sum')(a, axis=axis, dtype=dtype) if min_count is not None: return _maybe_null_out(result, axis, mask, min_count) else: @@ -227,7 +155,6 @@ def _nanmean_ddof_object(ddof, value, axis=None, **kwargs): return where_method(data, valid_count != 0) -@bottleneck_switch() def nanmean(a, axis=None, dtype=None, out=None): if a.dtype.kind == 'O': return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype) @@ -238,6 +165,11 @@ def nanmean(a, axis=None, dtype=None, out=None): return np.nanmean(a, axis=axis, dtype=dtype) +def nanmedian(a, axis=None, dtype=None, out=None): + return _dask_or_eager_func('nanmedian', eager_module=nputils)( + a, axis=axis, dtype=dtype) + + def _nanvar_object(value, axis=None, **kwargs): ddof = kwargs.pop('ddof', 0) kwargs_mean = kwargs.copy() @@ -248,21 +180,33 @@ def _nanvar_object(value, axis=None, **kwargs): return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs) -@bottleneck_switch() def nanvar(a, axis=None, dtype=None, out=None, ddof=0): if a.dtype.kind == 'O': return _nanvar_object(a, axis=axis, dtype=dtype, ddof=ddof) - if isinstance(a, dask_array_type): - return dask_array.nanvar(a, axis=axis, dtype=dtype, ddof=ddof) + return _dask_or_eager_func('nanvar', eager_module=nputils)( + a, axis=axis, dtype=dtype, ddof=ddof) - return np.nanvar(a, axis=axis, dtype=dtype, ddof=ddof) + +def nanstd(a, axis=None, dtype=None, out=None): + return _dask_or_eager_func('nanstd', eager_module=nputils)( + a, axis=axis, dtype=dtype) def nanprod(a, axis=None, dtype=None, out=None, min_count=None): a, mask = _replace_nan(a, 1) - result = np.prod(a, axis=axis, dtype=dtype, out=out) + result = _dask_or_eager_func('nanprod')(a, axis=axis, dtype=dtype, out=out) if min_count is not None: return _maybe_null_out(result, axis, mask, min_count) else: return result + + +def nancumsum(a, axis=None, dtype=None, out=None): + return _dask_or_eager_func('nancumsum', eager_module=nputils)( + a, axis=axis, dtype=dtype) + + +def nancumprod(a, axis=None, dtype=None, out=None): + return _dask_or_eager_func('nancumprod', eager_module=nputils)( + a, axis=axis, dtype=dtype) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 4ca1f9390eb..f8c71e2df7f 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -7,6 +7,14 @@ from . import npcompat +try: + import bottleneck as bn + _USE_BOTTLENECK = True +except ImportError: + # use numpy methods instead + bn = np + _USE_BOTTLENECK = False + def _validate_axis(data, axis): ndim = data.ndim @@ -197,3 +205,36 @@ def _rolling_window(a, window, axis=-1): rolling = npcompat.as_strided(a, shape=shape, strides=strides, writeable=False) return np.swapaxes(rolling, -2, axis) + + +def _create_bottleneck_method(name, npmodule=np): + def f(values, axis=None, **kwds): + dtype = kwds.get('dtype', None) + bn_func = getattr(bn, name, None) + + if (_USE_BOTTLENECK and bn_func is not None and + not isinstance(axis, tuple) and + values.dtype.kind in 'uifc' and + values.dtype.isnative and + (dtype is None or np.dtype(dtype) == values.dtype)): + # bottleneck does not take care dtype, min_count + kwds.pop('dtype', None) + result = bn_func(values, axis=axis, **kwds) + else: + result = getattr(npmodule, name)(values, axis=axis, **kwds) + + return result + + f.__name__ = name + return f + + +nanmin = _create_bottleneck_method('nanmin') +nanmax = _create_bottleneck_method('nanmax') +nanmean = _create_bottleneck_method('nanmean') +nanmedian = _create_bottleneck_method('nanmedian') +nanvar = _create_bottleneck_method('nanvar') +nanstd = _create_bottleneck_method('nanstd') +nanprod = _create_bottleneck_method('nanprod') +nancumsum = _create_bottleneck_method('nancumsum', npmodule=npcompat) +nancumprod = _create_bottleneck_method('nancumprod', npmodule=npcompat) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a0d316d74dc..002ef12f4d0 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3352,7 +3352,6 @@ def test_reduce(self): (('dim2', 'time'), ['dim1', 'dim3']), ((), ['dim1', 'dim2', 'dim3', 'time'])]: actual = data.min(dim=reduct).dims - print(reduct, actual, expected) self.assertItemsEqual(actual, expected) assert_equal(data.mean(dim=[]), data) @@ -3407,7 +3406,6 @@ def test_reduce_cumsum_test_dims(self): ('time', ['dim1', 'dim2', 'dim3']) ]: actual = getattr(data, cumfunc)(dim=reduct).dims - print(reduct, actual, expected) self.assertItemsEqual(actual, expected) def test_reduce_non_numeric(self): From ba903db270b9651080b765eb2379e05bbbb8c72b Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Fri, 22 Jun 2018 20:19:10 +0900 Subject: [PATCH 15/25] remove dtype from nanmedian --- xarray/core/nanops.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index ebc99217c84..2d9ded4246b 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -165,9 +165,8 @@ def nanmean(a, axis=None, dtype=None, out=None): return np.nanmean(a, axis=axis, dtype=dtype) -def nanmedian(a, axis=None, dtype=None, out=None): - return _dask_or_eager_func('nanmedian', eager_module=nputils)( - a, axis=axis, dtype=dtype) +def nanmedian(a, axis=None, out=None): + return _dask_or_eager_func('nanmedian', eager_module=nputils)(a, axis=axis) def _nanvar_object(value, axis=None, **kwargs): From 5b09714c77ae03c2b7215313d9c5e72499dc9e1b Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Fri, 22 Jun 2018 21:48:46 +0900 Subject: [PATCH 16/25] Fix for nanmedian --- xarray/core/duck_array_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cefc5df08b9..17eb310f8db 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -229,9 +229,10 @@ def f(values, axis=None, skipna=None, **kwargs): except AttributeError: if isinstance(values, dask_array_type): try: # dask/dask#3133 dask sometimes needs dtype argument + # if func does not accept dtype, then raises TypeError return func(values, axis=axis, dtype=values.dtype, **kwargs) - except AttributeError: + except (AttributeError, TypeError): msg = '%s is not yet implemented on dask arrays' % name else: msg = ('%s is not available with skipna=False with the ' From 5c82628610d8537467bd82039427a8dc6d3211df Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sat, 11 Aug 2018 11:01:21 +0900 Subject: [PATCH 17/25] Add tests for dataset --- xarray/tests/test_duck_array_ops.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index fcb2fb1d3e3..fb00e500260 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -9,7 +9,7 @@ from numpy import array, nan import warnings -from xarray import DataArray, concat +from xarray import DataArray, Dataset, concat from xarray.core import duck_array_ops, dtypes from xarray.core.duck_array_ops import ( array_notnull_equiv, concatenate, count, first, last, mean, rolling_window, @@ -440,6 +440,15 @@ def test_min_count(dim_num, dtype, dask, func, aggdim): assert_dask_array(actual, dask) +@pytest.mark.parametrize('func', ['sum', 'prod']) +def test_min_count_dataset(func): + da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False) + ds = Dataset({'var1': da}, coords={'scalar': 0}) + actual = getattr(ds, func)(dim='x', skipna=True, min_count=3)['var1'] + expected = getattr(ds['var1'], func)(dim='x', skipna=True, min_count=3) + assert_allclose(actual, expected) + + @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True]) @pytest.mark.parametrize('func', ['sum', 'prod']) From 06319ac3e118bbd073ec12c1d18a16490e38cb97 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sat, 11 Aug 2018 13:04:26 +0900 Subject: [PATCH 18/25] Add tests with resample. --- xarray/tests/test_dataset.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e47c1cc5f31..804ad2a1764 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2715,6 +2715,20 @@ def test_resample_and_first(self): result = actual.reduce(method) assert_equal(expected, result) + def test_resample_min_count(self): + times = pd.date_range('2000-01-01', freq='6H', periods=10) + ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)), + 'bar': ('time', np.random.randn(10), {'meta': 'data'}), + 'time': times}) + # inject nan + ds['foo'] = xr.where(ds['foo'] > 2.0, np.nan, ds['foo']) + + actual = ds.resample(time='1D').sum(min_count=1) + expected = xr.concat([ + ds.isel(time=slice(i*4, (i+1)*4)).sum('time', min_count=1) + for i in range(3)], dim=actual['time']) + assert_equal(expected, actual) + def test_resample_by_mean_with_keep_attrs(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)), From 737118e1338e5bb5bde67f2a9f358cee97a41789 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sat, 11 Aug 2018 13:06:21 +0900 Subject: [PATCH 19/25] lint --- xarray/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 804ad2a1764..fefc822b8d5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2725,7 +2725,7 @@ def test_resample_min_count(self): actual = ds.resample(time='1D').sum(min_count=1) expected = xr.concat([ - ds.isel(time=slice(i*4, (i+1)*4)).sum('time', min_count=1) + ds.isel(time=slice(i * 4, (i + 1) * 4)).sum('time', min_count=1) for i in range(3)], dim=actual['time']) assert_equal(expected, actual) From 85b5650fecbbd29fff9987d98c76406db0f00164 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Sat, 11 Aug 2018 13:13:33 +0900 Subject: [PATCH 20/25] updated whatsnew --- doc/whats-new.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 162cca37355..dbacf6ff56e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,7 +36,8 @@ Documentation Enhancements ~~~~~~~~~~~~ -- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and +- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum`, + :py:meth:`~xarray.DataArray.prod` and :py:meth:`~xarray.Dataset.sum`, and :py:meth:`~xarray.Dataset.prod`. (:issue:`2230`) By `Keisuke Fujii `_. @@ -66,11 +67,12 @@ Bug fixes attribute being set. (:issue:`2201`) By `Thomas Voigt `_. + - Tests can be run in parallel with pytest-xdist + - Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. - - Now :py:func:`xr.apply_ufunc` raises a ValueError when the size of ``input_core_dims`` is inconsistent with the number of arguments. (:issue:`2341`) From 015e85c8f857f946825c0e9d27193ffae328764b Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 16 Aug 2018 12:27:48 +0900 Subject: [PATCH 21/25] Revise from comments. --- xarray/core/nanops.py | 16 ++++++++-------- xarray/tests/test_duck_array_ops.py | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 2d9ded4246b..461479dedeb 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -48,14 +48,16 @@ def _maybe_null_out(result, axis, mask, min_count=1): def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs): """ In house nanargmin, nanargmax for object arrays. Always return integer - type """ + type + """ valid_count = count(value, axis=axis) value = fillna(value, fill_value) - data = getattr(np, func)(value, axis=axis, **kwargs) + data = _dask_or_eager_func(func)(value, axis=axis, **kwargs) # dask seems return non-integer type if isinstance(value, dask_array_type): data = data.astype(int) + # TODO This will evaluate dask arrays and might be costly. if (valid_count == 0).any(): raise ValueError('All-NaN slice encountered') @@ -78,9 +80,8 @@ def nanmin(a, axis=None, out=None): return _nan_minmax_object( 'min', dtypes.get_pos_infinity(a.dtype), a, axis) - if isinstance(a, dask_array_type): - return dask_array.nanmin(a, axis=axis) - return nputils.nanmin(a, axis=axis) + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanmin(a, axis=axis) def nanmax(a, axis=None, out=None): @@ -88,9 +89,8 @@ def nanmax(a, axis=None, out=None): return _nan_minmax_object( 'max', dtypes.get_neg_infinity(a.dtype), a, axis) - if isinstance(a, dask_array_type): - return dask_array.nanmax(a, axis=axis) - return nputils.nanmax(a, axis=axis) + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanmax(a, axis=axis) def nanargmin(a, axis=None): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index fb00e500260..3f32fc49fd2 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -316,7 +316,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): # also check ddof!=0 case actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5) if dask: - isinstance(da.data, dask_array_type) + assert isinstance(da.data, dask_array_type) expected = series_reduce(da, func, skipna=skipna, dim=aggdim, ddof=5) assert_allclose(actual, expected, rtol=rtol) @@ -334,7 +334,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask) actual = getattr(da, func)(skipna=skipna) if dask: - isinstance(da.data, dask_array_type) + assert isinstance(da.data, dask_array_type) expected = getattr(np, 'nan{}'.format(func))(da.values) if actual.dtype == object: assert actual.values == np.array(expected) From 01a1419c5353d2d8a6d7f6b899d18f4c7e13d5b9 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 16 Aug 2018 12:31:57 +0900 Subject: [PATCH 22/25] Use .any and .all method instead of np.any / np.all --- xarray/core/nanops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 461479dedeb..c5d4b6f4b02 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -104,8 +104,8 @@ def nanargmin(a, axis=None): res = np.argmin(a, axis=axis) if mask is not None: - mask = np.all(mask, axis=axis) - if np.any(mask): + mask = mask.all(axis=axis) + if mask.any(): raise ValueError("All-NaN slice encountered") return res From a5b18fcd786942f3ba7982cec147c1afb6ff4818 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 16 Aug 2018 12:35:30 +0900 Subject: [PATCH 23/25] Avoid using numpy methods --- xarray/core/nanops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index c5d4b6f4b02..be7edb34c71 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -33,7 +33,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): if axis is not None and getattr(result, 'ndim', False): null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 - if np.any(null_mask): + if null_mask.any(): dtype, fill_value = dtypes.maybe_promote(result.dtype) result = result.astype(dtype) result[null_mask] = fill_value @@ -61,7 +61,7 @@ def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs): if (valid_count == 0).any(): raise ValueError('All-NaN slice encountered') - return np.array(data, dtype=int) + return data def _nan_minmax_object(func, fill_value, value, axis=None, **kwargs): @@ -122,8 +122,8 @@ def nanargmax(a, axis=None): res = np.argmax(a, axis=axis) if mask is not None: - mask = np.all(mask, axis=axis) - if np.any(mask): + mask = mask.all(axis=axis) + if mask.any(): raise ValueError("All-NaN slice encountered") return res From e4e1d1e35d8b73485753240bdc78c4fbba3b7e33 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 16 Aug 2018 14:06:30 +0900 Subject: [PATCH 24/25] Avoid casting to int for dask array --- xarray/core/nanops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index be7edb34c71..2309ed9619d 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -53,9 +53,6 @@ def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs): valid_count = count(value, axis=axis) value = fillna(value, fill_value) data = _dask_or_eager_func(func)(value, axis=axis, **kwargs) - # dask seems return non-integer type - if isinstance(value, dask_array_type): - data = data.astype(int) # TODO This will evaluate dask arrays and might be costly. if (valid_count == 0).any(): From b72a1c852add254a4cdd49408fe4e9c934ceece6 Mon Sep 17 00:00:00 2001 From: fujiisoup Date: Thu, 16 Aug 2018 15:02:40 +0900 Subject: [PATCH 25/25] Update whatsnew --- doc/whats-new.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e5fa692e63f..7f561381b42 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -69,12 +69,11 @@ Bug fixes By `Thomas Voigt `_. - Tests can be run in parallel with pytest-xdist + By `Tony Tung `_. -- Follow up the renamings in dask; from dask.ghost to dask.overlap +- Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. - By `Tony Tung `_. - - Now raises a ValueError when there is a conflict between dimension names and level names of MultiIndex. (:issue:`2299`) By `Keisuke Fujii `_.