From f36f8f268b0035df091a1dc478d4106b801654de Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sun, 17 Jun 2018 20:00:05 +0900
Subject: [PATCH 01/25] Inhouse nanops

---
 xarray/core/duck_array_ops.py       | 122 +-----
 xarray/core/nanops.py               | 575 ++++++++++++++++++++++++++++
 xarray/tests/test_duck_array_ops.py |  14 +-
 3 files changed, 592 insertions(+), 119 deletions(-)
 create mode 100644 xarray/core/nanops.py

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 065ac165a0d..d4c56472e48 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -16,6 +16,7 @@
 from . import dask_array_ops, dtypes, npcompat, nputils
 from .nputils import nanfirst, nanlast
 from .pycompat import dask_array_type
+from . import nanops
 
 try:
     import bottleneck as bn
@@ -213,79 +214,6 @@ def _ignore_warnings_if(condition):
         yield
 
 
-def _nansum_object(value, axis=None, **kwargs):
-    """ In house nansum for object array """
-    value = fillna(value, 0)
-    return _dask_or_eager_func('sum')(value, axis=axis, **kwargs)
-
-
-def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs):
-    """ In house nanmin and nanmax for object array """
-    fill_value = get_fill_value(value.dtype)
-    valid_count = count(value, axis=axis)
-    filled_value = fillna(value, fill_value)
-    data = _dask_or_eager_func(func)(filled_value, axis=axis, **kwargs)
-    if not hasattr(data, 'dtype'):  # scalar case
-        data = dtypes.fill_value(value.dtype) if valid_count == 0 else data
-        return np.array(data, dtype=value.dtype)
-    return where_method(data, valid_count != 0)
-
-
-def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
-    """ In house nanargmin, nanargmax for object arrays. Always return integer
-    type """
-    fill_value = get_fill_value(value.dtype)
-    valid_count = count(value, axis=axis)
-    value = fillna(value, fill_value)
-    data = _dask_or_eager_func(func)(value, axis=axis, **kwargs)
-    # dask seems return non-integer type
-    if isinstance(value, dask_array_type):
-        data = data.astype(int)
-
-    if (valid_count == 0).any():
-        raise ValueError('All-NaN slice encountered')
-
-    return np.array(data, dtype=int)
-
-
-def _nanmean_ddof_object(ddof, value, axis=None, **kwargs):
-    """ In house nanmean. ddof argument will be used in _nanvar method """
-    valid_count = count(value, axis=axis)
-    value = fillna(value, 0)
-    # As dtype inference is impossible for object dtype, we assume float
-    # https://github.com/dask/dask/issues/3162
-    dtype = kwargs.pop('dtype', None)
-    if dtype is None and value.dtype.kind == 'O':
-        dtype = value.dtype if value.dtype.kind in ['cf'] else float
-
-    data = _dask_or_eager_func('sum')(value, axis=axis, dtype=dtype, **kwargs)
-    data = data / (valid_count - ddof)
-    return where_method(data, valid_count != 0)
-
-
-def _nanvar_object(value, axis=None, **kwargs):
-    ddof = kwargs.pop('ddof', 0)
-    kwargs_mean = kwargs.copy()
-    kwargs_mean.pop('keepdims', None)
-    value_mean = _nanmean_ddof_object(ddof=0, value=value, axis=axis,
-                                      keepdims=True, **kwargs_mean)
-    squared = (value.astype(value_mean.dtype) - value_mean)**2
-    return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs)
-
-
-_nan_object_funcs = {
-    'sum': _nansum_object,
-    'min': partial(_nan_minmax_object, 'min', dtypes.get_pos_infinity),
-    'max': partial(_nan_minmax_object, 'max', dtypes.get_neg_infinity),
-    'argmin': partial(_nan_argminmax_object, 'argmin',
-                      dtypes.get_pos_infinity),
-    'argmax': partial(_nan_argminmax_object, 'argmax',
-                      dtypes.get_neg_infinity),
-    'mean': partial(_nanmean_ddof_object, 0),
-    'var': _nanvar_object,
-}
-
-
 def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
                            no_bottleneck=False, coerce_strings=False):
     def f(values, axis=None, skipna=None, **kwargs):
@@ -296,57 +224,15 @@ def f(values, axis=None, skipna=None, **kwargs):
         dtype = kwargs.get('dtype', None)
         values = asarray(values)
 
-        # dask requires dtype argument for object dtype
-        if (values.dtype == 'object' and name in ['sum', ]):
-            kwargs['dtype'] = values.dtype if dtype is None else dtype
-
         if coerce_strings and values.dtype.kind in 'SU':
             values = values.astype(object)
 
         if skipna or (skipna is None and values.dtype.kind in 'cfO'):
-            if values.dtype.kind not in ['u', 'i', 'f', 'c']:
-                func = _nan_object_funcs.get(name, None)
-                using_numpy_nan_func = True
-                if func is None or values.dtype.kind not in 'Ob':
-                    raise NotImplementedError(
-                        'skipna=True not yet implemented for %s with dtype %s'
-                        % (name, values.dtype))
-            else:
-                nanname = 'nan' + name
-                if (isinstance(axis, tuple) or not values.dtype.isnative or
-                        no_bottleneck or (dtype is not None and
-                                          np.dtype(dtype) != values.dtype)):
-                    # bottleneck can't handle multiple axis arguments or
-                    # non-native endianness
-                    if np_compat:
-                        eager_module = npcompat
-                    else:
-                        eager_module = np
-                else:
-                    kwargs.pop('dtype', None)
-                    eager_module = bn
-                func = _dask_or_eager_func(nanname, eager_module)
-                using_numpy_nan_func = (eager_module is np or
-                                        eager_module is npcompat)
+            nanname = 'nan' + name
+            func = getattr(nanops, nanname)
         else:
             func = _dask_or_eager_func(name)
-            using_numpy_nan_func = False
-        with _ignore_warnings_if(using_numpy_nan_func):
-            try:
-                return func(values, axis=axis, **kwargs)
-            except AttributeError:
-                if isinstance(values, dask_array_type):
-                    try:  # dask/dask#3133 dask sometimes needs dtype argument
-                        return func(values, axis=axis, dtype=values.dtype,
-                                    **kwargs)
-                    except AttributeError:
-                        msg = '%s is not yet implemented on dask arrays' % name
-                else:
-                    assert using_numpy_nan_func
-                    msg = ('%s is not available with skipna=False with the '
-                           'installed version of numpy; upgrade to numpy 1.12 '
-                           'or newer to use skipna=True or skipna=None' % name)
-                raise NotImplementedError(msg)
+        return func(values, axis=axis, **kwargs)
     f.numeric_only = numeric_only
     f.__name__ = name
     return f
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
new file mode 100644
index 00000000000..684f67c9cb2
--- /dev/null
+++ b/xarray/core/nanops.py
@@ -0,0 +1,575 @@
+from __future__ import absolute_import, division, print_function
+
+import contextlib
+import inspect
+import warnings
+import functools
+
+import numpy as np
+import pandas as pd
+from pandas.core.nanops import disallow
+
+from . import dask_array_ops, dtypes, npcompat, nputils
+from .nputils import nanfirst, nanlast
+from .pycompat import dask_array_type
+
+
+try:
+    import bottleneck as bn
+    _USE_BOTTLENECK = True
+except ImportError:
+    # use numpy methods instead
+    bn = np
+    _USE_BOTTLENECK = False
+
+
+def _bn_ok_dtype(dt, name):
+    """Return True if bottleneck may be used for dtype `dt` and func `name`.
+    Adapted from pandas.core.nanops; bottleneck chokes on datetime64."""
+    if dt.kind not in 'OmM':  # not object / timedelta64 / datetime64
+
+        # GH 15507
+        # bottleneck does not properly upcast during the sum
+        # so can overflow
+
+        # GH 9422
+        # further we also want to preserve NaN when all elements
+        # are NaN, unlike bottleneck/numpy which consider this
+        # to be 0
+        if name in ['nansum', 'nanprod']:
+            return False
+
+        return True
+    return False
+
+
+class bottleneck_switch(object):
+    """Decorator that tries bottleneck before `alt` (from pandas nanops)."""
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs  # default keyword arguments for the wrapper
+
+    def __call__(self, alt):
+        bn_name = alt.__name__
+
+        try:
+            bn_func = getattr(bn, bn_name)
+        except (AttributeError, NameError):  # pragma: no cover
+            bn_func = None  # `bn` has no function of that name
+
+        @functools.wraps(alt)
+        def f(values, axis=None, **kwds):
+            # fill in decorator-level defaults without overriding the
+            # caller's explicit keyword arguments
+            for k, v in self.kwargs.items():
+                kwds.setdefault(k, v)
+            try:
+                if values.size == 0 and kwds.get('min_count') is None:
+                    # We are empty, returning NA for our type (only for the
+                    # default `min_count` of None).
+                    # TODO(GH-18976) update all the nanops methods to
+                    # correctly handle empty inputs and remove this check.
+                    # NOTE(review): `_na_for_min_count` is undefined here, so
+                    # empty input currently lands in the `except` fallback.
+                    return _na_for_min_count(values, axis)
+
+                if (_USE_BOTTLENECK and not isinstance(values, dask_array_type)
+                        and _bn_ok_dtype(values.dtype, bn_name)):
+                    result = bn_func(values, axis=axis, **kwds)
+
+                    # prefer to treat inf/-inf as NA, but must compute the func
+                    # twice :(
+                    if np.isinf(result).any():
+                        result = alt(values, axis=axis, **kwds)
+                else:
+                    result = alt(values, axis=axis, **kwds)
+            except Exception:  # best effort: any failure falls back to `alt`
+                result = alt(values, axis=axis, **kwds)
+
+            return result
+
+        return f
+
+
+def _replace_nan(a, val):
+    """
+    If `a` is of inexact type, make a copy of `a`, replace NaNs with
+    the `val` value, and return the copy together with a boolean mask
+    marking the locations where NaNs were present. If `a` is not of
+    inexact type, do nothing and return `a` together with a mask of None.
+    Note that scalars will end up as array scalars, which is important
+    for using the result as the value of the out argument in some
+    operations.
+    Parameters
+    ----------
+    a : array-like
+        Input array.
+    val : float
+        NaN values are set to val before doing the operation.
+    Returns
+    -------
+    y : ndarray
+        If `a` is of inexact type, return a copy of `a` with the NaNs
+        replaced by the fill value, otherwise return `a`.
+    mask: {bool, None}
+        If `a` is of inexact type, return a boolean mask marking locations of
+        NaNs, otherwise return None.
+
+    This function is taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    a = np.array(a, subok=True, copy=True)
+
+    if a.dtype == np.object_:
+        # object arrays do not support `isnan` (gh-9009), so make a guess
+        mask = a != a
+    elif issubclass(a.dtype.type, np.inexact):
+        mask = np.isnan(a)
+    else:
+        mask = None
+
+    if mask is not None:
+        np.copyto(a, val, where=mask)
+
+    return a, mask
+
+
+def _copyto(a, val, mask):
+    """
+    Replace values in `a` with `val` where `mask` is True.  This differs from
+    copyto in that it will deal with the case where `a` is a numpy scalar.
+    Parameters
+    ----------
+    a : ndarray or numpy scalar
+        Array or numpy scalar some of whose values are to be replaced
+        by val.
+    val : numpy scalar
+        Value used as a replacement.
+    mask : ndarray, scalar
+        Boolean array. Where True the corresponding element of `a` is
+        replaced by `val`. Broadcasts.
+    Returns
+    -------
+    res : ndarray, scalar
+        Array with elements replaced or scalar `val`.
+
+    This function is taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    if isinstance(a, np.ndarray):
+        np.copyto(a, val, where=mask, casting='unsafe')  # modifies `a`
+    else:
+        a = a.dtype.type(val)  # scalar case: `mask` is not consulted
+    return a
+
+
+def _divide_by_count(a, b, out=None):
+    """
+    Compute a/b ignoring invalid results. If `a` is an array the division
+    is done in place. If `a` is a scalar, then its type is preserved in the
+    output. If out is None, then `a` is used instead so that the
+    division is in place. Note that this is only called with `a` an inexact
+    type.
+    Parameters
+    ----------
+    a : {ndarray, numpy scalar}
+        Numerator. Expected to be of inexact type but not checked.
+    b : {ndarray, numpy scalar}
+        Denominator.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  The default
+        is ``None``; if provided, it must have the same shape as the
+        expected output, but the type will be cast if necessary.
+    Returns
+    -------
+    ret : {ndarray, numpy scalar}
+        The return value is a/b. If `a` was an ndarray the division is done
+        in place. If `a` is a numpy scalar, the division preserves its type.
+
+    This function is taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    with np.errstate(invalid='ignore', divide='ignore'):
+        if isinstance(a, np.ndarray):
+            if out is None:
+                return np.divide(a, b, out=a, casting='unsafe')
+            else:
+                return np.divide(a, b, out=out, casting='unsafe')
+        else:
+            if out is None:
+                return a.dtype.type(a / b)
+            else:
+                # This is questionable, but currently a numpy scalar can
+                # be output to a zero dimensional array.
+                return np.divide(a, b, out=out, casting='unsafe')
+
+
+@bottleneck_switch()
+def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
+    """
+    taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    if a.dtype.kind == 'O':
+        return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis)
+    kwargs = {}
+    if keepdims is not np._NoValue:
+        kwargs['keepdims'] = keepdims
+    if type(a) is np.ndarray and a.dtype != np.object_:
+        # Fast, but not safe for subclasses of ndarray, or object arrays,
+        # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
+        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
+        if np.isnan(res).any():
+            warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
+    else:
+        # Slow, but safe for subclasses of ndarray
+        a, mask = _replace_nan(a, +np.inf)
+        res = np.amin(a, axis=axis, out=out, **kwargs)
+        if mask is None:
+            return res
+
+        # Check for all-NaN axis
+        mask = np.all(mask, axis=axis, **kwargs)
+        if np.any(mask):
+            res = _copyto(res, np.nan, mask)
+            warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
+    return res
+
+
+@bottleneck_switch()
+def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
+    """
+    taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    if a.dtype.kind == 'O':
+        return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis)
+    kwargs = {}
+    if keepdims is not np._NoValue:
+        kwargs['keepdims'] = keepdims
+    if type(a) is np.ndarray and a.dtype != np.object_:
+        # Fast, but not safe for subclasses of ndarray, or object arrays,
+        # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
+        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
+        if np.isnan(res).any():
+            warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
+    else:
+        # Slow, but safe for subclasses of ndarray
+        a, mask = _replace_nan(a, -np.inf)
+        res = np.amax(a, axis=axis, out=out, **kwargs)
+        if mask is None:
+            return res
+
+        # Check for all-NaN axis
+        mask = np.all(mask, axis=axis, **kwargs)
+        if np.any(mask):
+            res = _copyto(res, np.nan, mask)
+            warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
+    return res
+
+
+def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
+    """ In house nanargmin, nanargmax for object arrays. Always return integer
+    type """
+    from .duck_array_ops import isnull, count, fillna
+
+    fill_value = get_fill_value(value.dtype)
+    valid_count = count(value, axis=axis)
+    value = fillna(value, fill_value)
+    data = getattr(np, func)(value, axis=axis, **kwargs)
+    # dask seems return non-integer type
+    if isinstance(value, dask_array_type):
+        data = data.astype(int)
+
+    if (valid_count == 0).any():
+        raise ValueError('All-NaN slice encountered')
+
+    return np.array(data, dtype=int)
+
+
+def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs):
+    """ In house nanmin and nanmax for object array """
+    from .duck_array_ops import isnull, count, fillna, where_method
+
+    fill_value = get_fill_value(value.dtype)
+    valid_count = count(value, axis=axis)
+    filled_value = fillna(value, fill_value)
+    data = getattr(np, func)(filled_value, axis=axis, **kwargs)
+    if not hasattr(data, 'dtype'):  # scalar case
+        data = dtypes.fill_value(value.dtype) if valid_count == 0 else data
+        return np.array(data, dtype=value.dtype)
+    return where_method(data, valid_count != 0)
+
+
+@bottleneck_switch()
+def nanargmin(a, axis=None):
+    if a.dtype.kind == 'O':
+        return _nan_argminmax_object('argmin', dtypes.get_pos_infinity,
+                                     a, axis=axis)
+    a, mask = _replace_nan(a, np.inf)
+    res = np.argmin(a, axis=axis)
+    if mask is not None:
+        mask = np.all(mask, axis=axis)
+        if np.any(mask):
+            raise ValueError("All-NaN slice encountered")
+    return res
+
+
+@bottleneck_switch()
+def nanargmax(a, axis=None):
+    """
+    taken from
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    """
+    if a.dtype.kind == 'O':
+        return _nan_argminmax_object('argmax', dtypes.get_neg_infinity,
+                                     a, axis=axis)
+    a, mask = _replace_nan(a, -np.inf)
+    res = np.argmax(a, axis=axis)
+    if mask is not None:
+        mask = np.all(mask, axis=axis)
+        if np.any(mask):
+            raise ValueError("All-NaN slice encountered")
+    return res
+
+
+@bottleneck_switch()
+def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+    a, mask = _replace_nan(a, 0)  # NaNs are replaced by 0 in a copy of `a`
+    return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+
+
+@bottleneck_switch()
+def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+    a, mask = _replace_nan(a, 1)
+    return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+
+
+@bottleneck_switch()
+def nancumsum(a, axis=None, dtype=None, out=None):
+    a, mask = _replace_nan(a, 0)
+    return np.cumsum(a, axis=axis, dtype=dtype, out=out)
+
+
+@bottleneck_switch()
+def nancumprod(a, axis=None, dtype=None, out=None):
+    a, mask = _replace_nan(a, 1)
+    return np.cumprod(a, axis=axis, dtype=dtype, out=out)
+
+
+@bottleneck_switch()
+def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+    arr, mask = _replace_nan(a, 0)
+    if mask is None:
+        return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+    if dtype is not None and not issubclass(dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then dtype must be inexact")
+    if out is not None and not issubclass(out.dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then out must be inexact")
+
+    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims)
+    tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+    avg = _divide_by_count(tot, cnt, out=out)
+
+    isbad = (cnt == 0)
+    if isbad.any():
+        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
+        # NaN is the only possible bad value, so no further
+        # action is needed to handle bad results.
+    return avg
+
+
+@bottleneck_switch()
+def _nanmedian1d(arr1d, overwrite_input=False):
+    """
+    Private function for rank 1 arrays. Compute the median ignoring NaNs.
+    See nanmedian for parameter usage
+    """
+    arr1d, overwrite_input = _remove_nan_1d(arr1d,
+        overwrite_input=overwrite_input)
+    if arr1d.size == 0:
+        return np.nan
+
+    return np.median(arr1d, overwrite_input=overwrite_input)
+
+
+@bottleneck_switch()
+def _nanmedian(a, axis=None, out=None, overwrite_input=False):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanmedian for parameter usage
+    """
+    if axis is None or a.ndim == 1:
+        part = a.ravel()
+        if out is None:
+            return _nanmedian1d(part, overwrite_input)
+        else:
+            out[...] = _nanmedian1d(part, overwrite_input)
+            return out
+    else:
+        # for small medians use sort + indexing which is still faster than
+        # apply_along_axis
+        # benchmarked with shuffled (50, 50, x) containing a few NaN
+        if a.shape[axis] < 600:
+            return _nanmedian_small(a, axis, out, overwrite_input)
+        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
+        if out is not None:
+            out[...] = result
+        return result
+
+
+def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
+    """
+    sort + indexing median, faster for small medians along multiple
+    dimensions due to the high overhead of apply_along_axis
+    see nanmedian for parameter usage
+    """
+    a = np.ma.masked_array(a, np.isnan(a))
+    m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input)
+    for i in range(np.count_nonzero(m.mask.ravel())):
+        warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3)
+    if out is not None:
+        out[...] = m.filled(np.nan)
+        return out
+    return m.filled(np.nan)
+
+
+@bottleneck_switch()
+def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
+    a = np.asanyarray(a)
+    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
+    # so deal them upfront
+    if a.size == 0:
+        return np.nanmean(a, axis, out=out, keepdims=keepdims)
+
+    r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
+                    overwrite_input=overwrite_input)
+    if keepdims and keepdims is not np._NoValue:
+        return r.reshape(k)
+    else:
+        return r
+
+
+@bottleneck_switch()
+def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
+                  interpolation='linear', keepdims=np._NoValue):
+    a = np.asanyarray(a)
+    q = np.asanyarray(q)
+    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
+    # so deal them upfront
+    if a.size == 0:
+        return np.nanmean(a, axis, out=out, keepdims=keepdims)
+
+    r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out,
+                    overwrite_input=overwrite_input,
+                    interpolation=interpolation)
+    if keepdims and keepdims is not np._NoValue:
+        return r.reshape(q.shape + k)
+    else:
+        return r
+
+
+def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
+                   interpolation='linear'):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanpercentile for parameter usage
+    """
+    if axis is None or a.ndim == 1:
+        part = a.ravel()
+        result = _nanpercentile1d(part, q, overwrite_input, interpolation)
+    else:
+        result = np.apply_along_axis(_nanpercentile1d, axis, a, q,
+                                     overwrite_input, interpolation)
+        # apply_along_axis fills in collapsed axis with results.
+        # Move that axis to the beginning to match percentile's
+        # convention.
+        if q.ndim != 0:
+            result = np.moveaxis(result, axis, 0)
+
+    if out is not None:
+        out[...] = result
+    return result
+
+
+def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'):
+    """
+    Private function for rank 1 arrays. Compute percentile ignoring NaNs.
+    See nanpercentile for parameter usage
+    """
+    arr1d, overwrite_input = _remove_nan_1d(arr1d,
+        overwrite_input=overwrite_input)
+    if arr1d.size == 0:
+        return np.full(q.shape, np.nan)[()]  # convert to scalar
+
+    return np.percentile(arr1d, q, overwrite_input=overwrite_input,
+                         interpolation=interpolation)
+
+
+@bottleneck_switch()
+def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
+    arr, mask = _replace_nan(a, 0)
+    if mask is None:
+        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
+                      keepdims=keepdims)
+
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+    if dtype is not None and not issubclass(dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then dtype must be inexact")
+    if out is not None and not issubclass(out.dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then out must be inexact")
+
+    # Compute mean
+    if type(arr) is np.matrix:
+        _keepdims = np._NoValue
+    else:
+        _keepdims = True
+    # we need to special case matrix for reverse compatibility
+    # in order for this to work, these sums need to be called with
+    # keepdims=True, however matrix now raises an error in this case, but
+    # the reason that it drops the keepdims kwarg is to force keepdims=True
+    # so this used to work by serendipity.
+    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims)
+    avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims)
+    avg = _divide_by_count(avg, cnt)
+
+    # Compute squared deviation from mean.
+    np.subtract(arr, avg, out=arr, casting='unsafe')
+    arr = _copyto(arr, 0, mask)
+    if issubclass(arr.dtype.type, np.complexfloating):
+        sqr = np.multiply(arr, arr.conj(), out=arr).real
+    else:
+        sqr = np.multiply(arr, arr, out=arr)
+
+    # Compute variance.
+    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+    if var.ndim < cnt.ndim:
+        # Subclasses of ndarray may ignore keepdims, so check here.
+        cnt = cnt.squeeze(axis)
+    dof = cnt - ddof
+    var = _divide_by_count(var, dof)
+
+    isbad = (dof <= 0)
+    if np.any(isbad):
+        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2)
+        # NaN, inf, or negative numbers are all possible bad
+        # values, so explicitly replace them with NaN.
+        var = _copyto(var, np.nan, isbad)
+    return var
+
+
+@bottleneck_switch()
+def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
+    var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
+                 keepdims=keepdims)
+    if isinstance(var, np.ndarray):
+        std = np.sqrt(var, out=var)
+    else:
+        std = var.dtype.type(np.sqrt(var))
+    return std
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 3f4adee6713..8938d4b8c6c 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -101,7 +101,10 @@ def test_concatenate_type_promotion(self):
         assert_array_equal(result, np.array([1, 'b'], dtype=object))
 
     def test_all_nan_arrays(self):
-        assert np.isnan(mean([np.nan, np.nan]))
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', 'All-NaN slice')
+            warnings.filterwarnings('ignore', 'Mean of empty slice')
+            assert np.isnan(mean([np.nan, np.nan]))
 
 
 def test_cumsum_1d():
@@ -260,6 +263,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
     # TODO: remove these after resolving
     # https://github.com/dask/dask/issues/3245
     with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', 'Mean of empty slice')
         warnings.filterwarnings('ignore', 'All-NaN slice')
         warnings.filterwarnings('ignore', 'invalid value encountered in')
 
@@ -274,6 +278,8 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
                     expected = getattr(np, func)(da.values, axis=axis)
 
                 actual = getattr(da, func)(skipna=skipna, dim=aggdim)
+                if dask:
+                    isinstance(da.data, dask_array_type)
                 assert np.allclose(actual.values, np.array(expected),
                                    rtol=1.0e-4, equal_nan=True)
             except (TypeError, AttributeError, ZeroDivisionError):
@@ -289,6 +295,8 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
             assert_allclose(actual, expected, rtol=rtol)
             # also check ddof!=0 case
             actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5)
+            if dask:
+                isinstance(da.data, dask_array_type)
             expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
                                      ddof=5)
             assert_allclose(actual, expected, rtol=rtol)
@@ -299,11 +307,15 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
         # make sure the dtype argument
         if func not in ['max', 'min']:
             actual = getattr(da, func)(skipna=skipna, dim=aggdim, dtype=float)
+            if dask:
+                isinstance(da.data, dask_array_type)
             assert actual.dtype == float
 
         # without nan
         da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask)
         actual = getattr(da, func)(skipna=skipna)
+        if dask:
+            isinstance(da.data, dask_array_type)
         expected = getattr(np, 'nan{}'.format(func))(da.values)
         if actual.dtype == object:
             assert actual.values == np.array(expected)

From 76218b2540939a3292d4f27271402da6b9b876b7 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sun, 17 Jun 2018 22:13:10 +0900
Subject: [PATCH 02/25] Cleanup nanops

---
 xarray/core/dtypes.py               |   3 +
 xarray/core/duck_array_ops.py       |  38 ++-
 xarray/core/nanops.py               | 484 +++++-----------------------
 xarray/tests/test_duck_array_ops.py |  35 +-
 4 files changed, 146 insertions(+), 414 deletions(-)

diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py
index 7326b936e2e..7ad44472f06 100644
--- a/xarray/core/dtypes.py
+++ b/xarray/core/dtypes.py
@@ -98,6 +98,9 @@ def maybe_promote(dtype):
     return np.dtype(dtype), fill_value
 
 
+NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT'))
+
+
 def get_fill_value(dtype):
     """Return an appropriate fill value for this dtype.
 
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index d4c56472e48..cf78ca66b13 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -176,7 +176,7 @@ def array_notnull_equiv(arr1, arr2):
 def count(data, axis=None):
     """Count the number of non-NA in this array along the given axis or axes
     """
-    return sum(~isnull(data), axis=axis)
+    return np.sum(~isnull(data), axis=axis)
 
 
 def where(condition, x, y):
@@ -227,12 +227,34 @@ def f(values, axis=None, skipna=None, **kwargs):
         if coerce_strings and values.dtype.kind in 'SU':
             values = values.astype(object)
 
+        np_module = npcompat if np_compat else np
+        func = None
         if skipna or (skipna is None and values.dtype.kind in 'cfO'):
             nanname = 'nan' + name
-            func = getattr(nanops, nanname)
-        else:
-            func = _dask_or_eager_func(name)
-        return func(values, axis=axis, **kwargs)
+            func = getattr(
+                nanops, nanname, _dask_or_eager_func(
+                    nanname, eager_module=np_module))
+        if func is None:
+            if dtype is None:
+                func = _dask_or_eager_func(name)
+            else:
+                func = getattr(np, name)
+
+        try:
+            return func(values, axis=axis, **kwargs)
+        except AttributeError:
+            if isinstance(values, dask_array_type):
+                try:  # dask/dask#3133 dask sometimes needs dtype argument
+                    return func(values, axis=axis, dtype=values.dtype,
+                                **kwargs)
+                except AttributeError:
+                    msg = '%s is not yet implemented on dask arrays' % name
+            else:
+                msg = ('%s is not available with skipna=False with the '
+                       'installed version of numpy; upgrade to numpy 1.12 '
+                       'or newer to use skipna=True or skipna=None' % name)
+            raise NotImplementedError(msg)
+
     f.numeric_only = numeric_only
     f.__name__ = name
     return f
@@ -247,11 +269,11 @@ def f(values, axis=None, skipna=None, **kwargs):
 std = _create_nan_agg_method('std', numeric_only=True)
 var = _create_nan_agg_method('var', numeric_only=True)
 median = _create_nan_agg_method('median', numeric_only=True)
-prod = _create_nan_agg_method('prod', numeric_only=True, no_bottleneck=True)
+prod = _create_nan_agg_method('prod', numeric_only=True)
 cumprod_1d = _create_nan_agg_method(
-    'cumprod', numeric_only=True, np_compat=True, no_bottleneck=True)
+    'cumprod', numeric_only=True, np_compat=True)
 cumsum_1d = _create_nan_agg_method(
-    'cumsum', numeric_only=True, np_compat=True, no_bottleneck=True)
+    'cumsum', numeric_only=True, np_compat=True)
 
 
 def _nd_cum_func(cum_func, array, axis, **kwargs):
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 684f67c9cb2..cc4cc0d0c62 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -1,16 +1,10 @@
 from __future__ import absolute_import, division, print_function
 
-import contextlib
-import inspect
-import warnings
 import functools
 
 import numpy as np
-import pandas as pd
-from pandas.core.nanops import disallow
 
-from . import dask_array_ops, dtypes, npcompat, nputils
-from .nputils import nanfirst, nanlast
+from . import dtypes
 from .pycompat import dask_array_type
 
 
@@ -22,33 +16,16 @@
     bn = np
     _USE_BOTTLENECK = False
 
-
-def _bn_ok_dtype(dt, name):
-    # This function is taken from pandas.core.nanops
-    # Bottleneck chokes on datetime64
-    if (not is_object_dtype(dt) and not is_datetime_or_timedelta_dtype(dt)):
-
-        # GH 15507
-        # bottleneck does not properly upcast during the sum
-        # so can overflow
-
-        # GH 9422
-        # further we also want to preserve NaN when all elements
-        # are NaN, unlinke bottleneck/numpy which consider this
-        # to be 0
-        if name in ['nansum', 'nanprod']:
-            return False
-
-        return True
-    return False
+try:
+    import dask.array as dask_array
+    from . import dask_array_compat
+except ImportError:
+    dask_array = None
+    dask_array_compat = None
 
 
 class bottleneck_switch(object):
-    # This function is taken from pandas.core.nanops
-
-    def __init__(self, **kwargs):
-        self.kwargs = kwargs
-
+    """ xarray-version of pandas.core.nanops.bottleneck_switch """
     def __call__(self, alt):
         bn_name = alt.__name__
 
@@ -59,31 +36,20 @@ def __call__(self, alt):
 
         @functools.wraps(alt)
         def f(values, axis=None, **kwds):
-            if len(self.kwargs) > 0:
-                for k, v in compat.iteritems(self.kwargs):
-                    if k not in kwds:
-                        kwds[k] = v
-            try:
-                if values.size == 0 and kwds.get('min_count') is None:
-                    # We are empty, returning NA for our type
-                    # Only applies for the default `min_count` of None
-                    # since that affects how empty arrays are handled.
-                    # TODO(GH-18976) update all the nanops methods to
-                    # correctly handle empty inputs and remove this check.
-                    # It *may* just be `var`
-                    return _na_for_min_count(values, axis)
-
-                if (_USE_BOTTLENECK and not isinstance(value, dask_array_type)
-                        and _bn_ok_dtype(values.dtype, bn_name)):
-                    result = bn_func(values, axis=axis, **kwds)
-
-                    # prefer to treat inf/-inf as NA, but must compute the func
-                    # twice :(
-                    if _has_infs(result):
-                        result = alt(values, axis=axis, **kwds)
-                else:
-                    result = alt(values, axis=axis, **kwds)
-            except Exception:
+            dtype = kwds.get('dtype', None)
+            min_count = kwds.get('min_count', 1)
+
+            if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK
+                    and not isinstance(axis, tuple)
+                    and values.dtype.kind in 'uifc'
+                    and values.dtype.isnative
+                    and (dtype is None or np.dtype(dtype) == values.dtype)
+                    and min_count != 1):
+                # bottleneck does not take care dtype, min_count
+                kwds.pop('dtype', None)
+                kwds.pop('min_count', 1)
+                result = bn_func(values, axis=axis, **kwds)
+            else:
                 result = alt(values, axis=axis, **kwds)
 
             return result
@@ -118,8 +84,6 @@ def _replace_nan(a, val):
     This function is taken from
     https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
     """
-    a = np.array(a, subok=True, copy=True)
-
     if a.dtype == np.object_:
         # object arrays do not support `isnan` (gh-9009), so make a guess
         mask = a != a
@@ -129,149 +93,57 @@ def _replace_nan(a, val):
         mask = None
 
     if mask is not None:
-        np.copyto(a, val, where=mask)
+        if isinstance(a, dask_array_type):
+            return dask_array.where(mask, val, a), mask
+        return np.where(mask, val, a), mask
 
     return a, mask
 
 
-def _copyto(a, val, mask):
+def _maybe_null_out(result, axis, mask, min_count=1):
     """
-    Replace values in `a` with NaN where `mask` is True.  This differs from
-    copyto in that it will deal with the case where `a` is a numpy scalar.
-    Parameters
-    ----------
-    a : ndarray or numpy scalar
-        Array or numpy scalar some of whose values are to be replaced
-        by val.
-    val : numpy scalar
-        Value used a replacement.
-    mask : ndarray, scalar
-        Boolean array. Where True the corresponding element of `a` is
-        replaced by `val`. Broadcasts.
-    Returns
-    -------
-    res : ndarray, scalar
-        Array with elements replaced or scalar `val`.
-
-    This function is taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    xarray version of pandas.core.nanops._maybe_null_out
     """
-    if isinstance(a, np.ndarray):
-        np.copyto(a, val, where=mask, casting='unsafe')
-    else:
-        a = a.dtype.type(val)
-    return a
+    if axis is not None and getattr(result, 'ndim', False):
+        null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
+        if np.any(null_mask):
+            dtype, fill_value = dtypes.maybe_promote(result.dtype)
+            result = result.astype(dtype)
+            result[null_mask] = fill_value
 
+    elif (not isinstance(result, dask_array_type) and
+            result not in dtypes.NAT_TYPES):
+        null_mask = mask.size - mask.sum()
+        if null_mask < min_count:
+            result = np.nan
 
-def _divide_by_count(a, b, out=None):
-    """
-    Compute a/b ignoring invalid results. If `a` is an array the division
-    is done in place. If `a` is a scalar, then its type is preserved in the
-    output. If out is None, then then a is used instead so that the
-    division is in place. Note that this is only called with `a` an inexact
-    type.
-    Parameters
-    ----------
-    a : {ndarray, numpy scalar}
-        Numerator. Expected to be of inexact type but not checked.
-    b : {ndarray, numpy scalar}
-        Denominator.
-    out : ndarray, optional
-        Alternate output array in which to place the result.  The default
-        is ``None``; if provided, it must have the same shape as the
-        expected output, but the type will be cast if necessary.
-    Returns
-    -------
-    ret : {ndarray, numpy scalar}
-        The return value is a/b. If `a` was an ndarray the division is done
-        in place. If `a` is a numpy scalar, the division preserves its type.
-
-    This function is taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
-    """
-    with np.errstate(invalid='ignore', divide='ignore'):
-        if isinstance(a, np.ndarray):
-            if out is None:
-                return np.divide(a, b, out=a, casting='unsafe')
-            else:
-                return np.divide(a, b, out=out, casting='unsafe')
-        else:
-            if out is None:
-                return a.dtype.type(a / b)
-            else:
-                # This is questionable, but currently a numpy scalar can
-                # be output to a zero dimensional array.
-                return np.divide(a, b, out=out, casting='unsafe')
+    return result
 
 
 @bottleneck_switch()
 def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
-    """
-    taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
-    """
     if a.dtype.kind == 'O':
         return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis)
-    kwargs = {}
-    if keepdims is not np._NoValue:
-        kwargs['keepdims'] = keepdims
-    if type(a) is np.ndarray and a.dtype != np.object_:
-        # Fast, but not safe for subclasses of ndarray, or object arrays,
-        # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
-        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
-        if np.isnan(res).any():
-            warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
-    else:
-        # Slow, but safe for subclasses of ndarray
-        a, mask = _replace_nan(a, +np.inf)
-        res = np.amin(a, axis=axis, out=out, **kwargs)
-        if mask is None:
-            return res
-
-        # Check for all-NaN axis
-        mask = np.all(mask, axis=axis, **kwargs)
-        if np.any(mask):
-            res = _copyto(res, np.nan, mask)
-            warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
-    return res
+
+    if isinstance(a, dask_array_type):
+        return dask_array.nanmin(a, axis=axis)
+    return np.nanmin(a, axis=axis)
 
 
 @bottleneck_switch()
 def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
-    """
-    taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
-    """
     if a.dtype.kind == 'O':
         return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis)
-    kwargs = {}
-    if keepdims is not np._NoValue:
-        kwargs['keepdims'] = keepdims
-    if type(a) is np.ndarray and a.dtype != np.object_:
-        # Fast, but not safe for subclasses of ndarray, or object arrays,
-        # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
-        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
-        if np.isnan(res).any():
-            warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
-    else:
-        # Slow, but safe for subclasses of ndarray
-        a, mask = _replace_nan(a, -np.inf)
-        res = np.amax(a, axis=axis, out=out, **kwargs)
-        if mask is None:
-            return res
-
-        # Check for all-NaN axis
-        mask = np.all(mask, axis=axis, **kwargs)
-        if np.any(mask):
-            res = _copyto(res, np.nan, mask)
-            warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
-    return res
+
+    if isinstance(a, dask_array_type):
+        return dask_array.nanmax(a, axis=axis)
+    return np.nanmax(a, axis=axis)
 
 
 def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
     """ In house nanargmin, nanargmax for object arrays. Always return integer
     type """
-    from .duck_array_ops import isnull, count, fillna
+    from .duck_array_ops import count, fillna
 
     fill_value = get_fill_value(value.dtype)
     valid_count = count(value, axis=axis)
@@ -289,7 +161,7 @@ def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
 
 def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs):
     """ In house nanmin and nanmax for object array """
-    from .duck_array_ops import isnull, count, fillna, where_method
+    from .duck_array_ops import count, fillna, where_method
 
     fill_value = get_fill_value(value.dtype)
     valid_count = count(value, axis=axis)
@@ -334,242 +206,50 @@ def nanargmax(a, axis=None):
 
 
 @bottleneck_switch()
-def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
+           min_count=None):
     a, mask = _replace_nan(a, 0)
-    return np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims)
-
-
-@bottleneck_switch()
-def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
-    a, mask = _replace_nan(a, 1)
-    return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-
-
-@bottleneck_switch()
-def nancumsum(a, axis=None, dtype=None, out=None):
-    a, mask = _replace_nan(a, 0)
-    return np.cumsum(a, axis=axis, dtype=dtype, out=out)
-
-
-@bottleneck_switch()
-def nancumprod(a, axis=None, dtype=None, out=None):
-    a, mask = _replace_nan(a, 1)
-    return np.cumprod(a, axis=axis, dtype=dtype, out=out)
-
-
-@bottleneck_switch()
-def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
-    arr, mask = _replace_nan(a, 0)
-    if mask is None:
-        return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-
-    if dtype is not None:
-        dtype = np.dtype(dtype)
-    if dtype is not None and not issubclass(dtype.type, np.inexact):
-        raise TypeError("If a is inexact, then dtype must be inexact")
-    if out is not None and not issubclass(out.dtype.type, np.inexact):
-        raise TypeError("If a is inexact, then out must be inexact")
-
-    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims)
-    tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-    avg = _divide_by_count(tot, cnt, out=out)
-
-    isbad = (cnt == 0)
-    if isbad.any():
-        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
-        # NaN is the only possible bad value, so no further
-        # action is needed to handle bad results.
-    return avg
-
-
-@bottleneck_switch()
-def _nanmedian1d(arr1d, overwrite_input=False):
-    """
-    Private function for rank 1 arrays. Compute the median ignoring NaNs.
-    See nanmedian for parameter usage
-    """
-    arr1d, overwrite_input = _remove_nan_1d(arr1d,
-        overwrite_input=overwrite_input)
-    if arr1d.size == 0:
-        return np.nan
-
-    return np.median(arr1d, overwrite_input=overwrite_input)
-
-
-@bottleneck_switch()
-def _nanmedian(a, axis=None, out=None, overwrite_input=False):
-    """
-    Private function that doesn't support extended axis or keepdims.
-    These methods are extended to this function using _ureduce
-    See nanmedian for parameter usage
-    """
-    if axis is None or a.ndim == 1:
-        part = a.ravel()
-        if out is None:
-            return _nanmedian1d(part, overwrite_input)
-        else:
-            out[...] = _nanmedian1d(part, overwrite_input)
-            return out
+    result = np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims)
+    if min_count is not None:
+        return _maybe_null_out(result, axis, mask, min_count)
     else:
-        # for small medians use sort + indexing which is still faster than
-        # apply_along_axis
-        # benchmarked with shuffled (50, 50, x) containing a few NaN
-        if a.shape[axis] < 600:
-            return _nanmedian_small(a, axis, out, overwrite_input)
-        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
-        if out is not None:
-            out[...] = result
         return result
 
 
-def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
-    """
-    sort + indexing median, faster for small medians along multiple
-    dimensions due to the high overhead of apply_along_axis
-    see nanmedian for parameter usage
-    """
-    a = np.ma.masked_array(a, np.isnan(a))
-    m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input)
-    for i in range(np.count_nonzero(m.mask.ravel())):
-        warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3)
-    if out is not None:
-        out[...] = m.filled(np.nan)
-        return out
-    return m.filled(np.nan)
-
-
-@bottleneck_switch()
-def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
-    a = np.asanyarray(a)
-    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
-    # so deal them upfront
-    if a.size == 0:
-        return np.nanmean(a, axis, out=out, keepdims=keepdims)
-
-    r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
-                    overwrite_input=overwrite_input)
-    if keepdims and keepdims is not np._NoValue:
-        return r.reshape(k)
-    else:
-        return r
-
-
-@bottleneck_switch()
-def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
-                  interpolation='linear', keepdims=np._NoValue):
-    a = np.asanyarray(a)
-    q = np.asanyarray(q)
-    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
-    # so deal them upfront
-    if a.size == 0:
-        return np.nanmean(a, axis, out=out, keepdims=keepdims)
-
-    r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out,
-                    overwrite_input=overwrite_input,
-                    interpolation=interpolation)
-    if keepdims and keepdims is not np._NoValue:
-        return r.reshape(q.shape + k)
-    else:
-        return r
-
-
-def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
-                   interpolation='linear'):
-    """
-    Private function that doesn't support extended axis or keepdims.
-    These methods are extended to this function using _ureduce
-    See nanpercentile for parameter usage
-    """
-    if axis is None or a.ndim == 1:
-        part = a.ravel()
-        result = _nanpercentile1d(part, q, overwrite_input, interpolation)
-    else:
-        result = np.apply_along_axis(_nanpercentile1d, axis, a, q,
-                                     overwrite_input, interpolation)
-        # apply_along_axis fills in collapsed axis with results.
-        # Move that axis to the beginning to match percentile's
-        # convention.
-        if q.ndim != 0:
-            result = np.moveaxis(result, axis, 0)
-
-    if out is not None:
-        out[...] = result
-    return result
-
-
-def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'):
-    """
-    Private function for rank 1 arrays. Compute percentile ignoring NaNs.
-    See nanpercentile for parameter usage
-    """
-    arr1d, overwrite_input = _remove_nan_1d(arr1d,
-        overwrite_input=overwrite_input)
-    if arr1d.size == 0:
-        return np.full(q.shape, np.nan)[()]  # convert to scalar
+def _nanmean_ddof_object(ddof, value, axis=None, **kwargs):
+    """ In house nanmean. ddof argument will be used in _nanvar method """
+    from .duck_array_ops import (count, fillna, _dask_or_eager_func,
+                                 where_method)
 
-    return np.percentile(arr1d, q, overwrite_input=overwrite_input,
-                         interpolation=interpolation)
+    valid_count = count(value, axis=axis)
+    value = fillna(value, 0)
+    # As dtype inference is impossible for object dtype, we assume float
+    # https://github.com/dask/dask/issues/3162
+    dtype = kwargs.pop('dtype', None)
+    if dtype is None and value.dtype.kind == 'O':
+        dtype = value.dtype if value.dtype.kind in ['cf'] else float
+
+    data = _dask_or_eager_func('sum')(value, axis=axis, dtype=dtype, **kwargs)
+    data = data / (valid_count - ddof)
+    return where_method(data, valid_count != 0)
 
 
 @bottleneck_switch()
-def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
-    arr, mask = _replace_nan(a, 0)
-    if mask is None:
-        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
-                      keepdims=keepdims)
-
-    if dtype is not None:
-        dtype = np.dtype(dtype)
-    if dtype is not None and not issubclass(dtype.type, np.inexact):
-        raise TypeError("If a is inexact, then dtype must be inexact")
-    if out is not None and not issubclass(out.dtype.type, np.inexact):
-        raise TypeError("If a is inexact, then out must be inexact")
-
-    # Compute mean
-    if type(arr) is np.matrix:
-        _keepdims = np._NoValue
-    else:
-        _keepdims = True
-    # we need to special case matrix for reverse compatibility
-    # in order for this to work, these sums need to be called with
-    # keepdims=True, however matrix now raises an error in this case, but
-    # the reason that it drops the keepdims kwarg is to force keepdims=True
-    # so this used to work by serendipity.
-    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims)
-    avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims)
-    avg = _divide_by_count(avg, cnt)
-
-    # Compute squared deviation from mean.
-    np.subtract(arr, avg, out=arr, casting='unsafe')
-    arr = _copyto(arr, 0, mask)
-    if issubclass(arr.dtype.type, np.complexfloating):
-        sqr = np.multiply(arr, arr.conj(), out=arr).real
-    else:
-        sqr = np.multiply(arr, arr, out=arr)
+def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+    if a.dtype.kind == 'O':
+        return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)
 
-    # Compute variance.
-    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-    if var.ndim < cnt.ndim:
-        # Subclasses of ndarray may ignore keepdims, so check here.
-        cnt = cnt.squeeze(axis)
-    dof = cnt - ddof
-    var = _divide_by_count(var, dof)
+    if isinstance(a, dask_array_type):
+        return dask_array.nanmean(a, axis=axis, dtype=dtype)
 
-    isbad = (dof <= 0)
-    if np.any(isbad):
-        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2)
-        # NaN, inf, or negative numbers are all possible bad
-        # values, so explicitly replace them with NaN.
-        var = _copyto(var, np.nan, isbad)
-    return var
+    return np.nanmean(a, axis=axis, dtype=dtype)
 
 
-@bottleneck_switch()
-def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
-    var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
-                 keepdims=keepdims)
-    if isinstance(var, np.ndarray):
-        std = np.sqrt(var, out=var)
+def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
+            min_count=None):
+    a, mask = _replace_nan(a, 1)
+    result = np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+    if min_count is not None:
+        return _maybe_null_out(result, axis, mask, min_count)
     else:
-        std = var.dtype.type(np.sqrt(var))
-    return std
+        return result
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 8938d4b8c6c..4a5300419e9 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -3,6 +3,7 @@
 from distutils.version import LooseVersion
 
 import numpy as np
+import pandas as pd
 import pytest
 from numpy import array, nan
 import warnings
@@ -238,6 +239,11 @@ def series_reduce(da, func, dim, **kwargs):
         return concat(da1, dim=d)
 
 
+def assert_dask_array(da, dask):
+    if dask and da.ndim > 0:
+        assert isinstance(da.data, dask_array_type)
+
+
 @pytest.mark.parametrize('dim_num', [1, 2])
 @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
 @pytest.mark.parametrize('dask', [False, True])
@@ -278,8 +284,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
                     expected = getattr(np, func)(da.values, axis=axis)
 
                 actual = getattr(da, func)(skipna=skipna, dim=aggdim)
-                if dask:
-                    isinstance(da.data, dask_array_type)
+                assert_dask_array(actual, dask)
                 assert np.allclose(actual.values, np.array(expected),
                                    rtol=1.0e-4, equal_nan=True)
             except (TypeError, AttributeError, ZeroDivisionError):
@@ -307,8 +312,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
         # make sure the dtype argument
         if func not in ['max', 'min']:
             actual = getattr(da, func)(skipna=skipna, dim=aggdim, dtype=float)
-            if dask:
-                isinstance(da.data, dask_array_type)
+            assert_dask_array(actual, dask)
             assert actual.dtype == float
 
         # without nan
@@ -402,3 +406,26 @@ def test_dask_rolling(axis, window, center):
     with pytest.raises(ValueError):
         rolling_window(dx, axis=axis, window=100, center=center,
                        fill_value=np.nan)
+
+
+@pytest.mark.parametrize('dim_num', [1, 2])
+@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
+@pytest.mark.parametrize('dask', [False, True])
+@pytest.mark.parametrize('func', ['sum', 'prod'])
+@pytest.mark.parametrize('aggdim', [None, 'x'])
+def test_min_count(dim_num, dtype, dask, func, aggdim):
+    if dask and not has_dask:
+        pytest.skip('requires dask')
+
+    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
+    min_count = 3
+
+    actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)
+
+    if LooseVersion(pd.__version__) >= LooseVersion('0.22.0'):
+        # min_count has pandas > 0.22
+        expected = series_reduce(da, func, skipna=True, dim=aggdim,
+                                 min_count=min_count)
+        assert_allclose(actual, expected)
+
+    assert_dask_array(actual, dask)

From 943e2b111c4b0cb2e5edf67c54818510365aa3fe Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Mon, 18 Jun 2018 21:23:30 +0900
Subject: [PATCH 03/25] remove NAT_TYPES

---
 xarray/core/dtypes.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py
index 7ad44472f06..7326b936e2e 100644
--- a/xarray/core/dtypes.py
+++ b/xarray/core/dtypes.py
@@ -98,9 +98,6 @@ def maybe_promote(dtype):
     return np.dtype(dtype), fill_value
 
 
-NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT'))
-
-
 def get_fill_value(dtype):
     """Return an appropriate fill value for this dtype.
 

From 84fc69ec807349338a953fbb17f6ac7213c0f8f0 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Mon, 18 Jun 2018 21:37:24 +0900
Subject: [PATCH 04/25] flake8.

---
 xarray/core/nanops.py         | 12 ++++++------
 xarray/tests/test_variable.py |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index cc4cc0d0c62..080a7a39f68 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -39,12 +39,12 @@ def f(values, axis=None, **kwds):
             dtype = kwds.get('dtype', None)
             min_count = kwds.get('min_count', 1)
 
-            if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK
-                    and not isinstance(axis, tuple)
-                    and values.dtype.kind in 'uifc'
-                    and values.dtype.isnative
-                    and (dtype is None or np.dtype(dtype) == values.dtype)
-                    and min_count != 1):
+            if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and
+                    not isinstance(axis, tuple) and
+                    values.dtype.kind in 'uifc' and
+                    values.dtype.isnative and
+                    (dtype is None or np.dtype(dtype) == values.dtype) and
+                    min_count != 1):
                 # bottleneck does not take care dtype, min_count
                 kwds.pop('dtype', None)
                 kwds.pop('min_count', 1)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index c486a394ae6..9800273cb60 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1508,8 +1508,8 @@ def test_reduce_funcs(self):
         assert_identical(v.all(dim='x'), Variable([], False))
 
         v = Variable('t', pd.date_range('2000-01-01', periods=3))
-        with pytest.raises(NotImplementedError):
-            v.argmax(skipna=True)
+        v.argmax(skipna=True)
+        
         assert_identical(
             v.max(), Variable([], pd.Timestamp('2000-01-03')))
 

From 11d735fdc0b7824b199a3aad1c710d035dc85f12 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Mon, 18 Jun 2018 21:40:00 +0900
Subject: [PATCH 05/25] another flake8

---
 xarray/tests/test_variable.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 9800273cb60..2efa940510a 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1509,7 +1509,7 @@ def test_reduce_funcs(self):
 
         v = Variable('t', pd.date_range('2000-01-01', periods=3))
         v.argmax(skipna=True)
-        
+
         assert_identical(
             v.max(), Variable([], pd.Timestamp('2000-01-03')))
 

From 7a079f603e467ef3d20d24551597e58a7648b685 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Tue, 19 Jun 2018 07:07:02 +0900
Subject: [PATCH 06/25] recover nat types

---
 xarray/core/dtypes.py | 3 +++
 xarray/core/nanops.py | 1 +
 2 files changed, 4 insertions(+)

diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py
index 7326b936e2e..7ad44472f06 100644
--- a/xarray/core/dtypes.py
+++ b/xarray/core/dtypes.py
@@ -98,6 +98,9 @@ def maybe_promote(dtype):
     return np.dtype(dtype), fill_value
 
 
+NAT_TYPES = (np.datetime64('NaT'), np.timedelta64('NaT'))
+
+
 def get_fill_value(dtype):
     """Return an appropriate fill value for this dtype.
 
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 080a7a39f68..bb84604e36d 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -83,6 +83,7 @@ def _replace_nan(a, val):
 
     This function is taken from
     https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
+    but slightly modified to take care of dask.array
     """
     if a.dtype == np.object_:
         # object arrays do not support `isnan` (gh-9009), so make a guess

From 441be59d7ef2331a75bcb06efa276b09bf20bb8a Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Tue, 19 Jun 2018 10:34:33 +0900
Subject: [PATCH 07/25] remove keepdims option from nanops (to make them
 compatible with numpy==1.11).

---
 xarray/core/nanops.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index bb84604e36d..78b8822e4ea 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -50,6 +50,7 @@ def f(values, axis=None, **kwds):
                 kwds.pop('min_count', 1)
                 result = bn_func(values, axis=axis, **kwds)
             else:
+                print(kwds)
                 result = alt(values, axis=axis, **kwds)
 
             return result
@@ -122,7 +123,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
 
 
 @bottleneck_switch()
-def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
+def nanmin(a, axis=None, out=None):
     if a.dtype.kind == 'O':
         return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis)
 
@@ -132,7 +133,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
 
 
 @bottleneck_switch()
-def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
+def nanmax(a, axis=None, out=None):
     if a.dtype.kind == 'O':
         return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis)
 
@@ -207,10 +208,9 @@ def nanargmax(a, axis=None):
 
 
 @bottleneck_switch()
-def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
-           min_count=None):
+def nansum(a, axis=None, dtype=None, out=None, min_count=None):
     a, mask = _replace_nan(a, 0)
-    result = np.sum(a, axis=axis, dtype=dtype, keepdims=keepdims)
+    result = np.sum(a, axis=axis, dtype=dtype)
     if min_count is not None:
         return _maybe_null_out(result, axis, mask, min_count)
     else:
@@ -236,7 +236,7 @@ def _nanmean_ddof_object(ddof, value, axis=None, **kwargs):
 
 
 @bottleneck_switch()
-def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
+def nanmean(a, axis=None, dtype=None, out=None):
     if a.dtype.kind == 'O':
         return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)
 
@@ -246,10 +246,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
     return np.nanmean(a, axis=axis, dtype=dtype)
 
 
-def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
-            min_count=None):
+def nanprod(a, axis=None, dtype=None, out=None, min_count=None):
     a, mask = _replace_nan(a, 1)
-    result = np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+    result = np.prod(a, axis=axis, dtype=dtype, out=out)
     if min_count is not None:
         return _maybe_null_out(result, axis, mask, min_count)
     else:

From f95054b902915321bd94674e9175a5d26e063340 Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Tue, 19 Jun 2018 10:45:37 +0900
Subject: [PATCH 08/25] Test aggregation over multiple dimensions

---
 xarray/tests/test_duck_array_ops.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 4a5300419e9..cc4d61f0fff 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -248,6 +248,7 @@ def assert_dask_array(da, dask):
 @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
 @pytest.mark.parametrize('dask', [False, True])
 @pytest.mark.parametrize('func', ['sum', 'min', 'max', 'mean', 'var'])
+# TODO test cumsum, cumprod
 @pytest.mark.parametrize('skipna', [False, True])
 @pytest.mark.parametrize('aggdim', [None, 'x'])
 def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
@@ -429,3 +430,16 @@ def test_min_count(dim_num, dtype, dask, func, aggdim):
         assert_allclose(actual, expected)
 
     assert_dask_array(actual, dask)
+
+
+@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
+@pytest.mark.parametrize('dask', [False, True])
+@pytest.mark.parametrize('func', ['sum', 'prod'])
+def test_multiple_dims(dtype, dask, func):
+    if dask and not has_dask:
+        pytest.skip('requires dask')
+    da = construct_dataarray(3, dtype, contains_nan=True, dask=dask)
+
+    actual = getattr(da, func)(('x', 'y'))
+    expected = getattr(getattr(da, func)('x'), func)('y')
+    assert_allclose(actual, expected)

From 9211b64f24f3d66193d37b455159bb3f445b3d6d Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Tue, 19 Jun 2018 15:28:07 +0900
Subject: [PATCH 09/25] Remove print.

---
 xarray/core/nanops.py         | 1 -
 xarray/tests/test_variable.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 78b8822e4ea..728a7b369ab 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -50,7 +50,6 @@ def f(values, axis=None, **kwds):
                 kwds.pop('min_count', 1)
                 result = bn_func(values, axis=axis, **kwds)
             else:
-                print(kwds)
                 result = alt(values, axis=axis, **kwds)
 
             return result
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 2efa940510a..2306240857c 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1508,7 +1508,7 @@ def test_reduce_funcs(self):
         assert_identical(v.all(dim='x'), Variable([], False))
 
         v = Variable('t', pd.date_range('2000-01-01', periods=3))
-        v.argmax(skipna=True)
+        assert v.argmax(skipna=True) == 2
 
         assert_identical(
             v.max(), Variable([], pd.Timestamp('2000-01-03')))

From 491ce2fcd35c20efdc6663349fe66d763c56495c Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Wed, 20 Jun 2018 19:17:56 +0900
Subject: [PATCH 10/25] Docs. More cleanup.

---
 doc/whats-new.rst                   |  5 ++
 xarray/core/common.py               | 41 ++++++++--------
 xarray/core/duck_array_ops.py       | 34 ++++++++-----
 xarray/core/nanops.py               |  4 +-
 xarray/core/ops.py                  | 14 +++++-
 xarray/tests/test_duck_array_ops.py | 75 +++++++++++++++++++++++++++++
 6 files changed, 136 insertions(+), 37 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 5871b8bb0a3..4a48c4e9972 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,6 +36,11 @@ Documentation
 Enhancements
 ~~~~~~~~~~~~
 
+- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and
+  :py:meth:`~xarray.Dataset.mean`.
+  (:issue:`2230`)
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.
+
 Bug fixes
 ~~~~~~~~~
 
diff --git a/xarray/core/common.py b/xarray/core/common.py
index d69c60eed56..a7a13ab3871 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -2,6 +2,7 @@
 
 import warnings
 from distutils.version import LooseVersion
+from textwrap import dedent
 
 import numpy as np
 import pandas as pd
@@ -27,20 +28,20 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
                                    allow_lazy=True, **kwargs)
         return wrapped_func
 
-    _reduce_extra_args_docstring = \
-        """dim : str or sequence of str, optional
+    _reduce_extra_args_docstring = dedent("""\
+        dim : str or sequence of str, optional
             Dimension(s) over which to apply `{name}`.
         axis : int or sequence of int, optional
             Axis(es) over which to apply `{name}`. Only one of the 'dim'
             and 'axis' arguments can be supplied. If neither are supplied, then
-            `{name}` is calculated over axes."""
+            `{name}` is calculated over axes.""")
 
-    _cum_extra_args_docstring = \
-        """dim : str or sequence of str, optional
+    _cum_extra_args_docstring = dedent("""\
+        dim : str or sequence of str, optional
             Dimension over which to apply `{name}`.
         axis : int or sequence of int, optional
             Axis over which to apply `{name}`. Only one of the 'dim'
-            and 'axis' arguments can be supplied."""
+            and 'axis' arguments can be supplied.""")
 
 
 class ImplementsDatasetReduce(object):
@@ -308,12 +309,12 @@ def assign_coords(self, **kwargs):
         assigned : same type as caller
             A new object with the new coordinates in addition to the existing
             data.
-            
+
         Examples
         --------
-        
+
         Convert longitude coordinates from 0-359 to -180-179:
-        
+
         >>> da = xr.DataArray(np.random.rand(4),
         ...                   coords=[np.array([358, 359, 0, 1])],
         ...                   dims='lon')
@@ -445,11 +446,11 @@ def groupby(self, group, squeeze=True):
         grouped : GroupBy
             A `GroupBy` object patterned after `pandas.GroupBy` that can be
             iterated over in the form of `(unique_value, grouped_array)` pairs.
-            
+
         Examples
         --------
         Calculate daily anomalies for daily data:
-        
+
         >>> da = xr.DataArray(np.linspace(0, 1826, num=1827),
         ...                   coords=[pd.date_range('1/1/2000', '31/12/2004',
         ...                           freq='D')],
@@ -465,7 +466,7 @@ def groupby(self, group, squeeze=True):
         Coordinates:
           * time       (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
             dayofyear  (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ...
-        
+
         See Also
         --------
         core.groupby.DataArrayGroupBy
@@ -589,7 +590,7 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
                  closed=None, label=None, base=0, keep_attrs=False, **indexer):
         """Returns a Resample object for performing resampling operations.
 
-        Handles both downsampling and upsampling. If any intervals contain no 
+        Handles both downsampling and upsampling. If any intervals contain no
         values from the original object, they will be given the value ``NaN``.
 
         Parameters
@@ -616,11 +617,11 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
         -------
         resampled : same type as caller
             This object resampled.
-            
+
         Examples
         --------
         Downsample monthly time-series data to seasonal data:
-        
+
         >>> da = xr.DataArray(np.linspace(0, 11, num=12),
         ...                   coords=[pd.date_range('15/12/1999',
         ...                           periods=12, freq=pd.DateOffset(months=1))],
@@ -635,15 +636,15 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
         array([ 1.,  4.,  7., 10.])
         Coordinates:
           * time     (time) datetime64[ns] 2000-02-29 2000-05-31 2000-08-31 2000-11-30
-          
+
         Upsample monthly time-series data to daily data:
-        
+
         >>> da.resample(time='1D').interpolate('linear')
         <xarray.DataArray (time: 337)>
         array([ 0.      ,  0.032258,  0.064516, ..., 10.935484, 10.967742, 11.      ])
         Coordinates:
           * time     (time) datetime64[ns] 1999-12-15 1999-12-16 1999-12-17 ...
-          
+
         References
         ----------
 
@@ -957,8 +958,8 @@ def contains_cftime_datetimes(var):
                     sample = sample.item()
             return isinstance(sample, cftime_datetime)
         else:
-            return False        
-                    
+            return False
+
 
 def _contains_datetime_like_objects(var):
     """Check if a variable contains datetime like objects (either
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index cf78ca66b13..a231963c446 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -214,8 +214,7 @@ def _ignore_warnings_if(condition):
         yield
 
 
-def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
-                           no_bottleneck=False, coerce_strings=False):
+def _create_nan_agg_method(name, np_compat=False, coerce_strings=False):
     def f(values, axis=None, skipna=None, **kwargs):
         if kwargs.pop('out', None) is not None:
             raise TypeError('`out` is not valid for {}'.format(name))
@@ -255,25 +254,34 @@ def f(values, axis=None, skipna=None, **kwargs):
                        'or newer to use skipna=True or skipna=None' % name)
             raise NotImplementedError(msg)
 
-    f.numeric_only = numeric_only
     f.__name__ = name
     return f
 
 
+# Attributes `numeric_only`, `available_min_count` is used for docs.
+# See ops.inject_reduce_methods
 argmax = _create_nan_agg_method('argmax', coerce_strings=True)
 argmin = _create_nan_agg_method('argmin', coerce_strings=True)
 max = _create_nan_agg_method('max', coerce_strings=True)
 min = _create_nan_agg_method('min', coerce_strings=True)
-sum = _create_nan_agg_method('sum', numeric_only=True)
-mean = _create_nan_agg_method('mean', numeric_only=True)
-std = _create_nan_agg_method('std', numeric_only=True)
-var = _create_nan_agg_method('var', numeric_only=True)
-median = _create_nan_agg_method('median', numeric_only=True)
-prod = _create_nan_agg_method('prod', numeric_only=True)
-cumprod_1d = _create_nan_agg_method(
-    'cumprod', numeric_only=True, np_compat=True)
-cumsum_1d = _create_nan_agg_method(
-    'cumsum', numeric_only=True, np_compat=True)
+sum = _create_nan_agg_method('sum')
+sum.numeric_only = True
+sum.available_min_count = True
+mean = _create_nan_agg_method('mean')
+mean.numeric_only = True
+std = _create_nan_agg_method('std')
+std.numeric_only = True
+var = _create_nan_agg_method('var')
+var.numeric_only = True
+median = _create_nan_agg_method('median')
+median.numeric_only = True
+prod = _create_nan_agg_method('prod')
+prod.numeric_only = True
+sum.available_min_count = True
+cumprod_1d = _create_nan_agg_method('cumprod', np_compat=True)
+cumprod_1d.numeric_only = True
+cumsum_1d = _create_nan_agg_method('cumsum', np_compat=True)
+cumsum_1d.numeric_only = True
 
 
 def _nd_cum_func(cum_func, array, axis, **kwargs):
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 728a7b369ab..531878f4686 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -37,14 +37,14 @@ def __call__(self, alt):
         @functools.wraps(alt)
         def f(values, axis=None, **kwds):
             dtype = kwds.get('dtype', None)
-            min_count = kwds.get('min_count', 1)
+            min_count = kwds.get('min_count', None)
 
             if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and
                     not isinstance(axis, tuple) and
                     values.dtype.kind in 'uifc' and
                     values.dtype.isnative and
                     (dtype is None or np.dtype(dtype) == values.dtype) and
-                    min_count != 1):
+                    min_count is None):
                 # bottleneck does not take care dtype, min_count
                 kwds.pop('dtype', None)
                 kwds.pop('min_count', 1)
diff --git a/xarray/core/ops.py b/xarray/core/ops.py
index d9e8ceb65d5..2bf8682f357 100644
--- a/xarray/core/ops.py
+++ b/xarray/core/ops.py
@@ -86,7 +86,7 @@
     If True, skip missing values (as marked by NaN). By default, only
     skips missing values for float dtypes; other dtypes either do not
     have a sentinel missing value (int) or skipna=True has not been
-    implemented (object, datetime64 or timedelta64).
+    implemented (object, datetime64 or timedelta64).{min_count_docs}
 keep_attrs : bool, optional
     If True, the attributes (`attrs`) will be copied from the original
     object to the new one.  If False (default), the new object will be
@@ -102,6 +102,12 @@
     indicated dimension(s) removed.
 """
 
+_MINCOUNT_DOCSTRING = """
+min_count : int, default None
+    The required number of valid values to perform the operation. If fewer than
+    min_count non-NA values are present the result will be NA.
+    New in version 0.10.8: Added with the default being None."""
+
 _ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\
 Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension.
 
@@ -236,11 +242,15 @@ def inject_reduce_methods(cls):
                [('count', duck_array_ops.count, False)])
     for name, f, include_skipna in methods:
         numeric_only = getattr(f, 'numeric_only', False)
+        available_min_count = getattr(f, 'available_min_count', False)
+        min_count_docs = _MINCOUNT_DOCSTRING if available_min_count else ''
+
         func = cls._reduce_method(f, include_skipna, numeric_only)
         func.__name__ = name
         func.__doc__ = _REDUCE_DOCSTRING_TEMPLATE.format(
             name=name, cls=cls.__name__,
-            extra_args=cls._reduce_extra_args_docstring.format(name=name))
+            extra_args=cls._reduce_extra_args_docstring.format(name=name),
+            min_count_docs=min_count_docs)
         setattr(cls, name, func)
 
 
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index cc4d61f0fff..c1e103c366f 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from textwrap import dedent
 from numpy import array, nan
 import warnings
 
@@ -443,3 +444,77 @@ def test_multiple_dims(dtype, dask, func):
     actual = getattr(da, func)(('x', 'y'))
     expected = getattr(getattr(da, func)('x'), func)('y')
     assert_allclose(actual, expected)
+
+
+def test_docs():
+    # with min_count
+    actual = DataArray.sum.__doc__
+    expected = dedent("""\
+        Reduce this DataArray's data by applying `sum` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply `sum`.
+        axis : int or sequence of int, optional
+            Axis(es) over which to apply `sum`. Only one of the 'dim'
+            and 'axis' arguments can be supplied. If neither are supplied, then
+            `sum` is calculated over axes.
+        skipna : bool, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or skipna=True has not been
+            implemented (object, datetime64 or timedelta64).
+        min_count : int, default None
+            The required number of valid values to perform the operation. If fewer than
+            min_count non-NA values are present the result will be NA.
+            New in version 0.10.8: Added with the default being None.
+        keep_attrs : bool, optional
+            If True, the attributes (`attrs`) will be copied from the original
+            object to the new one.  If False (default), the new object will be
+            returned without attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating `sum` on this object's data.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray object with `sum` applied to its data and the
+            indicated dimension(s) removed.
+        """)
+    assert actual == expected
+
+    # without min_count
+    actual = DataArray.mean.__doc__
+    expected = dedent("""\
+        Reduce this DataArray's data by applying `mean` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply `mean`.
+        axis : int or sequence of int, optional
+            Axis(es) over which to apply `mean`. Only one of the 'dim'
+            and 'axis' arguments can be supplied. If neither are supplied, then
+            `mean` is calculated over axes.
+        skipna : bool, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or skipna=True has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool, optional
+            If True, the attributes (`attrs`) will be copied from the original
+            object to the new one.  If False (default), the new object will be
+            returned without attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating `mean` on this object's data.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray object with `mean` applied to its data and the
+            indicated dimension(s) removed.
+        """)
+    assert actual == expected

From 5dda53586eae9eba65ed828f84f65a7b4d949980 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Wed, 20 Jun 2018 21:31:33 +0900
Subject: [PATCH 11/25] flake8

---
 xarray/core/ops.py                  |  6 +++---
 xarray/tests/test_duck_array_ops.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/xarray/core/ops.py b/xarray/core/ops.py
index 2bf8682f357..a0dd2212a8f 100644
--- a/xarray/core/ops.py
+++ b/xarray/core/ops.py
@@ -104,9 +104,9 @@
 
 _MINCOUNT_DOCSTRING = """
 min_count : int, default None
-    The required number of valid values to perform the operation. If fewer than
-    min_count non-NA values are present the result will be NA.
-    New in version 0.10.8: Added with the default being None."""
+    The required number of valid values to perform the operation.
+    If fewer than min_count non-NA values are present the result will
+    be NA. New in version 0.10.8: Added with the default being None."""
 
 _ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\
 Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension.
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index c1e103c366f..2930bad6c9f 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -466,9 +466,9 @@ def test_docs():
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
         min_count : int, default None
-            The required number of valid values to perform the operation. If fewer than
-            min_count non-NA values are present the result will be NA.
-            New in version 0.10.8: Added with the default being None.
+            The required number of valid values to perform the operation.
+            If fewer than min_count non-NA values are present the result will
+            be NA. New in version 0.10.8: Added with the default being None.
         keep_attrs : bool, optional
             If True, the attributes (`attrs`) will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -486,18 +486,18 @@ def test_docs():
     assert actual == expected
 
     # without min_count
-    actual = DataArray.mean.__doc__
+    actual = DataArray.std.__doc__
     expected = dedent("""\
-        Reduce this DataArray's data by applying `mean` along some dimension(s).
+        Reduce this DataArray's data by applying `std` along some dimension(s).
 
         Parameters
         ----------
         dim : str or sequence of str, optional
-            Dimension(s) over which to apply `mean`.
+            Dimension(s) over which to apply `std`.
         axis : int or sequence of int, optional
-            Axis(es) over which to apply `mean`. Only one of the 'dim'
+            Axis(es) over which to apply `std`. Only one of the 'dim'
             and 'axis' arguments can be supplied. If neither are supplied, then
-            `mean` is calculated over axes.
+            `std` is calculated over axes.
         skipna : bool, optional
             If True, skip missing values (as marked by NaN). By default, only
             skips missing values for float dtypes; other dtypes either do not
@@ -509,12 +509,12 @@ def test_docs():
             returned without attributes.
         **kwargs : dict
             Additional keyword arguments passed on to the appropriate array
-            function for calculating `mean` on this object's data.
+            function for calculating `std` on this object's data.
 
         Returns
         -------
         reduced : DataArray
-            New DataArray object with `mean` applied to its data and the
+            New DataArray object with `std` applied to its data and the
             indicated dimension(s) removed.
         """)
     assert actual == expected

From 5ddc4ebacd047d96c774440aca02440a87286e09 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Wed, 20 Jun 2018 22:49:05 +0900
Subject: [PATCH 12/25] Bug fix. Better test coverage.

---
 doc/whats-new.rst                   |  2 +-
 xarray/core/duck_array_ops.py       |  4 ++--
 xarray/core/nanops.py               | 28 ++++++++++++++++++++++++--
 xarray/tests/test_dataarray.py      | 10 +++++++---
 xarray/tests/test_duck_array_ops.py | 31 ++++++++++++++++++-----------
 5 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 50b6f5c6ea5..4f3f87a5883 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -37,7 +37,7 @@ Enhancements
 ~~~~~~~~~~~~
 
 - min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and
-  :py:meth:`~xarray.Dataset.mean`.
+  :py:meth:`~xarray.Dataset.prod`.
   (:issue:`2230`)
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index a231963c446..2e8efbc67aa 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -233,11 +233,11 @@ def f(values, axis=None, skipna=None, **kwargs):
             func = getattr(
                 nanops, nanname, _dask_or_eager_func(
                     nanname, eager_module=np_module))
-        if func is None:
+        else:
             if dtype is None:
                 func = _dask_or_eager_func(name)
             else:
-                func = getattr(np, name)
+                func = getattr(np_module, name)
 
         try:
             return func(values, axis=axis, **kwargs)
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 531878f4686..8f8248599b4 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -105,6 +105,10 @@ def _maybe_null_out(result, axis, mask, min_count=1):
     """
     xarray version of pandas.core.nanops._maybe_null_out
     """
+    if hasattr(axis, '__len__'):  # if tuple or list
+        raise ValueError('min_count is not available for reduction '
+                         'with more than one dimensions.')
+
     if axis is not None and getattr(result, 'ndim', False):
         null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
         if np.any(null_mask):
@@ -112,8 +116,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
             result = result.astype(dtype)
             result[null_mask] = fill_value
 
-    elif (not isinstance(result, dask_array_type) and
-            result not in dtypes.NAT_TYPES):
+    elif getattr(result, 'dtype', None) not in dtypes.NAT_TYPES:
         null_mask = mask.size - mask.sum()
         if null_mask < min_count:
             result = np.nan
@@ -245,6 +248,27 @@ def nanmean(a, axis=None, dtype=None, out=None):
     return np.nanmean(a, axis=axis, dtype=dtype)
 
 
+def _nanvar_object(value, axis=None, **kwargs):
+    ddof = kwargs.pop('ddof', 0)
+    kwargs_mean = kwargs.copy()
+    kwargs_mean.pop('keepdims', None)
+    value_mean = _nanmean_ddof_object(ddof=0, value=value, axis=axis,
+                                      keepdims=True, **kwargs_mean)
+    squared = (value.astype(value_mean.dtype) - value_mean)**2
+    return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs)
+
+
+@bottleneck_switch()
+def nanvar(a, axis=None, dtype=None, out=None, ddof=0):
+    if a.dtype.kind == 'O':
+        return _nanvar_object(a, axis=axis, dtype=dtype, ddof=ddof)
+
+    if isinstance(a, dask_array_type):
+        return dask_array.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)
+
+    return np.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)
+
+
 def nanprod(a, axis=None, dtype=None, out=None, min_count=None):
     a, mask = _replace_nan(a, 1)
     result = np.prod(a, axis=axis, dtype=dtype, out=out)
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index d339e6402b6..153b276f1ac 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3416,7 +3416,9 @@ def test_isin(da):
 def test_rolling_iter(da):
 
     rolling_obj = da.rolling(time=7)
-    rolling_obj_mean = rolling_obj.mean()
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', 'Mean of empty slice')
+        rolling_obj_mean = rolling_obj.mean()
 
     assert len(rolling_obj.window_labels) == len(da['time'])
     assert_identical(rolling_obj.window_labels, da['time'])
@@ -3424,8 +3426,10 @@ def test_rolling_iter(da):
     for i, (label, window_da) in enumerate(rolling_obj):
         assert label == da['time'].isel(time=i)
 
-        actual = rolling_obj_mean.isel(time=i)
-        expected = window_da.mean('time')
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', 'Mean of empty slice')
+            actual = rolling_obj_mean.isel(time=i)
+            expected = window_da.mean('time')
 
         # TODO add assert_allclose_with_nan, which compares nan position
         # as well as the closeness of the values.
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 2930bad6c9f..30447d76ada 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -10,7 +10,7 @@
 import warnings
 
 from xarray import DataArray, concat
-from xarray.core import duck_array_ops
+from xarray.core import duck_array_ops, dtypes
 from xarray.core.duck_array_ops import (
     array_notnull_equiv, concatenate, count, first, last, mean, rolling_window,
     stack, where)
@@ -203,10 +203,15 @@ def construct_dataarray(dim_num, dtype, contains_nan, dask):
         array = rng.choice(['a', 'b', 'c', 'd'], size=shapes)
     else:
         raise ValueError
-    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')
 
     if contains_nan:
-        da = da.reindex(x=np.arange(20))
+        inds = rng.choice(range(array.size), int(array.size * 0.2))
+        dtype, fill_value = dtypes.maybe_promote(array.dtype)
+        array = array.astype(dtype)
+        array.flat[inds] = fill_value
+
+    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')
+
     if dask and has_dask:
         chunks = {d: 4 for d in dims}
         da = da.chunk(chunks)
@@ -263,6 +268,9 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
     if dask and not has_dask:
         pytest.skip('requires dask')
 
+    if dask and skipna is False and dtype in [np.bool_]:
+        pytest.skip('dask does not compute object-typed array')
+
     rtol = 1e-04 if dtype == np.float32 else 1e-05
 
     da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
@@ -294,8 +302,13 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
                 # nanmean for object dtype
                 pass
 
-        # make sure the compatiblility with pandas' results.
         actual = getattr(da, func)(skipna=skipna, dim=aggdim)
+
+        # for dask case, make sure the result is the same for numpy backend
+        expected = getattr(da.compute(), func)(skipna=skipna, dim=aggdim)
+        assert_allclose(actual, expected, rtol=rtol)
+
+        # check the compatibility with pandas' results.
         if func == 'var':
             expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
                                      ddof=0)
@@ -358,13 +371,6 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim):
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore', 'All-NaN slice')
 
-        if aggdim == 'y' and contains_nan and skipna:
-            with pytest.raises(ValueError):
-                actual = da.isel(**{
-                    aggdim: getattr(da, 'arg' + func)(
-                        dim=aggdim, skipna=skipna).compute()})
-            return
-
         actual = da.isel(**{aggdim: getattr(da, 'arg' + func)
                             (dim=aggdim, skipna=skipna).compute()})
         expected = getattr(da, func)(dim=aggdim, skipna=skipna)
@@ -374,6 +380,7 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim):
 
 def test_argmin_max_error():
     da = construct_dataarray(2, np.bool_, contains_nan=True, dask=False)
+    da[0] = np.nan
     with pytest.raises(ValueError):
         da.argmin(dim='y')
 
@@ -425,7 +432,7 @@ def test_min_count(dim_num, dtype, dask, func, aggdim):
     actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)
 
     if LooseVersion(pd.__version__) >= LooseVersion('0.22.0'):
-        # min_count has pandas > 0.22
+        # min_count is only implemented in pandas >= 0.22
         expected = series_reduce(da, func, skipna=True, dim=aggdim,
                                  min_count=min_count)
         assert_allclose(actual, expected)

From c37de0e13ddc7a65b640daf86b14bd2c8e2ef656 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 21 Jun 2018 08:11:06 +0900
Subject: [PATCH 13/25] using isnull, where_method. Remove unnecessary
 conditional branching.

---
 xarray/core/duck_array_ops.py |  5 +----
 xarray/core/nanops.py         | 16 +++-------------
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 2e8efbc67aa..a56b7c5563f 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -234,10 +234,7 @@ def f(values, axis=None, skipna=None, **kwargs):
                 nanops, nanname, _dask_or_eager_func(
                     nanname, eager_module=np_module))
         else:
-            if dtype is None:
-                func = _dask_or_eager_func(name)
-            else:
-                func = getattr(np_module, name)
+            func = _dask_or_eager_func(name)
 
         try:
             return func(values, axis=axis, **kwargs)
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 8f8248599b4..97df3609d51 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -85,20 +85,10 @@ def _replace_nan(a, val):
     https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
     but slightly modified to take care of dask.array
     """
-    if a.dtype == np.object_:
-        # object arrays do not support `isnan` (gh-9009), so make a guess
-        mask = a != a
-    elif issubclass(a.dtype.type, np.inexact):
-        mask = np.isnan(a)
-    else:
-        mask = None
-
-    if mask is not None:
-        if isinstance(a, dask_array_type):
-            return dask_array.where(mask, val, a), mask
-        return np.where(mask, val, a), mask
+    from .duck_array_ops import isnull, where_method
 
-    return a, mask
+    mask = isnull(a)
+    return where_method(val, mask, a), mask
 
 
 def _maybe_null_out(result, axis, mask, min_count=1):

From 7aedd02d71def177ead31348ed4ace3ec2ee4b95 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 21 Jun 2018 18:38:42 +0900
Subject: [PATCH 14/25] More refactoring based on the comments

---
 xarray/core/duck_array_ops.py |  24 ++---
 xarray/core/nanops.py         | 190 ++++++++++++----------------------
 xarray/core/nputils.py        |  41 ++++++++
 xarray/tests/test_dataset.py  |   2 -
 4 files changed, 114 insertions(+), 143 deletions(-)

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index a56b7c5563f..cefc5df08b9 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -16,15 +16,6 @@
 from . import dask_array_ops, dtypes, npcompat, nputils
 from .nputils import nanfirst, nanlast
 from .pycompat import dask_array_type
-from . import nanops
-
-try:
-    import bottleneck as bn
-    has_bottleneck = True
-except ImportError:
-    # use numpy methods instead
-    bn = np
-    has_bottleneck = False
 
 try:
     import dask.array as dask_array
@@ -214,25 +205,22 @@ def _ignore_warnings_if(condition):
         yield
 
 
-def _create_nan_agg_method(name, np_compat=False, coerce_strings=False):
+def _create_nan_agg_method(name, coerce_strings=False):
+    from . import nanops
+
     def f(values, axis=None, skipna=None, **kwargs):
         if kwargs.pop('out', None) is not None:
             raise TypeError('`out` is not valid for {}'.format(name))
 
-        # If dtype is supplied, we use numpy's method.
-        dtype = kwargs.get('dtype', None)
         values = asarray(values)
 
         if coerce_strings and values.dtype.kind in 'SU':
             values = values.astype(object)
 
-        np_module = npcompat if np_compat else np
         func = None
         if skipna or (skipna is None and values.dtype.kind in 'cfO'):
             nanname = 'nan' + name
-            func = getattr(
-                nanops, nanname, _dask_or_eager_func(
-                    nanname, eager_module=np_module))
+            func = getattr(nanops, nanname)
         else:
             func = _dask_or_eager_func(name)
 
@@ -275,9 +263,9 @@ def f(values, axis=None, skipna=None, **kwargs):
 prod = _create_nan_agg_method('prod')
 prod.numeric_only = True
 sum.available_min_count = True
-cumprod_1d = _create_nan_agg_method('cumprod', np_compat=True)
+cumprod_1d = _create_nan_agg_method('cumprod')
 cumprod_1d.numeric_only = True
-cumsum_1d = _create_nan_agg_method('cumsum', np_compat=True)
+cumsum_1d = _create_nan_agg_method('cumsum')
 cumsum_1d.numeric_only = True
 
 
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 97df3609d51..ebc99217c84 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -1,92 +1,24 @@
 from __future__ import absolute_import, division, print_function
 
-import functools
-
 import numpy as np
 
 from . import dtypes
 from .pycompat import dask_array_type
-
-
-try:
-    import bottleneck as bn
-    _USE_BOTTLENECK = True
-except ImportError:
-    # use numpy methods instead
-    bn = np
-    _USE_BOTTLENECK = False
+from . duck_array_ops import (count, isnull, fillna, where_method,
+                              _dask_or_eager_func)
+from . import nputils
 
 try:
     import dask.array as dask_array
-    from . import dask_array_compat
 except ImportError:
     dask_array = None
-    dask_array_compat = None
-
-
-class bottleneck_switch(object):
-    """ xarray-version of pandas.core.nanops.bottleneck_switch """
-    def __call__(self, alt):
-        bn_name = alt.__name__
-
-        try:
-            bn_func = getattr(bn, bn_name)
-        except (AttributeError, NameError):  # pragma: no cover
-            bn_func = None
-
-        @functools.wraps(alt)
-        def f(values, axis=None, **kwds):
-            dtype = kwds.get('dtype', None)
-            min_count = kwds.get('min_count', None)
-
-            if (not isinstance(values, dask_array_type) and _USE_BOTTLENECK and
-                    not isinstance(axis, tuple) and
-                    values.dtype.kind in 'uifc' and
-                    values.dtype.isnative and
-                    (dtype is None or np.dtype(dtype) == values.dtype) and
-                    min_count is None):
-                # bottleneck does not take care dtype, min_count
-                kwds.pop('dtype', None)
-                kwds.pop('min_count', 1)
-                result = bn_func(values, axis=axis, **kwds)
-            else:
-                result = alt(values, axis=axis, **kwds)
-
-            return result
-
-        return f
 
 
 def _replace_nan(a, val):
     """
-    If `a` is of inexact type, make a copy of `a`, replace NaNs with
-    the `val` value, and return the copy together with a boolean mask
-    marking the locations where NaNs were present. If `a` is not of
-    inexact type, do nothing and return `a` together with a mask of None.
-    Note that scalars will end up as array scalars, which is important
-    for using the result as the value of the out argument in some
-    operations.
-    Parameters
-    ----------
-    a : array-like
-        Input array.
-    val : float
-        NaN values are set to val before doing the operation.
-    Returns
-    -------
-    y : ndarray
-        If `a` is of inexact type, return a copy of `a` with the NaNs
-        replaced by the fill value, otherwise return `a`.
-    mask: {bool, None}
-        If `a` is of inexact type, return a boolean mask marking locations of
-        NaNs, otherwise return None.
-
-    This function is taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
-    but slightly modified to take care of dask.array
+    Replace NaNs in `a` with `val`, and return the filled array together with
+    the mask of the NaN positions.
     """
-    from .duck_array_ops import isnull, where_method
-
     mask = isnull(a)
     return where_method(val, mask, a), mask
 
@@ -114,32 +46,9 @@ def _maybe_null_out(result, axis, mask, min_count=1):
     return result
 
 
-@bottleneck_switch()
-def nanmin(a, axis=None, out=None):
-    if a.dtype.kind == 'O':
-        return _nan_minmax_object('min', dtypes.get_pos_infinity, a, axis)
-
-    if isinstance(a, dask_array_type):
-        return dask_array.nanmin(a, axis=axis)
-    return np.nanmin(a, axis=axis)
-
-
-@bottleneck_switch()
-def nanmax(a, axis=None, out=None):
-    if a.dtype.kind == 'O':
-        return _nan_minmax_object('max', dtypes.get_neg_infinity, a, axis)
-
-    if isinstance(a, dask_array_type):
-        return dask_array.nanmax(a, axis=axis)
-    return np.nanmax(a, axis=axis)
-
-
-def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
+def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs):
     """ In house nanargmin, nanargmax for object arrays. Always return integer
     type """
-    from .duck_array_ops import count, fillna
-
-    fill_value = get_fill_value(value.dtype)
     valid_count = count(value, axis=axis)
     value = fillna(value, fill_value)
     data = getattr(np, func)(value, axis=axis, **kwargs)
@@ -153,11 +62,8 @@ def _nan_argminmax_object(func, get_fill_value, value, axis=None, **kwargs):
     return np.array(data, dtype=int)
 
 
-def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs):
+def _nan_minmax_object(func, fill_value, value, axis=None, **kwargs):
     """ In house nanmin and nanmax for object array """
-    from .duck_array_ops import count, fillna, where_method
-
-    fill_value = get_fill_value(value.dtype)
     valid_count = count(value, axis=axis)
     filled_value = fillna(value, fill_value)
     data = getattr(np, func)(filled_value, axis=axis, **kwargs)
@@ -167,13 +73,36 @@ def _nan_minmax_object(func, get_fill_value, value, axis=None, **kwargs):
     return where_method(data, valid_count != 0)
 
 
-@bottleneck_switch()
+def nanmin(a, axis=None, out=None):
+    if a.dtype.kind == 'O':
+        return _nan_minmax_object(
+            'min', dtypes.get_pos_infinity(a.dtype), a, axis)
+
+    if isinstance(a, dask_array_type):
+        return dask_array.nanmin(a, axis=axis)
+    return nputils.nanmin(a, axis=axis)
+
+
+def nanmax(a, axis=None, out=None):
+    if a.dtype.kind == 'O':
+        return _nan_minmax_object(
+            'max', dtypes.get_neg_infinity(a.dtype), a, axis)
+
+    if isinstance(a, dask_array_type):
+        return dask_array.nanmax(a, axis=axis)
+    return nputils.nanmax(a, axis=axis)
+
+
 def nanargmin(a, axis=None):
+    fill_value = dtypes.get_pos_infinity(a.dtype)
     if a.dtype.kind == 'O':
-        return _nan_argminmax_object('argmin', dtypes.get_pos_infinity,
-                                     a, axis=axis)
-    a, mask = _replace_nan(a, np.inf)
-    res = np.argmin(a, axis=axis)
+        return _nan_argminmax_object('argmin', fill_value, a, axis=axis)
+    a, mask = _replace_nan(a, fill_value)
+    if isinstance(a, dask_array_type):
+        res = dask_array.argmin(a, axis=axis)
+    else:
+        res = np.argmin(a, axis=axis)
+
     if mask is not None:
         mask = np.all(mask, axis=axis)
         if np.any(mask):
@@ -181,17 +110,17 @@ def nanargmin(a, axis=None):
     return res
 
 
-@bottleneck_switch()
 def nanargmax(a, axis=None):
-    """
-    taken from
-    https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/nanfunctions.py
-    """
+    fill_value = dtypes.get_neg_infinity(a.dtype)
     if a.dtype.kind == 'O':
-        return _nan_argminmax_object('argmax', dtypes.get_neg_infinity,
-                                     a, axis=axis)
-    a, mask = _replace_nan(a, -np.inf)
-    res = np.argmax(a, axis=axis)
+        return _nan_argminmax_object('argmax', fill_value, a, axis=axis)
+
+    a, mask = _replace_nan(a, fill_value)
+    if isinstance(a, dask_array_type):
+        res = dask_array.argmax(a, axis=axis)
+    else:
+        res = np.argmax(a, axis=axis)
+
     if mask is not None:
         mask = np.all(mask, axis=axis)
         if np.any(mask):
@@ -199,10 +128,9 @@ def nanargmax(a, axis=None):
     return res
 
 
-@bottleneck_switch()
 def nansum(a, axis=None, dtype=None, out=None, min_count=None):
     a, mask = _replace_nan(a, 0)
-    result = np.sum(a, axis=axis, dtype=dtype)
+    result = _dask_or_eager_func('sum')(a, axis=axis, dtype=dtype)
     if min_count is not None:
         return _maybe_null_out(result, axis, mask, min_count)
     else:
@@ -227,7 +155,6 @@ def _nanmean_ddof_object(ddof, value, axis=None, **kwargs):
     return where_method(data, valid_count != 0)
 
 
-@bottleneck_switch()
 def nanmean(a, axis=None, dtype=None, out=None):
     if a.dtype.kind == 'O':
         return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype)
@@ -238,6 +165,11 @@ def nanmean(a, axis=None, dtype=None, out=None):
     return np.nanmean(a, axis=axis, dtype=dtype)
 
 
+def nanmedian(a, axis=None, dtype=None, out=None):
+    return _dask_or_eager_func('nanmedian', eager_module=nputils)(
+        a, axis=axis, dtype=dtype)
+
+
 def _nanvar_object(value, axis=None, **kwargs):
     ddof = kwargs.pop('ddof', 0)
     kwargs_mean = kwargs.copy()
@@ -248,21 +180,33 @@ def _nanvar_object(value, axis=None, **kwargs):
     return _nanmean_ddof_object(ddof, squared, axis=axis, **kwargs)
 
 
-@bottleneck_switch()
 def nanvar(a, axis=None, dtype=None, out=None, ddof=0):
     if a.dtype.kind == 'O':
         return _nanvar_object(a, axis=axis, dtype=dtype, ddof=ddof)
 
-    if isinstance(a, dask_array_type):
-        return dask_array.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)
+    return _dask_or_eager_func('nanvar', eager_module=nputils)(
+        a, axis=axis, dtype=dtype, ddof=ddof)
 
-    return np.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)
+
+def nanstd(a, axis=None, dtype=None, out=None):
+    return _dask_or_eager_func('nanstd', eager_module=nputils)(
+        a, axis=axis, dtype=dtype)
 
 
 def nanprod(a, axis=None, dtype=None, out=None, min_count=None):
     a, mask = _replace_nan(a, 1)
-    result = np.prod(a, axis=axis, dtype=dtype, out=out)
+    result = _dask_or_eager_func('nanprod')(a, axis=axis, dtype=dtype, out=out)
     if min_count is not None:
         return _maybe_null_out(result, axis, mask, min_count)
     else:
         return result
+
+
+def nancumsum(a, axis=None, dtype=None, out=None):
+    return _dask_or_eager_func('nancumsum', eager_module=nputils)(
+        a, axis=axis, dtype=dtype)
+
+
+def nancumprod(a, axis=None, dtype=None, out=None):
+    return _dask_or_eager_func('nancumprod', eager_module=nputils)(
+        a, axis=axis, dtype=dtype)
diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
index 4ca1f9390eb..f8c71e2df7f 100644
--- a/xarray/core/nputils.py
+++ b/xarray/core/nputils.py
@@ -7,6 +7,14 @@
 
 from . import npcompat
 
+try:
+    import bottleneck as bn
+    _USE_BOTTLENECK = True
+except ImportError:
+    # use numpy methods instead
+    bn = np
+    _USE_BOTTLENECK = False
+
 
 def _validate_axis(data, axis):
     ndim = data.ndim
@@ -197,3 +205,36 @@ def _rolling_window(a, window, axis=-1):
     rolling = npcompat.as_strided(a, shape=shape, strides=strides,
                                   writeable=False)
     return np.swapaxes(rolling, -2, axis)
+
+
+def _create_bottleneck_method(name, npmodule=np):
+    def f(values, axis=None, **kwds):
+        dtype = kwds.get('dtype', None)
+        bn_func = getattr(bn, name, None)
+
+        if (_USE_BOTTLENECK and bn_func is not None and
+                not isinstance(axis, tuple) and
+                values.dtype.kind in 'uifc' and
+                values.dtype.isnative and
+                (dtype is None or np.dtype(dtype) == values.dtype)):
+            # bottleneck does not take care of dtype, so drop it here
+            kwds.pop('dtype', None)
+            result = bn_func(values, axis=axis, **kwds)
+        else:
+            result = getattr(npmodule, name)(values, axis=axis, **kwds)
+
+        return result
+
+    f.__name__ = name
+    return f
+
+
+nanmin = _create_bottleneck_method('nanmin')
+nanmax = _create_bottleneck_method('nanmax')
+nanmean = _create_bottleneck_method('nanmean')
+nanmedian = _create_bottleneck_method('nanmedian')
+nanvar = _create_bottleneck_method('nanvar')
+nanstd = _create_bottleneck_method('nanstd')
+nanprod = _create_bottleneck_method('nanprod')
+nancumsum = _create_bottleneck_method('nancumsum', npmodule=npcompat)
+nancumprod = _create_bottleneck_method('nancumprod', npmodule=npcompat)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index a0d316d74dc..002ef12f4d0 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3352,7 +3352,6 @@ def test_reduce(self):
                                  (('dim2', 'time'), ['dim1', 'dim3']),
                                  ((), ['dim1', 'dim2', 'dim3', 'time'])]:
             actual = data.min(dim=reduct).dims
-            print(reduct, actual, expected)
             self.assertItemsEqual(actual, expected)
 
         assert_equal(data.mean(dim=[]), data)
@@ -3407,7 +3406,6 @@ def test_reduce_cumsum_test_dims(self):
                 ('time', ['dim1', 'dim2', 'dim3'])
             ]:
                 actual = getattr(data, cumfunc)(dim=reduct).dims
-                print(reduct, actual, expected)
                 self.assertItemsEqual(actual, expected)
 
     def test_reduce_non_numeric(self):

From ba903db270b9651080b765eb2379e05bbbb8c72b Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Fri, 22 Jun 2018 20:19:10 +0900
Subject: [PATCH 15/25] remove dtype from nanmedian

---
 xarray/core/nanops.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index ebc99217c84..2d9ded4246b 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -165,9 +165,8 @@ def nanmean(a, axis=None, dtype=None, out=None):
     return np.nanmean(a, axis=axis, dtype=dtype)
 
 
-def nanmedian(a, axis=None, dtype=None, out=None):
-    return _dask_or_eager_func('nanmedian', eager_module=nputils)(
-        a, axis=axis, dtype=dtype)
+def nanmedian(a, axis=None, out=None):
+    return _dask_or_eager_func('nanmedian', eager_module=nputils)(a, axis=axis)
 
 
 def _nanvar_object(value, axis=None, **kwargs):

From 5b09714c77ae03c2b7215313d9c5e72499dc9e1b Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Fri, 22 Jun 2018 21:48:46 +0900
Subject: [PATCH 16/25] Fix for nanmedian

---
 xarray/core/duck_array_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index cefc5df08b9..17eb310f8db 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -229,9 +229,10 @@ def f(values, axis=None, skipna=None, **kwargs):
         except AttributeError:
             if isinstance(values, dask_array_type):
                 try:  # dask/dask#3133 dask sometimes needs dtype argument
+                    # if func does not accept dtype, a TypeError is raised
                     return func(values, axis=axis, dtype=values.dtype,
                                 **kwargs)
-                except AttributeError:
+                except (AttributeError, TypeError):
                     msg = '%s is not yet implemented on dask arrays' % name
             else:
                 msg = ('%s is not available with skipna=False with the '

From 5c82628610d8537467bd82039427a8dc6d3211df Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sat, 11 Aug 2018 11:01:21 +0900
Subject: [PATCH 17/25] Add tests for dataset

---
 xarray/tests/test_duck_array_ops.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index fcb2fb1d3e3..fb00e500260 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -9,7 +9,7 @@
 from numpy import array, nan
 import warnings
 
-from xarray import DataArray, concat
+from xarray import DataArray, Dataset, concat
 from xarray.core import duck_array_ops, dtypes
 from xarray.core.duck_array_ops import (
     array_notnull_equiv, concatenate, count, first, last, mean, rolling_window,
@@ -440,6 +440,15 @@ def test_min_count(dim_num, dtype, dask, func, aggdim):
     assert_dask_array(actual, dask)
 
 
+@pytest.mark.parametrize('func', ['sum', 'prod'])
+def test_min_count_dataset(func):
+    da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False)
+    ds = Dataset({'var1': da}, coords={'scalar': 0})
+    actual = getattr(ds, func)(dim='x', skipna=True, min_count=3)['var1']
+    expected = getattr(ds['var1'], func)(dim='x', skipna=True, min_count=3)
+    assert_allclose(actual, expected)
+
+
 @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
 @pytest.mark.parametrize('dask', [False, True])
 @pytest.mark.parametrize('func', ['sum', 'prod'])

From 06319ac3e118bbd073ec12c1d18a16490e38cb97 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sat, 11 Aug 2018 13:04:26 +0900
Subject: [PATCH 18/25] Add tests with resample.

---
 xarray/tests/test_dataset.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e47c1cc5f31..804ad2a1764 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2715,6 +2715,20 @@ def test_resample_and_first(self):
             result = actual.reduce(method)
             assert_equal(expected, result)
 
+    def test_resample_min_count(self):
+        times = pd.date_range('2000-01-01', freq='6H', periods=10)
+        ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
+                      'bar': ('time', np.random.randn(10), {'meta': 'data'}),
+                      'time': times})
+        # inject nan
+        ds['foo'] = xr.where(ds['foo'] > 2.0, np.nan, ds['foo'])
+
+        actual = ds.resample(time='1D').sum(min_count=1)
+        expected = xr.concat([
+            ds.isel(time=slice(i*4, (i+1)*4)).sum('time', min_count=1)
+            for i in range(3)], dim=actual['time'])
+        assert_equal(expected, actual)
+
     def test_resample_by_mean_with_keep_attrs(self):
         times = pd.date_range('2000-01-01', freq='6H', periods=10)
         ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),

From 737118e1338e5bb5bde67f2a9f358cee97a41789 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sat, 11 Aug 2018 13:06:21 +0900
Subject: [PATCH 19/25] lint

---
 xarray/tests/test_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 804ad2a1764..fefc822b8d5 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2725,7 +2725,7 @@ def test_resample_min_count(self):
 
         actual = ds.resample(time='1D').sum(min_count=1)
         expected = xr.concat([
-            ds.isel(time=slice(i*4, (i+1)*4)).sum('time', min_count=1)
+            ds.isel(time=slice(i * 4, (i + 1) * 4)).sum('time', min_count=1)
             for i in range(3)], dim=actual['time'])
         assert_equal(expected, actual)
 

From 85b5650fecbbd29fff9987d98c76406db0f00164 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Sat, 11 Aug 2018 13:13:33 +0900
Subject: [PATCH 20/25] updated whatsnew

---
 doc/whats-new.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 162cca37355..dbacf6ff56e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,7 +36,8 @@ Documentation
 Enhancements
 ~~~~~~~~~~~~
 
-- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum` and
+- min_count option is newly supported in :py:meth:`~xarray.DataArray.sum`,
+  :py:meth:`~xarray.DataArray.prod`, :py:meth:`~xarray.Dataset.sum`, and
   :py:meth:`~xarray.Dataset.prod`.
   (:issue:`2230`)
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
@@ -66,11 +67,12 @@ Bug fixes
   attribute being set.
   (:issue:`2201`)
   By `Thomas Voigt <https://github.com/tv3141>`_.
+
 - Tests can be run in parallel with pytest-xdist
+
 - Follow up the renamings in dask; from dask.ghost to dask.overlap
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 
-
 - Now :py:func:`xr.apply_ufunc` raises a ValueError when the size of
 ``input_core_dims`` is inconsistent with the number of arguments.
   (:issue:`2341`)

From 015e85c8f857f946825c0e9d27193ffae328764b Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 16 Aug 2018 12:27:48 +0900
Subject: [PATCH 21/25] Revise from comments.

---
 xarray/core/nanops.py               | 16 ++++++++--------
 xarray/tests/test_duck_array_ops.py |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 2d9ded4246b..461479dedeb 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -48,14 +48,16 @@ def _maybe_null_out(result, axis, mask, min_count=1):
 
 def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs):
     """ In house nanargmin, nanargmax for object arrays. Always return integer
-    type """
+    type
+    """
     valid_count = count(value, axis=axis)
     value = fillna(value, fill_value)
-    data = getattr(np, func)(value, axis=axis, **kwargs)
+    data = _dask_or_eager_func(func)(value, axis=axis, **kwargs)
     # dask seems return non-integer type
     if isinstance(value, dask_array_type):
         data = data.astype(int)
 
+    # TODO This will evaluate dask arrays and might be costly.
     if (valid_count == 0).any():
         raise ValueError('All-NaN slice encountered')
 
@@ -78,9 +80,8 @@ def nanmin(a, axis=None, out=None):
         return _nan_minmax_object(
             'min', dtypes.get_pos_infinity(a.dtype), a, axis)
 
-    if isinstance(a, dask_array_type):
-        return dask_array.nanmin(a, axis=axis)
-    return nputils.nanmin(a, axis=axis)
+    module = dask_array if isinstance(a, dask_array_type) else nputils
+    return module.nanmin(a, axis=axis)
 
 
 def nanmax(a, axis=None, out=None):
@@ -88,9 +89,8 @@ def nanmax(a, axis=None, out=None):
         return _nan_minmax_object(
             'max', dtypes.get_neg_infinity(a.dtype), a, axis)
 
-    if isinstance(a, dask_array_type):
-        return dask_array.nanmax(a, axis=axis)
-    return nputils.nanmax(a, axis=axis)
+    module = dask_array if isinstance(a, dask_array_type) else nputils
+    return module.nanmax(a, axis=axis)
 
 
 def nanargmin(a, axis=None):
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index fb00e500260..3f32fc49fd2 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -316,7 +316,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
             # also check ddof!=0 case
             actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5)
             if dask:
-                isinstance(da.data, dask_array_type)
+                assert isinstance(da.data, dask_array_type)
             expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
                                      ddof=5)
             assert_allclose(actual, expected, rtol=rtol)
@@ -334,7 +334,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
         da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask)
         actual = getattr(da, func)(skipna=skipna)
         if dask:
-            isinstance(da.data, dask_array_type)
+            assert isinstance(da.data, dask_array_type)
         expected = getattr(np, 'nan{}'.format(func))(da.values)
         if actual.dtype == object:
             assert actual.values == np.array(expected)

From 01a1419c5353d2d8a6d7f6b899d18f4c7e13d5b9 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 16 Aug 2018 12:31:57 +0900
Subject: [PATCH 22/25] Use .any and .all method instead of np.any / np.all

---
 xarray/core/nanops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 461479dedeb..c5d4b6f4b02 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -104,8 +104,8 @@ def nanargmin(a, axis=None):
         res = np.argmin(a, axis=axis)
 
     if mask is not None:
-        mask = np.all(mask, axis=axis)
-        if np.any(mask):
+        mask = mask.all(axis=axis)
+        if mask.any():
             raise ValueError("All-NaN slice encountered")
     return res
 

From a5b18fcd786942f3ba7982cec147c1afb6ff4818 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 16 Aug 2018 12:35:30 +0900
Subject: [PATCH 23/25] Avoid using numpy methods

---
 xarray/core/nanops.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index c5d4b6f4b02..be7edb34c71 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -33,7 +33,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
 
     if axis is not None and getattr(result, 'ndim', False):
         null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
-        if np.any(null_mask):
+        if null_mask.any():
             dtype, fill_value = dtypes.maybe_promote(result.dtype)
             result = result.astype(dtype)
             result[null_mask] = fill_value
@@ -61,7 +61,7 @@ def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs):
     if (valid_count == 0).any():
         raise ValueError('All-NaN slice encountered')
 
-    return np.array(data, dtype=int)
+    return data
 
 
 def _nan_minmax_object(func, fill_value, value, axis=None, **kwargs):
@@ -122,8 +122,8 @@ def nanargmax(a, axis=None):
         res = np.argmax(a, axis=axis)
 
     if mask is not None:
-        mask = np.all(mask, axis=axis)
-        if np.any(mask):
+        mask = mask.all(axis=axis)
+        if mask.any():
             raise ValueError("All-NaN slice encountered")
     return res
 

From e4e1d1e35d8b73485753240bdc78c4fbba3b7e33 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 16 Aug 2018 14:06:30 +0900
Subject: [PATCH 24/25] Avoid casting to int for dask array

---
 xarray/core/nanops.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index be7edb34c71..2309ed9619d 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -53,9 +53,6 @@ def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs):
     valid_count = count(value, axis=axis)
     value = fillna(value, fill_value)
     data = _dask_or_eager_func(func)(value, axis=axis, **kwargs)
-    # dask seems return non-integer type
-    if isinstance(value, dask_array_type):
-        data = data.astype(int)
 
     # TODO This will evaluate dask arrays and might be costly.
     if (valid_count == 0).any():

From b72a1c852add254a4cdd49408fe4e9c934ceece6 Mon Sep 17 00:00:00 2001
From: fujiisoup <fujiisoup@gmail.com>
Date: Thu, 16 Aug 2018 15:02:40 +0900
Subject: [PATCH 25/25] Update whatsnew

---
 doc/whats-new.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index e5fa692e63f..7f561381b42 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -69,12 +69,11 @@ Bug fixes
   By `Thomas Voigt <https://github.com/tv3141>`_.
 
 - Tests can be run in parallel with pytest-xdist
+  By `Tony Tung <https://github.com/ttung>`_.
 
-- Follow up the renamings in dask; from dask.ghost to dask.overlap
+- Follow up the renamings in dask; from dask.ghost to dask.overlap 
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 
-  By `Tony Tung <https://github.com/ttung>`_.
-
 - Now raises a ValueError when there is a conflict between dimension names and
   level names of MultiIndex. (:issue:`2299`)
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.