Skip to content

Refactor nanops #2236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Aug 16, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f36f8f2
Inhouse nanops
fujiisoup Jun 17, 2018
76218b2
Cleanup nanops
fujiisoup Jun 17, 2018
943e2b1
remove NAT_TYPES
fujiisoup Jun 18, 2018
84fc69e
flake8.
fujiisoup Jun 18, 2018
11d735f
another flake8
fujiisoup Jun 18, 2018
7a079f6
recover nat types
fujiisoup Jun 18, 2018
441be59
remove keep_dims option from nanops (to make them compatible with num…
fujiisoup Jun 19, 2018
f95054b
Test aggregation over multiple dimensions
fujiisoup Jun 19, 2018
9211b64
Remove print.
fujiisoup Jun 19, 2018
491ce2f
Docs. More cleanup.
fujiisoup Jun 20, 2018
f0fc8bf
Merge branch 'master' into refactor_nanops
fujiisoup Jun 20, 2018
5dda535
flake8
fujiisoup Jun 20, 2018
5ddc4eb
Bug fix. Better test coverage.
fujiisoup Jun 20, 2018
c37de0e
using isnull, where_method. Remove unnecessary conditional branching.
fujiisoup Jun 20, 2018
7aedd02
More refactoring based on the comments
fujiisoup Jun 21, 2018
ba903db
remove dtype from nanmedian
fujiisoup Jun 22, 2018
5b09714
Fix for nanmedian
fujiisoup Jun 22, 2018
e8fdac2
Merge branch 'master' into refactor_nanops
fujiisoup Jul 8, 2018
a65c579
Merge branch 'master' into refactor_nanops
fujiisoup Aug 11, 2018
5c82628
Add tests for dataset
fujiisoup Aug 11, 2018
06319ac
Add tests with resample.
fujiisoup Aug 11, 2018
737118e
lint
fujiisoup Aug 11, 2018
85b5650
updated whatsnew
fujiisoup Aug 11, 2018
623016b
Merge branch 'master' into refactor_nanops
fujiisoup Aug 16, 2018
015e85c
Revise from comments.
fujiisoup Aug 16, 2018
01a1419
Use .any and .all method instead of np.any / np.all
fujiisoup Aug 16, 2018
a5b18fc
Avoid using numpy methods
fujiisoup Aug 16, 2018
e4e1d1e
Avoid casting to int for dask array
fujiisoup Aug 16, 2018
b72a1c8
Update whatsnew
fujiisoup Aug 16, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ Bug fixes

- Tests can be run in parallel with pytest-xdist

- Follow up the renamings in dask; from dask.ghost to dask.overlap
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

By `Tony Tung <https://github.com/ttung>`_.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should reference the line above, not your change here


- Now raises a ValueError when there is a conflict between dimension names and
level names of MultiIndex. (:issue:`2299`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

- Follow up the renamings in dask; from dask.ghost to dask.overlap
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

Expand Down
31 changes: 14 additions & 17 deletions xarray/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):

if axis is not None and getattr(result, 'ndim', False):
null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
if np.any(null_mask):
if null_mask.any():
dtype, fill_value = dtypes.maybe_promote(result.dtype)
result = result.astype(dtype)
result[null_mask] = fill_value
Expand All @@ -48,18 +48,17 @@ def _maybe_null_out(result, axis, mask, min_count=1):

def _nan_argminmax_object(func, fill_value, value, axis=None, **kwargs):
""" In house nanargmin, nanargmax for object arrays. Always return integer
type """
type
"""
valid_count = count(value, axis=axis)
value = fillna(value, fill_value)
data = getattr(np, func)(value, axis=axis, **kwargs)
# dask seems return non-integer type
if isinstance(value, dask_array_type):
data = data.astype(int)
data = _dask_or_eager_func(func)(value, axis=axis, **kwargs)

# TODO This will evaluate dask arrays and might be costly.
if (valid_count == 0).any():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also evaluate dask arrays, which can be costly. I don't know if there's much to do about this but it should be noted.

raise ValueError('All-NaN slice encountered')

return np.array(data, dtype=int)
return data


def _nan_minmax_object(func, fill_value, value, axis=None, **kwargs):
Expand All @@ -78,19 +77,17 @@ def nanmin(a, axis=None, out=None):
return _nan_minmax_object(
'min', dtypes.get_pos_infinity(a.dtype), a, axis)

if isinstance(a, dask_array_type):
return dask_array.nanmin(a, axis=axis)
return nputils.nanmin(a, axis=axis)
module = dask_array if isinstance(a, dask_array_type) else nputils
return module.nanmin(a, axis=axis)


def nanmax(a, axis=None, out=None):
if a.dtype.kind == 'O':
return _nan_minmax_object(
'max', dtypes.get_neg_infinity(a.dtype), a, axis)

if isinstance(a, dask_array_type):
return dask_array.nanmax(a, axis=axis)
return nputils.nanmax(a, axis=axis)
module = dask_array if isinstance(a, dask_array_type) else nputils
return module.nanmax(a, axis=axis)


def nanargmin(a, axis=None):
Expand All @@ -104,8 +101,8 @@ def nanargmin(a, axis=None):
res = np.argmin(a, axis=axis)

if mask is not None:
mask = np.all(mask, axis=axis)
if np.any(mask):
mask = mask.all(axis=axis)
if mask.any():
raise ValueError("All-NaN slice encountered")
return res

Expand All @@ -122,8 +119,8 @@ def nanargmax(a, axis=None):
res = np.argmax(a, axis=axis)

if mask is not None:
mask = np.all(mask, axis=axis)
if np.any(mask):
mask = mask.all(axis=axis)
if mask.any():
raise ValueError("All-NaN slice encountered")
return res

Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
# also check ddof!=0 case
actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5)
if dask:
isinstance(da.data, dask_array_type)
assert isinstance(da.data, dask_array_type)
expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
ddof=5)
assert_allclose(actual, expected, rtol=rtol)
Expand All @@ -334,7 +334,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask)
actual = getattr(da, func)(skipna=skipna)
if dask:
isinstance(da.data, dask_array_type)
assert isinstance(da.data, dask_array_type)
expected = getattr(np, 'nan{}'.format(func))(da.values)
if actual.dtype == object:
assert actual.values == np.array(expected)
Expand Down