Skip to content

Commit 50f8970

Browse files
authored
Internal clean-up of isnull() to avoid relying on pandas (#3132)
* Internal clean-up of isnull() to avoid relying on pandas. This version should be much more compatible out of the box with duck typing. * Use the isnat ufunc. * Update comment.
1 parent 298d532 commit 50f8970

File tree

3 files changed

+63
-16
lines changed

3 files changed

+63
-16
lines changed

properties/test_encode_decode.py

-2
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
These ones pass, just as you'd hope!
55
66
"""
7-
from __future__ import absolute_import, division, print_function
8-
97
import hypothesis.extra.numpy as npst
108
import hypothesis.strategies as st
119
from hypothesis import given, settings

xarray/core/duck_array_ops.py

+42-10
Original file line number | Diff line number | Diff line change
@@ -63,19 +63,51 @@ def fail_on_dask_array_input(values, msg=None, func_name=None):
6363

6464
around = _dask_or_eager_func('around')
6565
isclose = _dask_or_eager_func('isclose')
66-
notnull = _dask_or_eager_func('notnull', eager_module=pd)
67-
_isnull = _dask_or_eager_func('isnull', eager_module=pd)
66+
67+
if hasattr(np, 'isnat') and (
68+
dask_array is None or hasattr(dask_array_type, '__array_ufunc__')):
69+
# np.isnat is available since NumPy 1.13, so __array_ufunc__ is always
70+
# supported.
71+
isnat = np.isnat
72+
else:
73+
isnat = _dask_or_eager_func('isnull', eager_module=pd)
74+
isnan = _dask_or_eager_func('isnan')
75+
zeros_like = _dask_or_eager_func('zeros_like')
76+
77+
78+
pandas_isnull = _dask_or_eager_func('isnull', eager_module=pd)
6879

6980

7081
def isnull(data):
71-
# GH837, GH861
72-
# isnull fcn from pandas will throw TypeError when run on numpy structured
73-
# array therefore for dims that are np structured arrays we assume all
74-
# data is present
75-
try:
76-
return _isnull(data)
77-
except TypeError:
78-
return np.zeros(data.shape, dtype=bool)
82+
data = asarray(data)
83+
scalar_type = data.dtype.type
84+
if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
85+
# datetime types use NaT for null
86+
# note: must check timedelta64 before integers, because currently
87+
# timedelta64 inherits from np.integer
88+
return isnat(data)
89+
elif issubclass(scalar_type, np.inexact):
90+
# float types use NaN for null
91+
return isnan(data)
92+
elif issubclass(
93+
scalar_type, (np.bool_, np.integer, np.character, np.void)
94+
):
95+
# these types cannot represent missing values
96+
return zeros_like(data, dtype=bool)
97+
else:
98+
# at this point, array should have dtype=object
99+
if isinstance(data, (np.ndarray, dask_array_type)):
100+
return pandas_isnull(data)
101+
else:
102+
# Not reachable yet, but intended for use with other duck array
103+
# types. For full consistency with pandas, we should accept None as
104+
# a null value as well as NaN, but it isn't clear how to do this
105+
# with duck typing.
106+
return data != data
107+
108+
109+
def notnull(data):
110+
return ~isnull(data)
79111

80112

81113
transpose = _dask_or_eager_func('transpose')

xarray/tests/test_duck_array_ops.py

+21-4
Original file line number | Diff line number | Diff line change
@@ -178,14 +178,18 @@ def test_wrong_shape(self):
178178
assert not array_notnull_equiv(a, b)
179179

180180
@pytest.mark.parametrize("val1, val2, val3, null", [
181-
(1, 2, 3, None),
181+
(np.datetime64('2000'),
182+
np.datetime64('2001'),
183+
np.datetime64('2002'),
184+
np.datetime64('NaT')),
182185
(1., 2., 3., np.nan),
183-
(1., 2., 3., None),
184186
('foo', 'bar', 'baz', None),
187+
('foo', 'bar', 'baz', np.nan),
185188
])
186189
def test_types(self, val1, val2, val3, null):
187-
arr1 = np.array([val1, null, val3, null])
188-
arr2 = np.array([val1, val2, null, null])
190+
dtype = object if isinstance(val1, str) else None
191+
arr1 = np.array([val1, null, val3, null], dtype=dtype)
192+
arr2 = np.array([val1, val2, null, null], dtype=dtype)
189193
assert array_notnull_equiv(arr1, arr2)
190194

191195

@@ -432,6 +436,19 @@ def test_argmin_max_error():
432436
da.argmin(dim='y')
433437

434438

439+
@pytest.mark.parametrize('array', [
440+
np.array([np.datetime64('2000-01-01'), np.datetime64('NaT')]),
441+
np.array([np.timedelta64(1, 'h'), np.timedelta64('NaT')]),
442+
np.array([0.0, np.nan]),
443+
np.array([1j, np.nan]),
444+
np.array(['foo', np.nan], dtype=object),
445+
])
446+
def test_isnull(array):
447+
expected = np.array([False, True])
448+
actual = duck_array_ops.isnull(array)
449+
np.testing.assert_equal(expected, actual)
450+
451+
435452
@requires_dask
436453
def test_isnull_with_dask():
437454
da = construct_dataarray(2, np.float32, contains_nan=True, dask=True)

0 commit comments

Comments
 (0)