Skip to content

Commit 3f1b879

Browse files
authored
Fix sparse ops that were calling bottleneck (#3254)
min and max are now working. notnull was already fixed by one of my earlier PRs. std/var/median are still broken, but only because sparse hasn't implemented the corresponding NumPy functions yet (nanstd, nanvar and nanmedian). rank needs a pure NumPy implementation (not via bottleneck) if we want it to work on sparse or dask arrays.
1 parent 3faee2b commit 3f1b879

File tree

3 files changed

+34
-19
lines changed

3 files changed

+34
-19
lines changed

xarray/core/nputils.py

+1
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,7 @@ def f(values, axis=None, **kwargs):
209209

210210
if (
211211
_USE_BOTTLENECK
212+
and isinstance(values, np.ndarray)
212213
and bn_func is not None
213214
and not isinstance(axis, tuple)
214215
and values.dtype.kind in "uifc"

xarray/core/variable.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -1697,18 +1697,24 @@ def rank(self, dim, pct=False):
16971697
"""
16981698
import bottleneck as bn
16991699

1700-
if isinstance(self.data, dask_array_type):
1700+
data = self.data
1701+
1702+
if isinstance(data, dask_array_type):
17011703
raise TypeError(
17021704
"rank does not work for arrays stored as dask "
17031705
"arrays. Load the data via .compute() or .load() "
17041706
"prior to calling this method."
17051707
)
1708+
elif not isinstance(data, np.ndarray):
1709+
raise TypeError(
1710+
"rank is not implemented for {} objects.".format(type(data))
1711+
)
17061712

17071713
axis = self.get_axis_num(dim)
17081714
func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata
1709-
ranked = func(self.data, axis=axis)
1715+
ranked = func(data, axis=axis)
17101716
if pct:
1711-
count = np.sum(~np.isnan(self.data), axis=axis, keepdims=True)
1717+
count = np.sum(~np.isnan(data), axis=axis, keepdims=True)
17121718
ranked /= count
17131719
return Variable(self.dims, ranked)
17141720

xarray/tests/test_sparse.py

+24-16
Original file line number | Diff line number | Diff line change
@@ -171,11 +171,13 @@ def test_variable_property(prop):
171171
False,
172172
marks=xfail(reason="'COO' object has no attribute 'item'"),
173173
),
174-
param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
175174
param(
176-
do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck")
175+
do("median"),
176+
False,
177+
marks=xfail(reason="Missing implementation for np.nanmedian"),
177178
),
178-
param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
179+
param(do("max"), False),
180+
param(do("min"), False),
179181
param(
180182
do("no_conflicts", other=make_xrvar({"x": 10, "y": 5})),
181183
True,
@@ -199,7 +201,7 @@ def test_variable_property(prop):
199201
param(
200202
do("rank", dim="x"),
201203
False,
202-
marks=xfail(reason="Coercion to dense via bottleneck"),
204+
marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"),
203205
),
204206
param(
205207
do("reduce", func=np.sum, dim="x"),
@@ -214,13 +216,17 @@ def test_variable_property(prop):
214216
param(
215217
do("shift", x=2), True, marks=xfail(reason="mixed sparse-dense operation")
216218
),
217-
param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
219+
param(
220+
do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd")
221+
),
218222
param(
219223
do("sum"),
220224
False,
221225
marks=xfail(reason="Missing implementation for np.result_type"),
222226
),
223-
param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
227+
param(
228+
do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar")
229+
),
224230
param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")),
225231
param(
226232
do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5),
@@ -476,16 +482,14 @@ def test_dataarray_property(prop):
476482
False,
477483
marks=xfail(reason="'COO' object has no attribute 'item'"),
478484
),
479-
param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
485+
param(do("max"), False),
486+
param(do("min"), False),
480487
param(
481-
do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck")
482-
),
483-
param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
484-
param(
485-
do("notnull"),
488+
do("median"),
486489
False,
487-
marks=xfail(reason="'COO' object has no attribute 'notnull'"),
490+
marks=xfail(reason="Missing implementation for np.nanmedian"),
488491
),
492+
param(do("notnull"), True),
489493
param(
490494
do("pipe", np.sum, axis=1),
491495
True,
@@ -504,7 +508,7 @@ def test_dataarray_property(prop):
504508
param(
505509
do("rank", "x"),
506510
False,
507-
marks=xfail(reason="Coercion to dense via bottleneck"),
511+
marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"),
508512
),
509513
param(
510514
do("reduce", np.sum, dim="x"),
@@ -532,13 +536,17 @@ def test_dataarray_property(prop):
532536
True,
533537
marks=xfail(reason="Indexing COO with more than one iterable index"),
534538
), # noqa
535-
param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
539+
param(
540+
do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd")
541+
),
536542
param(
537543
do("sum"),
538544
False,
539545
marks=xfail(reason="Missing implementation for np.result_type"),
540546
),
541-
param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
547+
param(
548+
do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar")
549+
),
542550
param(
543551
do("where", make_xrarray({"x": 10, "y": 5}) > 0.5),
544552
False,

0 commit comments

Comments
 (0)