Skip to content

Commit c782637

Browse files
authored
chunk sparse arrays (#3202)
* chunk sparse arrays
* Deprecated API
* Don't wrap plain numpy arrays with ImplicitToExplicitIndexingAdapter
* typo
* What's New
* Version bump and annotations What's New polish
1 parent 14f1a97 commit c782637

File tree

3 files changed

+59
-43
lines changed

3 files changed

+59
-43
lines changed

doc/whats-new.rst

+22-11
Original file line numberDiff line numberDiff line change
@@ -13,31 +13,42 @@ What's New
1313
import xarray as xr
1414
np.random.seed(123456)
1515
16-
.. _whats-new.0.12.4:
16+
.. _whats-new.0.13.0:
1717

18-
v0.12.4 (unreleased)
19-
-------------------
18+
v0.13.0 (unreleased)
19+
--------------------
2020

2121
This release increases the minimum required Python version from 3.5.0 to 3.5.3
2222
(:issue:`3089`). By `Guido Imperiale <https://github.com/crusaderky>`_.
2323

2424
New functions/methods
2525
~~~~~~~~~~~~~~~~~~~~~
2626

27-
- Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`.
28-
By `Deepak Cherian <https://github.com/dcherian>`_ and `David Mertz
29-
<http://github.com/DavidMertz>`_.
27+
- xarray can now wrap around any
28+
`NEP18 <https://www.numpy.org/neps/nep-0018-array-function-protocol.html>`_ compliant
29+
numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in
30+
the above link). Added explicit test coverage for
31+
`sparse <https://github.com/pydata/sparse>`_. (:issue:`3117`, :issue:`3202`)
32+
By `Nezar Abdennur <https://github.com/nvictus>`_
33+
and `Guido Imperiale <https://github.com/crusaderky>`_.
3034

31-
- The xarray package is now discoverably by mypy (although typing hints
32-
coverage is not complete yet). mypy type checking is now enforced by CI.
33-
Libraries that depend on xarray and use mypy can now remove from their setup.cfg the lines::
35+
- The xarray package is now discoverable by mypy (although typing hints coverage is not
36+
complete yet). mypy type checking is now enforced by CI. Libraries that depend on
37+
xarray and use mypy can now remove from their setup.cfg the lines::
3438

3539
[mypy-xarray]
3640
ignore_missing_imports = True
3741

38-
By `Guido Imperiale <https://github.com/crusaderky>`_
42+
(:issue:`2877`, :issue:`3088`, :issue:`3090`, :issue:`3112`, :issue:`3117`,
43+
:issue:`3207`)
44+
By `Guido Imperiale <https://github.com/crusaderky>`_
45+
and `Maximilian Roos <https://github.com/max-sixty>`_.
46+
47+
- Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`.
48+
By `Deepak Cherian <https://github.com/dcherian>`_ and `David Mertz
49+
<http://github.com/DavidMertz>`_.
3950

40-
- Dataset plotting API for visualizing dependences between two `DataArray`s!
51+
- Dataset plotting API for visualizing dependencies between two ``DataArray`` objects!
4152
Currently only :py:meth:`Dataset.plot.scatter` is implemented.
4253
By `Yohai Bar Sinai <https://github.com/yohai>`_ and `Deepak Cherian <https://github.com/dcherian>`_
4354

xarray/core/variable.py

+21-15
Original file line numberDiff line numberDiff line change
@@ -926,23 +926,29 @@ def chunk(self, chunks=None, name=None, lock=False):
926926
if isinstance(data, da.Array):
927927
data = data.rechunk(chunks)
928928
else:
929+
if isinstance(data, indexing.ExplicitlyIndexed):
930+
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
931+
# that can't handle general array indexing. For example, in netCDF4 you
932+
# can do "outer" indexing along two dimensions independently, which works
933+
# differently from how NumPy handles it.
934+
# da.from_array works by using lazy indexing with a tuple of slices.
935+
# Using OuterIndexer is a pragmatic choice: dask does not yet handle
936+
# different indexing types in an explicit way:
937+
# https://github.com/dask/dask/issues/2883
938+
data = indexing.ImplicitToExplicitIndexingAdapter(
939+
data, indexing.OuterIndexer
940+
)
941+
if LooseVersion(dask.__version__) < "2.0.0":
942+
kwargs = {}
943+
else:
944+
# All of our lazily loaded backend array classes should use NumPy
945+
# array operations.
946+
kwargs = {"meta": np.ndarray}
947+
else:
948+
kwargs = {}
949+
929950
if utils.is_dict_like(chunks):
930951
chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape))
931-
# da.from_array works by using lazily indexing with a tuple of
932-
# slices. Using OuterIndexer is a pragmatic choice: dask does not
933-
# yet handle different indexing types in an explicit way:
934-
# https://github.com/dask/dask/issues/2883
935-
data = indexing.ImplicitToExplicitIndexingAdapter(
936-
data, indexing.OuterIndexer
937-
)
938-
939-
# For now, assume that all arrays that we wrap with dask (including
940-
# our lazily loaded backend array classes) should use NumPy array
941-
# operations.
942-
if LooseVersion(dask.__version__) > "1.2.2":
943-
kwargs = dict(meta=np.ndarray)
944-
else:
945-
kwargs = dict()
946952

947953
data = da.from_array(data, chunks, name=name, lock=lock, **kwargs)
948954

xarray/tests/test_sparse.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,14 @@
1-
from collections import OrderedDict
2-
from contextlib import suppress
3-
from distutils.version import LooseVersion
41
from textwrap import dedent
52
import pickle
63
import numpy as np
74
import pandas as pd
85

9-
from xarray import DataArray, Dataset, Variable
10-
from xarray.tests import mock
6+
from xarray import DataArray, Variable
117
from xarray.core.npcompat import IS_NEP18_ACTIVE
128
import xarray as xr
139
import xarray.ufuncs as xu
1410

15-
from . import (
16-
assert_allclose,
17-
assert_array_equal,
18-
assert_equal,
19-
assert_frame_equal,
20-
assert_identical,
21-
raises_regex,
22-
)
11+
from . import assert_equal, assert_identical
2312

2413
import pytest
2514

@@ -148,7 +137,6 @@ def test_variable_property(prop):
148137
True,
149138
marks=xfail(reason="'COO' object has no attribute 'argsort'"),
150139
),
151-
param(do("chunk", chunks=(5, 5)), True, marks=xfail),
152140
param(
153141
do(
154142
"concat",
@@ -422,9 +410,6 @@ def test_dataarray_property(prop):
422410
False,
423411
marks=xfail(reason="Missing implementation for np.flip"),
424412
),
425-
param(
426-
do("chunk", chunks=(5, 5)), False, marks=xfail(reason="Coercion to dense")
427-
),
428413
param(
429414
do("combine_first", make_xrarray({"x": 10, "y": 5})),
430415
True,
@@ -879,3 +864,17 @@ def test_sparse_coords(self):
879864
dims=["x"],
880865
coords={"x": COO.from_numpy([1, 2, 3, 4])},
881866
)
867+
868+
869+
def test_chunk():
870+
s = sparse.COO.from_numpy(np.array([0, 0, 1, 2]))
871+
a = DataArray(s)
872+
ac = a.chunk(2)
873+
assert ac.chunks == ((2, 2),)
874+
assert isinstance(ac.data._meta, sparse.COO)
875+
assert_identical(ac, a)
876+
877+
ds = a.to_dataset(name="a")
878+
dsc = ds.chunk(2)
879+
assert dsc.chunks == {"dim_0": (2, 2)}
880+
assert_identical(dsc, ds)

0 commit comments

Comments
 (0)