pydata · crusaderky · Aug 12, 2019 · Aug 11, 2019 · Aug 11, 2019 · Aug 12, 2019
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -926,23 +926,29 @@ def chunk(self, chunks=None, name=None, lock=False):
         if isinstance(data, da.Array):
             data = data.rechunk(chunks)
         else:
+            if isinstance(data, indexing.ExplicitlyIndexed):
+                # Unambiguously handle array storage backends (like NetCDF4 and h5py)
+                # that can't handle general array indexing. For example, in netCDF4 you
+                # can do "outer" indexing along two dimensions independently, which
+                # works differently from how NumPy handles it.
+                # da.from_array works by using lazy indexing with a tuple of slices.
+                # Using OuterIndexer is a pragmatic choice: dask does not yet handle
+                # different indexing types in an explicit way:
+                # https://github.com/dask/dask/issues/2883
+                data = indexing.ImplicitToExplicitIndexingAdapter(
+                    data, indexing.OuterIndexer
+                )
+                if LooseVersion(dask.__version__) < "2.0.0":
+                    kwargs = {}
+                else:
+                    # All of our lazily loaded backend array classes should use NumPy
+                    # array operations.
+                    kwargs = {"meta": np.ndarray}
+            else:
+                kwargs = {}
+
             if utils.is_dict_like(chunks):
                 chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape))
-            # da.from_array works by using lazily indexing with a tuple of
-            # slices. Using OuterIndexer is a pragmatic choice: dask does not
-            # yet handle different indexing types in an explicit way:
-            # https://github.com/dask/dask/issues/2883
-            data = indexing.ImplicitToExplicitIndexingAdapter(
-                data, indexing.OuterIndexer
-            )
-
-            # For now, assume that all arrays that we wrap with dask (including
-            # our lazily loaded backend array classes) should use NumPy array
-            # operations.
-            if LooseVersion(dask.__version__) > "1.2.2":
-                kwargs = dict(meta=np.ndarray)
-            else:
-                kwargs = dict()
 
             data = da.from_array(data, chunks, name=name, lock=lock, **kwargs)
 

diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
@@ -1,25 +1,14 @@
-from collections import OrderedDict
-from contextlib import suppress
-from distutils.version import LooseVersion
 from textwrap import dedent
 import pickle
 import numpy as np
 import pandas as pd
 
-from xarray import DataArray, Dataset, Variable
-from xarray.tests import mock
+from xarray import DataArray, Variable
 from xarray.core.npcompat import IS_NEP18_ACTIVE
 import xarray as xr
 import xarray.ufuncs as xu
 
-from . import (
-    assert_allclose,
-    assert_array_equal,
-    assert_equal,
-    assert_frame_equal,
-    assert_identical,
-    raises_regex,
-)
+from . import assert_equal, assert_identical
 
 import pytest
 
@@ -148,7 +137,6 @@ def test_variable_property(prop):
             True,
             marks=xfail(reason="'COO' object has no attribute 'argsort'"),
         ),
-        param(do("chunk", chunks=(5, 5)), True, marks=xfail),
         param(
             do(
                 "concat",
@@ -422,9 +410,6 @@ def test_dataarray_property(prop):
             False,
             marks=xfail(reason="Missing implementation for np.flip"),
         ),
-        param(
-            do("chunk", chunks=(5, 5)), False, marks=xfail(reason="Coercion to dense")
-        ),
         param(
             do("combine_first", make_xrarray({"x": 10, "y": 5})),
             True,
@@ -879,3 +864,17 @@ def test_sparse_coords(self):
             dims=["x"],
             coords={"x": COO.from_numpy([1, 2, 3, 4])},
         )
+
+
+def test_chunk():
+    s = sparse.COO.from_numpy(np.array([0, 0, 1, 2]))
+    a = DataArray(s)
+    ac = a.chunk(2)  # DataArray level: chunk the sparse-backed array
+    assert ac.chunks == ((2, 2),)
+    assert isinstance(ac.data._meta, sparse.COO)  # meta must stay sparse
+    assert_identical(ac, a)
+
+    ds = a.to_dataset(name="a")
+    dsc = ds.chunk(2)  # Dataset level: same check through Dataset.chunk
+    assert dsc.chunks == {"dim_0": (2, 2)}
+    assert_identical(dsc, ds)