-
-
Notifications
You must be signed in to change notification settings - Fork 327
[REVIEW] Support of alternative array classes #934
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
590ace6
254bbae
40d084e
947152a
a60b2f4
8da9f17
0bf1cf0
9795f77
bf03fd7
d78ce33
c0402e2
39ffef5
86a1ec6
dc2be53
bb5538f
cb0c02f
745b612
9976f06
be9099d
082f299
3dd64dd
4c921e6
4ce89b2
2127ffa
2abc111
cc85f91
eb7f650
1706cad
a7857d6
b3fc488
2e2b022
dabf502
18c4c6b
2efa2fe
440753c
0dc8f93
fdcf949
d40593b
149d511
3ed7a7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,11 @@ | |
import math | ||
import operator | ||
import re | ||
from collections.abc import MutableMapping | ||
from functools import reduce | ||
from typing import Any | ||
|
||
import numpy as np | ||
from numcodecs.compat import ensure_bytes, ensure_ndarray | ||
from numcodecs.compat import ensure_bytes | ||
|
||
from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available | ||
from zarr.attrs import Attributes | ||
|
@@ -35,6 +34,7 @@ | |
from zarr.storage import ( | ||
_get_hierarchy_metadata, | ||
_prefix_to_array_key, | ||
KVStore, | ||
getsize, | ||
listdir, | ||
normalize_store_arg, | ||
|
@@ -51,6 +51,7 @@ | |
normalize_shape, | ||
normalize_storage_path, | ||
PartialReadBuffer, | ||
ensure_ndarray_like | ||
) | ||
|
||
|
||
|
@@ -98,6 +99,12 @@ class Array: | |
|
||
.. versionadded:: 2.11 | ||
|
||
meta_array : array-like, optional | ||
An array instance to use for determining arrays to create and return | ||
to users. Use `numpy.empty(())` by default. | ||
|
||
.. versionadded:: 2.13 | ||
|
||
|
||
Attributes | ||
---------- | ||
|
@@ -129,6 +136,7 @@ class Array: | |
vindex | ||
oindex | ||
write_empty_chunks | ||
meta_array | ||
|
||
Methods | ||
------- | ||
|
@@ -163,6 +171,7 @@ def __init__( | |
partial_decompress=False, | ||
write_empty_chunks=True, | ||
zarr_version=None, | ||
meta_array=None, | ||
): | ||
# N.B., expect at this point store is fully initialized with all | ||
# configuration metadata fully specified and normalized | ||
|
@@ -191,8 +200,11 @@ def __init__( | |
self._is_view = False | ||
self._partial_decompress = partial_decompress | ||
self._write_empty_chunks = write_empty_chunks | ||
if meta_array is not None: | ||
self._meta_array = np.empty_like(meta_array, shape=()) | ||
else: | ||
self._meta_array = np.empty(()) | ||
self._version = zarr_version | ||
|
||
if self._version == 3: | ||
self._data_key_prefix = 'data/root/' + self._key_prefix | ||
self._data_path = 'data/root/' + self._path | ||
|
@@ -555,6 +567,13 @@ def write_empty_chunks(self) -> bool: | |
""" | ||
return self._write_empty_chunks | ||
|
||
@property | ||
def meta_array(self): | ||
"""An array-like instance to use for determining arrays to create and return | ||
to users. | ||
""" | ||
return self._meta_array | ||
|
||
def __eq__(self, other): | ||
return ( | ||
isinstance(other, Array) and | ||
|
@@ -929,7 +948,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None): | |
|
||
except KeyError: | ||
# chunk not initialized | ||
chunk = np.zeros((), dtype=self._dtype) | ||
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) | ||
if self._fill_value is not None: | ||
chunk.fill(self._fill_value) | ||
|
||
|
@@ -1233,7 +1252,8 @@ def _get_selection(self, indexer, out=None, fields=None): | |
|
||
# setup output array | ||
if out is None: | ||
out = np.empty(out_shape, dtype=out_dtype, order=self._order) | ||
out = np.empty_like(self._meta_array, shape=out_shape, | ||
dtype=out_dtype, order=self._order) | ||
else: | ||
check_array_shape('out', out, out_shape) | ||
|
||
|
@@ -1607,9 +1627,13 @@ def set_coordinate_selection(self, selection, value, fields=None): | |
# setup indexer | ||
indexer = CoordinateIndexer(selection, self) | ||
|
||
# handle value - need to flatten | ||
# handle value - need ndarray-like flatten value | ||
if not is_scalar(value, self._dtype): | ||
value = np.asanyarray(value) | ||
try: | ||
value = ensure_ndarray_like(value) | ||
except TypeError: | ||
# Handle types like `list` or `tuple` | ||
value = np.array(value, like=self._meta_array) | ||
if hasattr(value, 'shape') and len(value.shape) > 1: | ||
value = value.reshape(-1) | ||
|
||
|
@@ -1712,7 +1736,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None): | |
|
||
except KeyError: | ||
# chunk not initialized | ||
chunk = np.zeros((), dtype=self._dtype) | ||
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) | ||
if self._fill_value is not None: | ||
chunk.fill(self._fill_value) | ||
|
||
|
@@ -1772,7 +1796,7 @@ def _set_selection(self, indexer, value, fields=None): | |
pass | ||
else: | ||
if not hasattr(value, 'shape'): | ||
value = np.asanyarray(value) | ||
value = np.asanyarray(value, like=self._meta_array) | ||
check_array_shape('value', value, sel_shape) | ||
|
||
# iterate over chunks in range | ||
|
@@ -1840,8 +1864,11 @@ def _process_chunk( | |
self._dtype != object): | ||
|
||
dest = out[out_selection] | ||
# Assume that array-like objects that doesn't have a | ||
# `writeable` flag is writable. | ||
dest_is_writable = getattr(dest, "writeable", True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wouldn't we need to check There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CuPy array doesn't have a # Assume that array-like objects that doesn't have a
# `writeable` flag is writable.
dest_is_writable = getattr(dest, "writeable", True) |
||
write_direct = ( | ||
dest.flags.writeable and | ||
dest_is_writable and | ||
( | ||
(self._order == 'C' and dest.flags.c_contiguous) or | ||
(self._order == 'F' and dest.flags.f_contiguous) | ||
|
@@ -1858,7 +1885,7 @@ def _process_chunk( | |
cdata = cdata.read_full() | ||
self._compressor.decode(cdata, dest) | ||
else: | ||
chunk = ensure_ndarray(cdata).view(self._dtype) | ||
chunk = ensure_ndarray_like(cdata).view(self._dtype) | ||
chunk = chunk.reshape(self._chunks, order=self._order) | ||
np.copyto(dest, chunk) | ||
return | ||
|
@@ -1868,7 +1895,7 @@ def _process_chunk( | |
if partial_read_decode: | ||
cdata.prepare_chunk() | ||
# size of chunk | ||
tmp = np.empty(self._chunks, dtype=self.dtype) | ||
tmp = np.empty_like(self._meta_array, shape=self._chunks, dtype=self.dtype) | ||
index_selection = PartialChunkIterator(chunk_selection, self.chunks) | ||
for start, nitems, partial_out_selection in index_selection: | ||
expected_shape = [ | ||
|
@@ -1925,7 +1952,7 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, | |
""" | ||
out_is_ndarray = True | ||
try: | ||
out = ensure_ndarray(out) | ||
out = ensure_ndarray_like(out) | ||
except TypeError: | ||
out_is_ndarray = False | ||
|
||
|
@@ -1960,7 +1987,7 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, | |
""" | ||
out_is_ndarray = True | ||
try: | ||
out = ensure_ndarray(out) | ||
out = ensure_ndarray_like(out) | ||
except TypeError: # pragma: no cover | ||
out_is_ndarray = False | ||
|
||
|
@@ -2082,7 +2109,9 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): | |
if is_scalar(value, self._dtype): | ||
|
||
# setup array filled with value | ||
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) | ||
chunk = np.empty_like( | ||
self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order | ||
) | ||
chunk.fill(value) | ||
|
||
else: | ||
|
@@ -2102,14 +2131,18 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): | |
|
||
# chunk not initialized | ||
if self._fill_value is not None: | ||
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) | ||
chunk = np.empty_like( | ||
self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order | ||
) | ||
chunk.fill(self._fill_value) | ||
elif self._dtype == object: | ||
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) | ||
else: | ||
# N.B., use zeros here so any region beyond the array has consistent | ||
# and compressible data | ||
chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order) | ||
chunk = np.zeros_like( | ||
self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order | ||
) | ||
|
||
else: | ||
|
||
|
@@ -2159,7 +2192,7 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): | |
chunk = f.decode(chunk) | ||
|
||
# view as numpy array with correct dtype | ||
chunk = ensure_ndarray(chunk) | ||
chunk = ensure_ndarray_like(chunk) | ||
# special case object dtype, because incorrect handling can lead to | ||
# segfaults and other bad things happening | ||
if self._dtype != object: | ||
|
@@ -2186,7 +2219,7 @@ def _encode_chunk(self, chunk): | |
chunk = f.encode(chunk) | ||
|
||
# check object encoding | ||
if ensure_ndarray(chunk).dtype == object: | ||
if ensure_ndarray_like(chunk).dtype == object: | ||
raise RuntimeError('cannot write object array without object codec') | ||
|
||
# compress | ||
|
@@ -2196,7 +2229,7 @@ def _encode_chunk(self, chunk): | |
cdata = chunk | ||
|
||
# ensure in-memory data is immutable and easy to compare | ||
if isinstance(self.chunk_store, MutableMapping): | ||
if isinstance(self.chunk_store, KVStore): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since all store classes derive from |
||
cdata = ensure_bytes(cdata) | ||
|
||
return cdata | ||
|
@@ -2354,12 +2387,22 @@ def hexdigest(self, hashname="sha1"): | |
return checksum | ||
|
||
def __getstate__(self): | ||
return (self._store, self._path, self._read_only, self._chunk_store, | ||
self._synchronizer, self._cache_metadata, self._attrs.cache, | ||
self._partial_decompress, self._write_empty_chunks, self._version) | ||
return { | ||
"store": self._store, | ||
"path": self._path, | ||
"read_only": self._read_only, | ||
"chunk_store": self._chunk_store, | ||
"synchronizer": self._synchronizer, | ||
"cache_metadata": self._cache_metadata, | ||
"cache_attrs": self._attrs.cache, | ||
"partial_decompress": self._partial_decompress, | ||
"write_empty_chunks": self._write_empty_chunks, | ||
"zarr_version": self._version, | ||
"meta_array": self._meta_array, | ||
} | ||
|
||
def __setstate__(self, state): | ||
self.__init__(*state) | ||
self.__init__(**state) | ||
|
||
def _synchronized_op(self, f, *args, **kwargs): | ||
|
||
|
@@ -2466,7 +2509,7 @@ def append(self, data, axis=0): | |
|
||
Parameters | ||
---------- | ||
data : array_like | ||
data : array-like | ||
Data to be appended. | ||
axis : int | ||
Axis along which to append. | ||
|
@@ -2502,7 +2545,7 @@ def _append_nosync(self, data, axis=0): | |
|
||
# ensure data is array-like | ||
if not hasattr(data, 'shape'): | ||
data = np.asanyarray(data) | ||
data = np.asanyarray(data, like=self._meta_array) | ||
|
||
# ensure shapes are compatible for non-append dimensions | ||
self_shape_preserved = tuple(s for i, s in enumerate(self._shape) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if this could be picked up from the
Store
itself. That way a Store
could specify its return type (NumPy or otherwise). This would save the user from getting involved as much in the process and hopefully make it easier for them to get started. WDYT? @grlee77, it would be good to get your thoughts as well in light of the
BaseStore
work 🙂There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be a good idea to have
Store
define a default meta_array
but, in the general case, I think a Store
should be able to handle both NumPy and other types like CuPy arrays simultaneously.