Skip to content

Commit 68cd799

Browse files
committed
some refactoring and documentation
1 parent 24d8cf3 commit 68cd799

File tree

7 files changed

+284
-156
lines changed

7 files changed

+284
-156
lines changed

zarr/array.py

Lines changed: 51 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -9,100 +9,24 @@
99

1010

1111
from zarr.blosc import compress, decompress
12-
13-
14-
def _is_total_slice(item, shape):
    """Determine whether `item` specifies a complete slice of an array with
    the given `shape`. Used to optimise __setitem__ operations on the Chunk
    class.

    Parameters
    ----------
    item : Ellipsis, slice, tuple of slices, or any other object
        The selection to test.
    shape : tuple of ints
        Shape of the array the selection applies to.

    Returns
    -------
    total : bool
        True if every axis named in `item` is covered from 0 to its full
        length with unit step (axes not named in `item` are implicitly
        complete); False for anything else, including integer indices.

    """

    # identity test: `==` would broadcast if `item` were an ndarray
    if item is Ellipsis:
        return True

    # a bare slice addresses the first axis only
    if isinstance(item, slice):
        item = (item,)

    if not isinstance(item, tuple):
        return False

    for sel, length in zip(item, shape):
        if not isinstance(sel, slice):
            return False
        # slice.indices resolves None and negative bounds and clips to the
        # axis length, so open-ended slices no longer raise TypeError and
        # offset or strided slices are no longer mistaken for total ones
        start, stop, step = sel.indices(length)
        if step != 1 or start != 0 or stop != length:
            return False

    return True
30-
31-
32-
def _normalize_axis_selection(item, l):
    """Convenience function to normalize a selection within a single axis
    of size `l`.

    Parameters
    ----------
    item : int or slice
        Index or unit-step slice along the axis. Negative values wrap
        around once, relative to the end of the axis.
    l : int
        Length of the axis.

    Returns
    -------
    start, stop : int
        Half-open range selected along the axis, with
        ``0 <= start <= stop <= l``.

    Raises
    ------
    IndexError
        If an integer index, or a slice bound after wraparound, falls
        before the start of the axis, or an integer index falls past the end.
    NotImplementedError
        If a slice has a step other than 1.
    ValueError
        If `item` is neither an integer nor a slice.

    """
    import numbers

    # numbers.Integral also admits numpy integer scalars
    if isinstance(item, numbers.Integral):
        item = int(item)
        if item < 0:
            # handle wraparound
            item = l + item
        if item > (l - 1) or item < 0:
            raise IndexError('index out of bounds: %s' % item)
        return item, item + 1

    if isinstance(item, slice):
        if item.step is not None and item.step != 1:
            raise NotImplementedError('slice with step not supported')
        start = 0 if item.start is None else item.start
        stop = l if item.stop is None else item.stop
        if start < 0:
            start = l + start
        if stop < 0:
            stop = l + stop
        if start < 0 or stop < 0:
            raise IndexError('index out of bounds: %s, %s' % (start, stop))
        # clip both bounds to the axis, and never let the range run
        # backwards, so callers cannot derive a negative extent from it
        start = min(start, l)
        stop = max(min(stop, l), start)
        return start, stop

    # wrap in a 1-tuple so that a tuple `item` is formatted, not unpacked
    raise ValueError('expected integer or slice, found: %r' % (item,))
61-
62-
63-
def _normalize_array_selection(item, shape):
    """Convenience function to normalize a selection within an array with
    the given `shape`.

    Parameters
    ----------
    item : int, slice, Ellipsis, or tuple of ints/slices
        The selection. Axes not named in `item` are selected in full.
    shape : tuple of ints
        Shape of the array.

    Returns
    -------
    selection : tuple of (start, stop) pairs
        One half-open range per axis of `shape`.

    Raises
    ------
    IndexError
        If `item` names more axes than `shape` has. Errors raised while
        normalizing an individual axis propagate unchanged.
    ValueError
        If `item` is not an integer, slice, Ellipsis or tuple.

    """
    import numbers

    # normalize item to a tuple with at most one entry per axis
    if isinstance(item, numbers.Integral):
        # also admits numpy integer scalars
        item = (int(item),)
    elif isinstance(item, slice):
        item = (item,)
    elif item is Ellipsis:
        # identity test: `==` would broadcast if `item` were an ndarray;
        # an empty tuple means "every axis in full" once filled out below
        item = ()

    if not isinstance(item, tuple):
        raise ValueError('expected indices or slice, found: %r' % (item,))

    # zip() below would silently discard surplus indices
    if len(item) > len(shape):
        raise IndexError(
            'too many indices for array: array is %d-dimensional, '
            'but %d were indexed' % (len(shape), len(item))
        )

    # determine start and stop indices for the axes that were specified
    selection = tuple(_normalize_axis_selection(i, l)
                      for i, l in zip(item, shape))

    # fill out selection if not completely specified
    selection += tuple((0, l) for l in shape[len(selection):])

    return selection
90-
91-
92-
def _get_chunk_range(selection, chunks):
    """Convenience function to get a range over all chunk indices,
    for iterating over chunks.

    Parameters
    ----------
    selection : sequence of (start, stop) pairs
        Half-open range selected along each axis.
    chunks : sequence of ints
        Chunk length along each axis.

    Returns
    -------
    chunk_range : list of range
        For each axis, the indices of the chunks overlapping the selection.

    """
    # -(-stop // l) is ceiling division in exact integer arithmetic; going
    # through float division loses precision once stop exceeds 2**53
    chunk_range = [range(start // l, -(-stop // l))
                   for (start, stop), l in zip(selection, chunks)]
    return chunk_range
12+
from zarr.util import is_total_slice, normalize_array_selection, \
13+
get_chunk_range
9814

9915

10016
class Array(object):
10117

10218
def __init__(self, store):
19+
"""Instantiate an array.
20+
21+
Parameters
22+
----------
23+
store : zarr.store.base.ArrayStore
24+
Array store.
25+
26+
"""
10327
self._store = store
10428

105-
# store configuration metadata
29+
# configuration metadata
10630
self._shape = store.meta['shape']
10731
self._chunks = store.meta['chunks']
10832
self._dtype = store.meta['dtype']
@@ -113,7 +37,7 @@ def __init__(self, store):
11337
self._shuffle = store.meta['shuffle']
11438
self._fill_value = store.meta['fill_value']
11539

116-
# store user-defined attributes
40+
# user-defined attributes
11741
self._attrs = store.attrs
11842

11943
@property
@@ -172,7 +96,7 @@ def nbytes(self):
17296
def __getitem__(self, item):
17397

17498
# normalize selection
175-
selection = _normalize_array_selection(item, self._shape)
99+
selection = normalize_array_selection(item, self._shape)
176100

177101
# determine output array shape
178102
out_shape = tuple(stop - start for start, stop in selection)
@@ -181,7 +105,7 @@ def __getitem__(self, item):
181105
out = np.empty(out_shape, dtype=self._dtype)
182106

183107
# determine indices of chunks overlapping the selection
184-
chunk_range = _get_chunk_range(selection, self._chunks)
108+
chunk_range = get_chunk_range(selection, self._chunks)
185109

186110
# iterate over chunks in range
187111
for cidx in itertools.product(*chunk_range):
@@ -215,10 +139,10 @@ def __array__(self):
215139
def __setitem__(self, key, value):
216140

217141
# normalize selection
218-
selection = _normalize_array_selection(key, self._shape)
142+
selection = normalize_array_selection(key, self._shape)
219143

220144
# determine indices of chunks overlapping the selection
221-
chunk_range = _get_chunk_range(selection, self._chunks)
145+
chunk_range = get_chunk_range(selection, self._chunks)
222146

223147
# iterate over chunks in range
224148
for cidx in itertools.product(*chunk_range):
@@ -250,35 +174,59 @@ def __setitem__(self, key, value):
250174
self._chunk_setitem(cidx, chunk_selection, value[value_selection])
251175

252176
def _chunk_getitem(self, cidx, item, dest):
    """Read part or the whole of one chunk into `dest`.

    Parameters
    ----------
    cidx : tuple of ints
        Indices of the chunk.
    item : tuple of slices
        Location of the region within the chunk.
    dest : ndarray
        Numpy array that receives the result.

    """

    # sub-classes may override this, e.g., to take a lock around the read

    # compressed bytes for this chunk
    cdata = self._store.data[cidx]

    whole_chunk = (is_total_slice(item, self._chunks)
                   and dest.flags.c_contiguous)
    if whole_chunk:
        # fast path: the entire chunk is wanted and the destination is
        # C contiguous, so decompress straight into it
        decompress(cdata, dest)
        return

    # general path: decompress into a scratch array the size of a chunk
    scratch = np.empty(self._chunks, dtype=self._dtype)
    decompress(cdata, scratch)

    # copy the requested region out
    # (kept as two statements so each shows up separately when profiling)
    region = scratch[item]
    dest[:] = region
276212

277213
def _chunk_setitem(self, cidx, key, value):
278-
214+
"""Replace part or whole of a chunk.
215+
216+
Parameters
217+
----------
218+
cidx : tuple of ints
219+
Indices of the chunk.
220+
key : tuple of slices
221+
Location of region within the chunk.
222+
value : scalar or ndarray
223+
Value to set.
224+
225+
"""
226+
279227
# override this in sub-classes, e.g., if need to use a lock
280228

281-
if _is_total_slice(key, self._chunks):
229+
if is_total_slice(key, self._chunks):
282230

283231
# optimisation: we are completely replacing the chunk, so no need
284232
# to access the existing chunk data
@@ -302,7 +250,7 @@ def _chunk_setitem(self, cidx, key, value):
302250

303251
# decompress
304252
chunk = np.empty(self._chunks, dtype=self._dtype)
305-
decompress(cdata, chunk, self._cname, self._clevel, self._shuffle)
253+
decompress(cdata, chunk)
306254

307255
# modify
308256
chunk[key] = value
@@ -329,13 +277,15 @@ def append(self, data, axis=0):
329277
# TODO
330278
pass
331279

332-
# TODO
333-
334280

335281
class SynchronizedArray(Array):
    """Array that performs each chunk write inside the context manager
    returned by ``synchronizer.lock_chunk(cidx)``."""

    def __init__(self, store, synchronizer):
        """Instantiate a synchronized array.

        Parameters
        ----------
        store
            Array store, passed through to the `Array` constructor.
        synchronizer
            Object providing ``lock_chunk(cidx)``.

        """
        super(SynchronizedArray, self).__init__(store)
        self._synchronizer = synchronizer

    def _chunk_setitem(self, cidx, key, value):
        """Replace part or whole of a chunk while holding that chunk's
        lock."""
        chunk_lock = self._synchronizer.lock_chunk(cidx)
        with chunk_lock:
            super(SynchronizedArray, self)._chunk_setitem(cidx, key, value)

    # TODO synchronize anything else?

zarr/blosc.pyx

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,45 @@ import numpy as np
66
cimport numpy as np
77

88

9-
def decompress(bytes cdata, np.ndarray array, bytes cname, int clevel,
10-
int shuffle):
9+
def decompress(bytes cdata, np.ndarray array):
10+
"""Decompress data into a numpy array.
11+
12+
Parameters
13+
----------
14+
cdata : bytes
15+
Compressed data, including blosc header.
16+
array : ndarray
17+
Numpy array to decompress into.
18+
19+
Notes
20+
-----
21+
Assumes that the size of the destination array is correct for the size of
22+
the uncompressed data.
23+
24+
"""
1125
# TODO
1226
pass
1327

1428

1529
def compress(np.ndarray array, bytes cname, int clevel, int shuffle):
30+
"""Compress data in a numpy array.
31+
32+
Parameters
33+
----------
34+
array : ndarray
35+
Numpy array containing data to be compressed.
36+
cname : bytes
37+
Name of compression library to use.
38+
clevel : int
39+
Compression level.
40+
shuffle : int
41+
Shuffle filter.
42+
43+
Returns
44+
-------
45+
cdata : bytes
46+
Compressed data.
47+
48+
"""
1649
# TODO
1750
pass

zarr/store/base.py

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -6,77 +6,36 @@
66

77

88
class ArrayStore(metaclass=ABCMeta):
    """Abstract interface for the storage of a single array.

    Concrete stores must implement every property below.
    """

    @property
    @abstractmethod
    def meta(self):
        """MutableMapping holding the array's configuration metadata."""

    @property
    @abstractmethod
    def data(self):
        """MutableMapping holding the compressed data of each chunk of the
        array."""

    @property
    @abstractmethod
    def attrs(self):
        """MutableMapping holding user-defined attributes."""

    @property
    @abstractmethod
    def cbytes(self):
        """Total size, in bytes, of the compressed data held for the
        array."""

    @property
    @abstractmethod
    def initialized(self):
        """Number of chunks that have been initialized."""

0 commit comments

Comments
 (0)