9
9
10
10
11
11
from zarr .blosc import compress , decompress
12
-
13
-
14
def _is_total_slice(item, shape):
    """Determine whether `item` specifies a complete slice of array with the
    given `shape`. Used to optimise __setitem__ operations on the Chunk
    class.

    Parameters
    ----------
    item : Ellipsis, slice or tuple
        Selection to test.
    shape : tuple of ints
        Shape of the array the selection applies to.

    Returns
    -------
    bool
        True if every axis named by `item` is selected over its full length.
        Axes beyond the end of a short tuple are not examined. Any other
        kind of selection (e.g. an integer) gives False.

    """

    if item is Ellipsis:
        return True
    if isinstance(item, slice):
        # a bare slice only counts as total when it is exactly `:`
        return item == slice(None)
    if isinstance(item, tuple):
        for s, l in zip(item, shape):
            if not isinstance(s, slice):
                return False
            # a stepped slice skips elements, so it can never be total
            if s.step is not None and s.step != 1:
                return False
            # open-ended bounds default to the axis extent; subtracting
            # them directly would raise TypeError on None
            start = 0 if s.start is None else s.start
            stop = l if s.stop is None else s.stop
            if stop - start != l:
                return False
        return True
    return False
30
-
31
-
32
def _normalize_axis_selection(item, l):
    """Convenience function to normalize a selection within a single axis
    of size `l`.

    Parameters
    ----------
    item : int or slice
        Index or slice along the axis. Negative values count from the end
        of the axis. Any integral type is accepted, not only the builtin
        `int`.
    l : int
        Size of the axis.

    Returns
    -------
    start, stop : int
        Half-open range ``[start, stop)`` with ``0 <= start <= stop <= l``.
        A slice that selects nothing yields ``start == stop``.

    Raises
    ------
    IndexError
        If an integer index is out of bounds, or a slice bound is still
        negative after wraparound.
    NotImplementedError
        If a slice has a step other than 1.
    ValueError
        If `item` is neither an integer nor a slice.

    """
    import numbers

    if isinstance(item, numbers.Integral):
        index = int(item)
        if index < 0:
            # handle wraparound
            index += l
        if index < 0 or index > (l - 1):
            raise IndexError('index out of bounds: %s' % index)
        return index, index + 1

    if isinstance(item, slice):
        if item.step is not None and item.step != 1:
            raise NotImplementedError('slice with step not supported')
        start = 0 if item.start is None else item.start
        stop = l if item.stop is None else item.stop
        if start < 0:
            start = l + start
        if stop < 0:
            stop = l + stop
        if start < 0 or stop < 0:
            raise IndexError('index out of bounds: %s, %s' % (start, stop))
        # clamp both bounds to the axis, then make sure the range is never
        # negative-length: an empty selection is reported as start == stop
        start = min(start, l)
        stop = min(stop, l)
        stop = max(stop, start)
        return start, stop

    # wrap in a 1-tuple so that a tuple `item` is formatted as one value
    # instead of being unpacked as multiple format arguments
    raise ValueError('expected integer or slice, found: %r' % (item,))
61
-
62
-
63
def _normalize_array_selection(item, shape):
    """Convenience function to normalize a selection within an array with
    the given `shape`.

    Parameters
    ----------
    item : int, slice, Ellipsis or tuple of int/slice
        Selection to normalize.
    shape : tuple of ints
        Shape of the array being indexed.

    Returns
    -------
    selection : tuple of (start, stop) pairs
        One half-open range per axis of `shape`. Axes not named by `item`
        are selected in full.

    Raises
    ------
    IndexError
        If `item` names more axes than `shape` has, or if an individual
        index is out of bounds.
    ValueError
        If `item` is not an integer, slice, Ellipsis or tuple.

    """

    # normalize item
    if item is Ellipsis:
        item = (slice(None),)
    elif isinstance(item, (int, slice)):
        item = (item,)

    if not isinstance(item, tuple):
        raise ValueError('expected indices or slice, found: %r' % (item,))

    # zip() below would silently drop surplus indices, so reject them here
    if len(item) > len(shape):
        raise IndexError('too many indices for array: %s given, %s expected'
                         % (len(item), len(shape)))

    # determine start and stop indices for all axes
    selection = tuple(_normalize_axis_selection(i, l)
                      for i, l in zip(item, shape))

    # fill out selection if not completely specified
    if len(selection) < len(shape):
        selection += tuple((0, l) for l in shape[len(selection):])

    return selection
90
-
91
-
92
def _get_chunk_range(selection, chunks):
    """Convenience function to get a range over all chunk indices,
    for iterating over chunks.

    Parameters
    ----------
    selection : sequence of (start, stop) pairs
        Half-open range selected along each axis.
    chunks : sequence of ints
        Chunk length along each axis.

    Returns
    -------
    chunk_range : list of range
        For each axis, the chunk indices running from the chunk containing
        `start` up to and including the chunk containing `stop - 1`.

    """
    # -(-stop // l) is ceiling division in exact integer arithmetic: it does
    # not depend on `/` being true division and has no float rounding
    chunk_range = [range(start // l, -(-stop // l))
                   for (start, stop), l in zip(selection, chunks)]
    return chunk_range
12
+ from zarr .util import is_total_slice , normalize_array_selection , \
13
+ get_chunk_range
98
14
99
15
100
16
class Array (object ):
101
17
102
18
def __init__ (self , store ):
19
+ """Instantiate an array.
20
+
21
+ Parameters
22
+ ----------
23
+ store : zarr.store.base.ArrayStore
24
+ Array store.
25
+
26
+ """
103
27
self ._store = store
104
28
105
- # store configuration metadata
29
+ # configuration metadata
106
30
self ._shape = store .meta ['shape' ]
107
31
self ._chunks = store .meta ['chunks' ]
108
32
self ._dtype = store .meta ['dtype' ]
@@ -113,7 +37,7 @@ def __init__(self, store):
113
37
self ._shuffle = store .meta ['shuffle' ]
114
38
self ._fill_value = store .meta ['fill_value' ]
115
39
116
- # store user-defined attributes
40
+ # user-defined attributes
117
41
self ._attrs = store .attrs
118
42
119
43
@property
@@ -172,7 +96,7 @@ def nbytes(self):
172
96
def __getitem__ (self , item ):
173
97
174
98
# normalize selection
175
- selection = _normalize_array_selection (item , self ._shape )
99
+ selection = normalize_array_selection (item , self ._shape )
176
100
177
101
# determine output array shape
178
102
out_shape = tuple (stop - start for start , stop in selection )
@@ -181,7 +105,7 @@ def __getitem__(self, item):
181
105
out = np .empty (out_shape , dtype = self ._dtype )
182
106
183
107
# determine indices of chunks overlapping the selection
184
- chunk_range = _get_chunk_range (selection , self ._chunks )
108
+ chunk_range = get_chunk_range (selection , self ._chunks )
185
109
186
110
# iterate over chunks in range
187
111
for cidx in itertools .product (* chunk_range ):
@@ -215,10 +139,10 @@ def __array__(self):
215
139
def __setitem__ (self , key , value ):
216
140
217
141
# normalize selection
218
- selection = _normalize_array_selection (key , self ._shape )
142
+ selection = normalize_array_selection (key , self ._shape )
219
143
220
144
# determine indices of chunks overlapping the selection
221
- chunk_range = _get_chunk_range (selection , self ._chunks )
145
+ chunk_range = get_chunk_range (selection , self ._chunks )
222
146
223
147
# iterate over chunks in range
224
148
for cidx in itertools .product (* chunk_range ):
@@ -250,35 +174,59 @@ def __setitem__(self, key, value):
250
174
self ._chunk_setitem (cidx , chunk_selection , value [value_selection ])
251
175
252
176
def _chunk_getitem(self, cidx, item, dest):
    """Read a region of one chunk into `dest`.

    Sub-classes may override this method, e.g., if a lock is needed
    around chunk access.

    Parameters
    ----------
    cidx : tuple of ints
        Indices of the chunk.
    item : tuple of slices
        Location of region within the chunk.
    dest : ndarray
        Numpy array to store result in.

    """

    # fetch the compressed bytes for this chunk from the store
    compressed = self._store.data[cidx]

    whole_chunk = is_total_slice(item, self._chunks)
    if whole_chunk and dest.flags.c_contiguous:
        # fast path: the entire chunk is wanted and the destination is
        # C contiguous, so decompress straight into the destination
        decompress(compressed, dest)
        return

    # slow path: decompress into a scratch array shaped like a full chunk
    scratch = np.empty(self._chunks, dtype=self._dtype)
    decompress(compressed, scratch)

    # copy the requested region into the output array
    # (kept as two statements for profiling)
    region = scratch[item]
    dest[:] = region
276
212
277
213
def _chunk_setitem (self , cidx , key , value ):
278
-
214
+ """Replace part or whole of a chunk.
215
+
216
+ Parameters
217
+ ----------
218
+ cidx : tuple of ints
219
+ Indices of the chunk.
220
+ key : tuple of slices
221
+ Location of region within the chunk.
222
+ value : scalar or ndarray
223
+ Value to set.
224
+
225
+ """
226
+
279
227
# override this in sub-classes, e.g., if need to use a lock
280
228
281
- if _is_total_slice (key , self ._chunks ):
229
+ if is_total_slice (key , self ._chunks ):
282
230
283
231
# optimisation: we are completely replacing the chunk, so no need
284
232
# to access the existing chunk data
@@ -302,7 +250,7 @@ def _chunk_setitem(self, cidx, key, value):
302
250
303
251
# decompress
304
252
chunk = np .empty (self ._chunks , dtype = self ._dtype )
305
- decompress (cdata , chunk , self . _cname , self . _clevel , self . _shuffle )
253
+ decompress (cdata , chunk )
306
254
307
255
# modify
308
256
chunk [key ] = value
@@ -329,13 +277,15 @@ def append(self, data, axis=0):
329
277
# TODO
330
278
pass
331
279
332
- # TODO
333
-
334
280
335
281
class SynchronizedArray(Array):
    """Array variant that performs each chunk write while holding a lock
    obtained from a synchronizer.

    Parameters
    ----------
    store : zarr.store.base.ArrayStore
        Array store, passed through to `Array`.
    synchronizer : object
        Must provide ``lock_chunk(cidx)`` returning a context manager;
        presumably a per-chunk lock -- confirm against the synchronizer
        implementation.

    """

    def __init__(self, store, synchronizer):
        super(SynchronizedArray, self).__init__(store)
        self._synchronizer = synchronizer

    def _chunk_setitem(self, cidx, key, value):
        """Replace part or whole of a chunk while holding that chunk's
        lock."""
        chunk_lock = self._synchronizer.lock_chunk(cidx)
        with chunk_lock:
            parent = super(SynchronizedArray, self)
            parent._chunk_setitem(cidx, key, value)

    # TODO synchronize anything else?
0 commit comments