Skip to content

Commit 3555c1b

Browse files
committed
tested array getitem and setitem; cc #21
1 parent 7b043fb commit 3555c1b

File tree

3 files changed

+219
-29
lines changed

3 files changed

+219
-29
lines changed

zarr/array.py

+82-23
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,45 @@
55
from functools import reduce # TODO PY2 compatibility
66
import operator
77
import itertools
8+
import multiprocessing
89
import numpy as np
910

1011

11-
from zarr.blosc import compress, decompress
12+
from zarr import blosc
1213
from zarr.util import is_total_slice, normalize_array_selection, \
1314
get_chunk_range
1415

1516

17+
# Module-level flag consulted by compress/decompress calls; toggled via
# set_blosc_options().
_blosc_use_context = False


def set_blosc_options(use_context=False, nthreads=None):
    """Set options for how the blosc compressor is used.

    Parameters
    ----------
    use_context : bool, optional
        If False, blosc will be used in non-contextual mode, which is best
        when using zarr in a single-threaded environment because it allows
        blosc to use multiple threads internally. If True, blosc will be used
        in contextual mode, which is better when using zarr in a
        multi-threaded environment like dask.array because it avoids the
        blosc global lock and so multiple blosc operations can be running
        concurrently.
    nthreads : int, optional
        Number of internal threads to use when running blosc in
        non-contextual mode. Defaults to the CPU count, capped at 4.

    """
    global _blosc_use_context
    _blosc_use_context = use_context
    if not use_context:
        if nthreads is None:
            # diminishing returns beyond 4 threads, so cap there; the
            # original max() raised the count above 4 on many-core machines,
            # contradicting the intent of the cap
            nthreads = min(4, multiprocessing.cpu_count())
        blosc.set_nthreads(nthreads)
45+
46+
1647
class Array(object):
1748

1849
def __init__(self, store):
@@ -38,6 +69,10 @@ def __init__(self, store):
3869
# user-defined attributes
3970
self._attrs = store.attrs
4071

72+
@property
73+
def store(self):
74+
return self._store
75+
4176
@property
4277
def shape(self):
4378
return self._shape
@@ -170,11 +205,13 @@ def __setitem__(self, key, value):
170205
# determine index within value
171206
value_selection = tuple(
172207
slice(max(0, o - start), min(o + c - start, stop - start))
173-
for (start, stop), o, c, in zip(selection, offset, self._chunks)
208+
for (start, stop), o, c, in zip(selection, offset,
209+
self._chunks)
174210
)
175211

176212
# put data
177-
self._chunk_setitem(cidx, chunk_selection, value[value_selection])
213+
self._chunk_setitem(cidx, chunk_selection,
214+
value[value_selection])
178215

179216
def _chunk_getitem(self, cidx, item, dest):
180217
"""Obtain part or whole of a chunk.
@@ -192,26 +229,36 @@ def _chunk_getitem(self, cidx, item, dest):
192229

193230
# override this in sub-classes, e.g., if need to use a lock
194231

195-
# obtain compressed data for chunk
196-
cdata = self._store.data[cidx]
232+
try:
233+
234+
# obtain compressed data for chunk
235+
cdata = self._store.data[cidx]
197236

198-
if is_total_slice(item, self._chunks) and dest.flags.c_contiguous:
237+
except KeyError:
199238

200-
# optimisation: we want the whole chunk, and the destination is
201-
# C contiguous, so we can decompress directly from the chunk
202-
# into the destination array
203-
decompress(cdata, dest)
239+
# chunk not initialized
240+
if self._fill_value is not None:
241+
dest.fill(self._fill_value)
204242

205243
else:
206244

207-
# decompress chunk
208-
chunk = np.empty(self._chunks, dtype=self._dtype)
209-
decompress(cdata, chunk)
245+
if is_total_slice(item, self._chunks) and dest.flags.c_contiguous:
210246

211-
# set data in output array
212-
# (split into two lines for profiling)
213-
tmp = chunk[item]
214-
dest[:] = tmp
247+
# optimisation: we want the whole chunk, and the destination is
248+
# C contiguous, so we can decompress directly from the chunk
249+
# into the destination array
250+
blosc.decompress(cdata, dest, _blosc_use_context)
251+
252+
else:
253+
254+
# decompress chunk
255+
chunk = np.empty(self._chunks, dtype=self._dtype)
256+
blosc.decompress(cdata, chunk, _blosc_use_context)
257+
258+
# set data in output array
259+
# (split into two lines for profiling)
260+
tmp = chunk[item]
261+
dest[:] = tmp
215262

216263
def _chunk_setitem(self, cidx, key, value):
217264
"""Replace part or whole of a chunk.
@@ -248,18 +295,30 @@ def _chunk_setitem(self, cidx, key, value):
248295
else:
249296
# partially replace the contents of this chunk
250297

251-
# obtain compressed data for chunk
252-
cdata = self._store.data[cidx]
298+
try:
299+
300+
# obtain compressed data for chunk
301+
cdata = self._store.data[cidx]
302+
303+
except KeyError:
304+
305+
# chunk not initialized
306+
chunk = np.empty(self.chunks, dtype=self._dtype)
307+
if self._fill_value is not None:
308+
chunk.fill(self._fill_value)
309+
310+
else:
253311

254-
# decompress
255-
chunk = np.empty(self._chunks, dtype=self._dtype)
256-
decompress(cdata, chunk)
312+
# decompress chunk
313+
chunk = np.empty(self.chunks, dtype=self._dtype)
314+
blosc.decompress(cdata, chunk, _blosc_use_context)
257315

258316
# modify
259317
chunk[key] = value
260318

261319
# compress
262-
cdata = compress(chunk, self._cname, self._clevel, self._shuffle)
320+
cdata = blosc.compress(chunk, self._cname, self._clevel,
321+
self._shuffle, _blosc_use_context)
263322

264323
# store
265324
self._store.data[cidx] = cdata

zarr/blosc.pyx

+4-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ cdef extern from "blosc.h":
3737
size_t blocksize, int numinternalthreads) nogil
3838
int blosc_decompress_ctx(const void *src, void *dest, size_t destsize,
3939
int numinternalthreads) nogil
40-
void blosc_cbuffer_sizes(void *cbuffer, size_t *nbytes,
41-
size_t *cbytes, size_t *blocksize)
4240

4341

4442
def version():
@@ -64,6 +62,10 @@ def compname_to_compcode(bytes cname):
6462
return blosc_compname_to_compcode(cname)
6563

6664

65+
def set_nthreads(int nthreads):
    """Set the number of threads blosc uses internally during compression
    and decompression in non-contextual mode."""
    blosc_set_nthreads(nthreads)
67+
68+
6769
def decompress(bytes cdata, np.ndarray array, use_context):
6870
"""Decompress data into a numpy array.
6971

zarr/tests/test_array.py

+133-4
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,24 @@
33

44

55
import numpy as np
6-
from nose.tools import eq_ as eq
6+
from numpy.testing import assert_array_equal
7+
from nose.tools import eq_ as eq, assert_is_instance
78

89

910
from zarr.store.memory import MemoryStore
1011
from zarr.array import Array
1112
from zarr import defaults
1213

1314

15+
def create_array(shape, chunks, **kwargs):
    """Convenience helper: build an Array backed by a fresh MemoryStore."""
    return Array(MemoryStore(shape, chunks, **kwargs))
18+
19+
1420
def test_1d():
1521

1622
a = np.arange(1050)
17-
store = MemoryStore(a.shape, chunks=100, dtype=a.dtype)
18-
z = Array(store)
23+
z = create_array(a.shape, chunks=100, dtype=a.dtype)
1924

2025
# check properties
2126
eq(a.shape, z.shape)
@@ -28,4 +33,128 @@ def test_1d():
2833
eq(0, z.cbytes)
2934
eq(0, z.initialized)
3035

31-
# TODO
36+
# check empty
37+
b = z[:]
38+
assert_is_instance(b, np.ndarray)
39+
eq(a.shape, b.shape)
40+
eq(a.dtype, b.dtype)
41+
42+
# set data
43+
z[:] = a
44+
45+
# check properties
46+
eq(a.nbytes, z.nbytes)
47+
assert z.cbytes > 0
48+
eq(z.store.cbytes, z.cbytes)
49+
eq(11, z.initialized)
50+
51+
# check slicing
52+
assert_array_equal(a, z[:])
53+
assert_array_equal(a, z[...])
54+
# noinspection PyTypeChecker
55+
assert_array_equal(a, z[slice(None)])
56+
assert_array_equal(a[:10], z[:10])
57+
assert_array_equal(a[10:20], z[10:20])
58+
assert_array_equal(a[-10:], z[-10:])
59+
# ...across chunk boundaries...
60+
assert_array_equal(a[:110], z[:110])
61+
assert_array_equal(a[190:310], z[190:310])
62+
assert_array_equal(a[-110:], z[-110:])
63+
64+
# check partial assignment
65+
b = np.arange(1e5, 2e5)
66+
z[190:310] = b[190:310]
67+
assert_array_equal(a[:190], z[:190])
68+
assert_array_equal(b[190:310], z[190:310])
69+
assert_array_equal(a[310:], z[310:])
70+
71+
72+
def test_array_1d_fill_value():
    """Uninitialized chunks should read back as the configured fill value."""

    # reference data is invariant across fill values, so build it once
    a = np.arange(1050)

    for fill_value in (-1, 0, 1, 10):

        expected_fill = np.full_like(a, fill_value)
        z = create_array(shape=a.shape, chunks=100, dtype=a.dtype,
                         fill_value=fill_value)
        # initialize only the middle chunks
        z[190:310] = a[190:310]

        assert_array_equal(expected_fill[:190], z[:190])
        assert_array_equal(a[190:310], z[190:310])
        assert_array_equal(expected_fill[310:], z[310:])
86+
87+
88+
def test_array_1d_set_scalar():
    """Assigning a scalar to a slice or to the whole array."""

    # setup: use a deterministic baseline; the original np.empty(100) left
    # uninitialized memory that was then compared element-wise, which is
    # only reliable by accident
    a = np.zeros(100)
    z = create_array(shape=a.shape, chunks=10, dtype=a.dtype)
    z[:] = a
    assert_array_equal(a, z[:])

    for value in -1, 0, 1, 10:
        # partial scalar assignment, crossing chunk boundaries
        a[15:35] = value
        z[15:35] = value
        assert_array_equal(a, z[:])
        # whole-array scalar assignment
        a[:] = value
        z[:] = value
        assert_array_equal(a, z[:])
103+
104+
105+
def test_array_2d():
    """Round-trip and slicing behavior for a 2-dimensional array."""

    a = np.arange(10000).reshape((1000, 10))
    z = create_array(shape=a.shape, chunks=(100, 2), dtype=a.dtype)

    # properties before any data has been written
    eq(a.shape, z.shape)
    eq(a.dtype, z.dtype)
    eq((100, 2), z.chunks)
    eq(defaults.cname, z.cname)
    eq(defaults.clevel, z.clevel)
    eq(defaults.shuffle, z.shuffle)
    eq(0, z.cbytes)
    eq(0, z.initialized)

    # set data
    z[:] = a

    # properties after writing
    eq(a.nbytes, z.nbytes)
    assert z.cbytes > 0
    eq(50, z.initialized)

    # slicing: whole array, chunk-aligned ranges, and ranges crossing
    # chunk boundaries, in both dimensions
    selections = [
        Ellipsis,
        np.s_[:],
        np.s_[:10], np.s_[10:20], np.s_[-10:],
        np.s_[:, :2], np.s_[:, 2:4], np.s_[:, -2:],
        np.s_[:10, :2], np.s_[10:20, 2:4], np.s_[-10:, -2:],
        np.s_[:110], np.s_[190:310], np.s_[-110:],
        np.s_[:, :3], np.s_[:, 3:7], np.s_[:, -3:],
        np.s_[:110, :3], np.s_[190:310, 3:7], np.s_[-110:, -3:],
    ]
    for selection in selections:
        assert_array_equal(a[selection], z[selection])

    # partial assignment across chunk boundaries
    b = np.arange(10000, 20000).reshape((1000, 10))
    z[190:310, 3:7] = b[190:310, 3:7]
    assert_array_equal(a[:190], z[:190])
    assert_array_equal(a[:, :3], z[:, :3])
    assert_array_equal(b[190:310, 3:7], z[190:310, 3:7])
    assert_array_equal(a[310:], z[310:])
    assert_array_equal(a[:, 7:], z[:, 7:])

0 commit comments

Comments
 (0)