Skip to content

Commit 6454f5c

Browse files
committed
resolves #7 adding support for Fortran order within chunks
1 parent e8c3791 commit 6454f5c

10 files changed

+154
-59
lines changed

PERSISTENCE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ For example::
4848
},
4949
"dtype": "<i4",
5050
"fill_value": 42,
51+
"order": "C",
5152
"shape": [
5253
1000000,
5354
1000

README.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ Create an array:
5555
>>> import zarr
5656
>>> z = zarr.empty(shape=(10000, 1000), dtype='i4', chunks=(1000, 100))
5757
>>> z
58-
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100))
58+
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100), order=C)
5959
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
60-
nbytes: 38.1M; nbytes_stored: 300; ratio: 133333.3; initialized: 0/100
60+
nbytes: 38.1M; nbytes_stored: 318; ratio: 125786.2; initialized: 0/100
6161
store: builtins.dict
6262
6363
Fill it with some data:
@@ -66,7 +66,7 @@ Fill it with some data:
6666
6767
>>> z[:] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
6868
>>> z
69-
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100))
69+
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100), order=C)
7070
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
7171
nbytes: 38.1M; nbytes_stored: 2.0M; ratio: 19.3; initialized: 100/100
7272
store: builtins.dict
@@ -106,13 +106,13 @@ Resize the array and add more data:
106106
107107
>>> z.resize(20000, 1000)
108108
>>> z
109-
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100))
109+
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100), order=C)
110110
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
111111
nbytes: 76.3M; nbytes_stored: 2.0M; ratio: 38.5; initialized: 100/200
112112
store: builtins.dict
113113
>>> z[10000:, :] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
114114
>>> z
115-
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100))
115+
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100), order=C)
116116
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
117117
nbytes: 76.3M; nbytes_stored: 4.0M; ratio: 19.3; initialized: 200/200
118118
store: builtins.dict
@@ -126,13 +126,13 @@ append data to any axis:
126126
>>> z = zarr.array(a, chunks=(1000, 100))
127127
>>> z.append(a+a)
128128
>>> z
129-
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100))
129+
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100), order=C)
130130
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
131131
nbytes: 76.3M; nbytes_stored: 3.6M; ratio: 21.2; initialized: 200/200
132132
store: builtins.dict
133133
>>> z.append(np.vstack([a, a]), axis=1)
134134
>>> z
135-
zarr.core.Array((20000, 2000), int32, chunks=(1000, 100))
135+
zarr.core.Array((20000, 2000), int32, chunks=(1000, 100), order=C)
136136
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
137137
nbytes: 152.6M; nbytes_stored: 7.6M; ratio: 20.2; initialized: 400/400
138138
store: builtins.dict
@@ -148,7 +148,7 @@ Create a persistent array (data stored on disk):
148148
>>> z = zarr.open(path, mode='w', shape=(10000, 1000), dtype='i4', chunks=(1000, 100))
149149
>>> z[:] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
150150
>>> z
151-
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100))
151+
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100), order=C)
152152
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
153153
nbytes: 38.1M; nbytes_stored: 2.0M; ratio: 19.3; initialized: 100/100
154154
store: zarr.mappings.DirectoryMap

zarr/core.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@
1212
from zarr.compression import get_compressor_cls
1313
from zarr.util import is_total_slice, normalize_array_selection, \
1414
get_chunk_range, human_readable_size, normalize_shape, normalize_chunks, \
15-
normalize_resize_args
15+
normalize_resize_args, normalize_order
1616
from zarr.meta import decode_metadata, encode_metadata
1717
from zarr.attrs import Attributes, SynchronizedAttributes
1818
from zarr.compat import itervalues
1919
from zarr.errors import ReadOnlyError
2020

2121

2222
def init_store(store, shape, chunks, dtype=None, compression='blosc',
23-
compression_opts=None, fill_value=None, overwrite=False):
23+
compression_opts=None, fill_value=None,
24+
order='C', overwrite=False):
2425
"""Initialise an array store with the given configuration."""
2526

2627
# guard conditions
@@ -36,14 +37,15 @@ def init_store(store, shape, chunks, dtype=None, compression='blosc',
3637
compression_opts = compressor_cls.normalize_opts(
3738
compression_opts
3839
)
40+
order = normalize_order(order)
3941

4042
# delete any pre-existing items in store
4143
store.clear()
4244

4345
# initialise metadata
4446
meta = dict(shape=shape, chunks=chunks, dtype=dtype,
4547
compression=compression, compression_opts=compression_opts,
46-
fill_value=fill_value)
48+
fill_value=fill_value, order=order)
4749
store['meta'] = encode_metadata(meta)
4850

4951
# initialise attributes
@@ -69,9 +71,9 @@ def __init__(self, store, readonly=False):
6971
>>> zarr.init_store(store, shape=1000, chunks=100)
7072
>>> z = zarr.Array(store)
7173
>>> z
72-
zarr.core.Array((1000,), float64, chunks=(100,))
74+
zarr.core.Array((1000,), float64, chunks=(100,), order=C)
7375
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
74-
nbytes: 7.8K; nbytes_stored: 271; ratio: 29.5; initialized: 0/10
76+
nbytes: 7.8K; nbytes_stored: 289; ratio: 27.7; initialized: 0/10
7577
store: builtins.dict
7678
7779
""" # flake8: noqa
@@ -96,6 +98,7 @@ def __init__(self, store, readonly=False):
9698
self.compression = meta['compression']
9799
self.compression_opts = meta['compression_opts']
98100
self.fill_value = meta['fill_value']
101+
self.order = meta['order']
99102
compressor_cls = get_compressor_cls(self.compression)
100103
self.compressor = compressor_cls(self.compression_opts)
101104

@@ -106,7 +109,7 @@ def flush_metadata(self):
106109
meta = dict(shape=self.shape, chunks=self.chunks, dtype=self.dtype,
107110
compression=self.compression,
108111
compression_opts=self.compression_opts,
109-
fill_value=self.fill_value)
112+
fill_value=self.fill_value, order=self.order)
110113
self.store['meta'] = encode_metadata(meta)
111114

112115
@property
@@ -156,7 +159,7 @@ def __getitem__(self, item):
156159
out_shape = tuple(stop - start for start, stop in selection)
157160

158161
# setup output array
159-
out = np.empty(out_shape, dtype=self.dtype)
162+
out = np.empty(out_shape, dtype=self.dtype, order=self.order)
160163

161164
# determine indices of chunks overlapping the selection
162165
chunk_range = get_chunk_range(selection, self.chunks)
@@ -263,17 +266,20 @@ def _chunk_getitem(self, cidx, item, dest):
263266

264267
else:
265268

266-
if is_total_slice(item, self.chunks) and dest.flags.c_contiguous:
269+
if is_total_slice(item, self.chunks) and \
270+
((self.order == 'C' and dest.flags.c_contiguous) or
271+
(self.order == 'F' and dest.flags.f_contiguous)):
267272

268273
# optimisation: we want the whole chunk, and the destination is
269-
# C contiguous, so we can decompress directly from the chunk
274+
# contiguous in the chunk order, so we can decompress directly
270275
# into the destination array
271276
self.compressor.decompress(cdata, dest)
272277

273278
else:
274279

275280
# decompress chunk
276-
chunk = np.empty(self.chunks, dtype=self.dtype)
281+
chunk = np.empty(self.chunks, dtype=self.dtype,
282+
order=self.order)
277283
self.compressor.decompress(cdata, chunk)
278284

279285
# set data in output array
@@ -305,13 +311,17 @@ def _chunk_setitem(self, cidx, key, value):
305311
if np.isscalar(value):
306312

307313
# setup array filled with value
308-
chunk = np.empty(self.chunks, dtype=self.dtype)
314+
chunk = np.empty(self.chunks, dtype=self.dtype,
315+
order=self.order)
309316
chunk.fill(value)
310317

311318
else:
312319

313-
# ensure array is C contiguous
314-
chunk = np.ascontiguousarray(value, dtype=self.dtype)
320+
# ensure array is contiguous in this array's memory order (C or F)
321+
if self.order == 'F':
322+
chunk = np.asfortranarray(value, dtype=self.dtype)
323+
else:
324+
chunk = np.ascontiguousarray(value, dtype=self.dtype)
315325

316326
else:
317327
# partially replace the contents of this chunk
@@ -325,14 +335,16 @@ def _chunk_setitem(self, cidx, key, value):
325335
except KeyError:
326336

327337
# chunk not initialized
328-
chunk = np.empty(self.chunks, dtype=self.dtype)
338+
chunk = np.empty(self.chunks, dtype=self.dtype,
339+
order=self.order)
329340
if self.fill_value is not None:
330341
chunk.fill(self.fill_value)
331342

332343
else:
333344

334345
# decompress chunk
335-
chunk = np.empty(self.chunks, dtype=self.dtype)
346+
chunk = np.empty(self.chunks, dtype=self.dtype,
347+
order=self.order)
336348
self.compressor.decompress(cdata, chunk)
337349

338350
# modify
@@ -350,6 +362,7 @@ def __repr__(self):
350362
r += '%s' % str(self.shape)
351363
r += ', %s' % str(self.dtype)
352364
r += ', chunks=%s' % str(self.chunks)
365+
r += ', order=%s' % self.order
353366
r += ')'
354367
r += '\n compression: %s' % self.compression
355368
r += '; compression_opts: %s' % str(self.compression_opts)
@@ -469,9 +482,9 @@ def __init__(self, store, synchronizer, readonly=False):
469482
>>> synchronizer = zarr.ThreadSynchronizer()
470483
>>> z = zarr.SynchronizedArray(store, synchronizer)
471484
>>> z
472-
zarr.core.SynchronizedArray((1000,), float64, chunks=(100,))
485+
zarr.core.SynchronizedArray((1000,), float64, chunks=(100,), order=C)
473486
compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz', 'shuffle': 1}
474-
nbytes: 7.8K; nbytes_stored: 271; ratio: 29.5; initialized: 0/10
487+
nbytes: 7.8K; nbytes_stored: 289; ratio: 27.7; initialized: 0/10
475488
store: builtins.dict
476489
synchronizer: zarr.sync.ThreadSynchronizer
477490

0 commit comments

Comments
 (0)