Skip to content

Commit 07f634b

Browse files
committed
added implementation of low-level blosc functions with simple test; cc #21
1 parent 2e39ddd commit 07f634b

File tree

5 files changed

+278
-6
lines changed

5 files changed

+278
-6
lines changed

zarr/PythonHelper.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*
2+
* This header is to help compile on both Python
3+
* 2 and Python 3 with the same Cython source code.
4+
*/
5+
#ifndef PYTHON_HELPER_H
6+
#define PYTHON_HELPER_H
7+
8+
#include "Python.h"
9+
10+
#if PY_VERSION_HEX >= 0x03000000
11+
#define PyBuffer_FromMemory(ptr, size) PyMemoryView_FromMemory(ptr, size, PyBUF_READ|PyBUF_WRITE)
12+
#endif
13+
14+
#endif /* PYTHON_HELPER_H */

zarr/blosc.pyx

Lines changed: 135 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,74 @@
22
from __future__ import absolute_import, print_function, division
33

44

5+
import sys
6+
import ctypes
7+
# noinspection PyUnresolvedReferences
58
import numpy as np
69
cimport numpy as np
10+
# noinspection PyUnresolvedReferences
11+
from libc.stdint cimport uintptr_t
12+
# noinspection PyUnresolvedReferences
13+
from .definitions cimport malloc, realloc, free, PyBytes_AsString
714

815

9-
def decompress(bytes cdata, np.ndarray array):
16+
PY2 = sys.version_info[0] == 2
17+
18+
19+
cdef extern from "blosc.h":
20+
cdef enum:
21+
BLOSC_MAX_OVERHEAD,
22+
BLOSC_VERSION_STRING,
23+
BLOSC_VERSION_DATE
24+
25+
void blosc_init()
26+
void blosc_destroy()
27+
int blosc_set_nthreads(int nthreads)
28+
int blosc_set_compressor(const char *compname)
29+
int blosc_compress(int clevel, int doshuffle, size_t typesize,
30+
size_t nbytes, void *src, void *dest,
31+
size_t destsize) nogil
32+
int blosc_decompress(void *src, void *dest, size_t destsize) nogil
33+
int blosc_compname_to_compcode(const char *compname)
34+
int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
35+
size_t nbytes, const void* src, void* dest,
36+
size_t destsize, const char* compressor,
37+
size_t blocksize, int numinternalthreads) nogil
38+
int blosc_decompress_ctx(const void *src, void *dest, size_t destsize,
39+
int numinternalthreads) nogil
40+
void blosc_cbuffer_sizes(void *cbuffer, size_t *nbytes,
41+
size_t *cbytes, size_t *blocksize)
42+
43+
44+
def version():
    """Report the blosc release that zarr was compiled against.

    Returns
    -------
    tuple
        ``(version_string, version_date)`` taken from the
        ``BLOSC_VERSION_STRING`` and ``BLOSC_VERSION_DATE`` macros of
        ``blosc.h``. On Python 2 both items are byte strings; otherwise they
        are decoded to text.

    """

    # Building a tuple coerces the C strings into Python bytes objects.
    raw_fields = (<char*> BLOSC_VERSION_STRING, <char*> BLOSC_VERSION_DATE)

    # Python 2 callers get the native (byte) string type unchanged.
    if PY2:
        return raw_fields

    return tuple([field.decode() for field in raw_fields])
53+
54+
55+
def init():
    """Set up the blosc library environment by calling ``blosc_init()``."""
    blosc_init()
57+
58+
59+
def destroy():
    """Tear down the blosc library environment by calling ``blosc_destroy()``."""
    blosc_destroy()
61+
62+
63+
def compname_to_compcode(cname):
    """Translate a compressor name into the numeric code blosc uses for it.

    Parameters
    ----------
    cname : bytes or str
        Compressor name. Anything that is not already ``bytes`` is encoded
        as ASCII before being handed to blosc.

    Returns
    -------
    int
        The (non-negative) compressor code reported by
        ``blosc_compname_to_compcode``.

    Raises
    ------
    ValueError
        If blosc returns a negative code for the given name.

    """
    # blosc expects a C string, so normalise text input to bytes first
    name_bytes = cname if isinstance(cname, bytes) else cname.encode('ascii')

    code = blosc_compname_to_compcode(name_bytes)
    if code >= 0:
        return code

    # a negative code is how blosc signals an unknown/unavailable compressor
    raise ValueError('compressor not available: %r' % name_bytes)
70+
71+
72+
def decompress(bytes cdata, np.ndarray array, use_context):
    """Decompress data into a numpy array.

    Parameters
    ----------
    cdata : bytes
        Compressed data, including blosc header.
    array : ndarray
        Numpy array to decompress into. Must be writeable and occupy a single
        contiguous block of memory (C or Fortran order), because the
        decompressed bytes are written straight through its data pointer.
    use_context : bool
        If True, use blosc contextual mode. Otherwise use global locking mode.

    Raises
    ------
    TypeError
        If `cdata` is None.
    ValueError
        If `array` is read-only or not contiguous.
    RuntimeError
        If blosc reports a non-positive result for the decompression.

    Notes
    -----
    Assumes that the size of the destination array is correct for the size of
    the uncompressed data.

    """

    cdef:
        int ret
        char* source
        char* dest
        size_t nbytes

    # Validate inputs up front: everything below works on raw pointers, so a
    # bad argument would corrupt memory or crash rather than raise.
    if cdata is None:
        raise TypeError('cdata must be bytes, not None')
    if not array.flags.writeable:
        raise ValueError('array is read-only')
    if not (array.flags.c_contiguous or array.flags.f_contiguous):
        # a strided view does not own ``nbytes`` consecutive bytes starting
        # at ``array.data``
        raise ValueError('array must be contiguous')

    # setup
    source = PyBytes_AsString(cdata)
    dest = array.data
    nbytes = array.nbytes

    # perform decompression
    if use_context:
        # contextual mode: release the GIL while blosc works
        with nogil:
            ret = blosc_decompress_ctx(source, dest, nbytes, 1)

    else:
        # global mode: keep the GIL held around the call
        ret = blosc_decompress(source, dest, nbytes)

    # handle errors
    if ret <= 0:
        raise RuntimeError('error during blosc decompression: %d' % ret)
113+
114+
115+
def compress(np.ndarray array, char* cname, int clevel, int shuffle,
             use_context):
    """Compress data in a numpy array.

    Parameters
    ----------
    array : ndarray
        Numpy array whose underlying buffer is to be compressed. Must occupy
        a single contiguous block of memory (C or Fortran order), because the
        bytes are read straight through its data pointer.
    cname : bytes
        Name of the blosc compressor to use.
    clevel : int
        Compression level.
    shuffle : int
        Shuffle filter.
    use_context : bool
        If True, use blosc contextual mode. Otherwise use global locking mode.

    Returns
    -------
    cdata : bytes
        Compressed data.

    Raises
    ------
    ValueError
        If `array` is not contiguous, or (global mode only) if blosc rejects
        the compressor name.
    MemoryError
        If the temporary output buffer cannot be allocated.
    RuntimeError
        If blosc reports a non-positive result for the compression.

    """

    cdef:
        char* source
        char* dest
        size_t nbytes, itemsize, destsize
        # blosc returns a signed int (negative on error); keeping it signed
        # is what makes the error check below effective
        int cbytes
        bytes cdata_bytes

    # a strided view does not own ``nbytes`` consecutive bytes starting at
    # ``array.data``, so reading it through the raw pointer would be unsafe
    if not (array.flags.c_contiguous or array.flags.f_contiguous):
        raise ValueError('array must be contiguous')

    # obtain reference to underlying buffer
    source = array.data

    # allocate memory for compressed data
    nbytes = array.nbytes
    itemsize = array.dtype.itemsize
    destsize = nbytes + BLOSC_MAX_OVERHEAD
    dest = <char*> malloc(destsize)
    if dest == NULL:
        raise MemoryError('unable to allocate %d bytes for compression'
                          % destsize)

    try:

        # perform compression
        if use_context:
            # contextual mode: release the GIL while blosc works
            with nogil:
                cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes,
                                            source, dest, destsize, cname,
                                            0, 1)

        else:
            # global mode: the compressor is selected via library-wide state
            if blosc_set_compressor(cname) < 0:
                raise ValueError('compressor not supported: %r' % cname)
            cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source,
                                    dest, destsize)

        # check compression was successful
        if cbytes <= 0:
            raise RuntimeError('error during blosc compression: %d' % cbytes)

        # copy just the compressed bytes into a Python bytes object
        cdata_bytes = dest[:cbytes]

    finally:
        # the temporary buffer is ours; release it on success and on error
        free(dest)

    return cdata_bytes

zarr/definitions.pxd

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
########################################################################
2+
#
3+
# License: BSD
4+
# Created: August 05, 2010
5+
# Author: Francesc Alted - [email protected]
6+
#
7+
########################################################################
8+
9+
"""Definitions for the C header dependencies."""
10+
11+
import sys
12+
13+
# Standard C functions.
14+
cdef extern from "stdlib.h":
15+
ctypedef long size_t
16+
ctypedef long uintptr_t
17+
void *malloc(size_t size)
18+
void *realloc(void *ptr, size_t size)
19+
void free(void *ptr)
20+
21+
cdef extern from "string.h":
22+
char *strchr(char *s, int c)
23+
char *strcpy(char *dest, char *src)
24+
char *strncpy(char *dest, char *src, size_t n)
25+
int strcmp(char *s1, char *s2)
26+
char *strdup(char *s)
27+
void *memcpy(void *dest, void *src, size_t n)
28+
void *memset(void *s, int c, size_t n)
29+
30+
cdef extern from "time.h":
31+
ctypedef int time_t
32+
33+
34+
#-----------------------------------------------------------------------------
35+
36+
# Some helper routines from the Python API
37+
# PythonHelper.h is used to help make
38+
# python 2 and 3 both work.
39+
cdef extern from "PythonHelper.h":
40+
41+
# special types
42+
ctypedef int Py_ssize_t
43+
44+
# references
45+
void Py_INCREF(object)
46+
void Py_DECREF(object)
47+
48+
# To release global interpreter lock (GIL) for threading
49+
void Py_BEGIN_ALLOW_THREADS()
50+
void Py_END_ALLOW_THREADS()
51+
52+
# Functions for integers
53+
object PyInt_FromLong(long)
54+
long PyInt_AsLong(object)
55+
object PyLong_FromLongLong(long long)
56+
long long PyLong_AsLongLong(object)
57+
58+
# Functions for floating points
59+
object PyFloat_FromDouble(double)
60+
61+
# Functions for strings
62+
object PyBytes_FromString(char *)
63+
object PyBytes_FromStringAndSize(char *s, int len)
64+
char *PyBytes_AsString(object string)
65+
char *PyBytes_AS_STRING(object string)
66+
size_t PyBytes_GET_SIZE(object string)
67+
68+
# Functions for lists
69+
int PyList_Append(object list, object item)
70+
71+
# Functions for tuples
72+
object PyTuple_New(int)
73+
int PyTuple_SetItem(object, int, object)
74+
object PyTuple_GetItem(object, int)
75+
int PyTuple_Size(object tuple)
76+
77+
# Functions for dicts
78+
int PyDict_Contains(object p, object key)
79+
object PyDict_GetItem(object p, object key)
80+
81+
# Functions for objects
82+
object PyObject_GetItem(object o, object key)
83+
int PyObject_SetItem(object o, object key, object v)
84+
int PyObject_DelItem(object o, object key)
85+
long PyObject_Length(object o)
86+
int PyObject_Compare(object o1, object o2)
87+
int PyObject_AsReadBuffer(object obj, void **buffer, Py_ssize_t *buffer_len)
88+
89+
# Functions for buffers
90+
object PyBuffer_FromMemory(void *ptr, Py_ssize_t size)
91+
92+
ctypedef unsigned int Py_uintptr_t
93+
94+
95+
#-----------------------------------------------------------------------------
96+
97+
98+
## Local Variables:
99+
## mode: python
100+
## tab-width: 4
101+
## fill-column: 78
102+
## End:

zarr/tests/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
4+

zarr/tests/test_blosc.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
4+
5+
import numpy as np
6+
from numpy.testing import assert_array_equal
7+
8+
9+
from zarr.blosc import compress, decompress
10+
11+
12+
def test_round_trip():
    """Compress then decompress an array in both blosc modes.

    For contextual and global mode alike, the compressed payload must be a
    ``bytes`` object smaller than the input, and decompressing it must
    reproduce the input exactly.
    """
    # the input is never modified, so one array serves both iterations
    expected = np.arange(1000, dtype='i4')

    for use_context in (True, False):
        cdata = compress(expected, b'blosclz', 5, 1, use_context)
        assert isinstance(cdata, bytes)
        assert len(cdata) < expected.nbytes

        actual = np.empty_like(expected)
        decompress(cdata, actual, use_context)
        assert_array_equal(expected, actual)

0 commit comments

Comments
 (0)