Skip to content

Commit aab5392

Browse files
authored
Bitround Codec (#299)
1 parent eacd6e2 commit aab5392

File tree

5 files changed

+175
-0
lines changed

5 files changed

+175
-0
lines changed

docs/bitround.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Bitround
2+
========
3+
.. automodule:: numcodecs.bitround
4+
5+
.. autoclass:: BitRound
6+
7+
.. autoattribute:: codec_id
8+
.. automethod:: encode
9+
.. automethod:: decode
10+
.. automethod:: get_config
11+
.. automethod:: from_config

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Contents
7171
delta
7272
fixedscaleoffset
7373
quantize
74+
bitround
7475
packbits
7576
categorize
7677
checksum32

docs/release.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ Unreleased
3636
By :user:`Dimitri Papadopoulos Orfanos <DimitriPapadopoulos>`,
3737
:issue:`295`, :issue:`294`, :issue:`293`, and :issue:`292`.
3838

39+
* Add bitround codec
40+
By :user:`Ryan Abernathy <rabernat>` and :user:`Martin Durant <martindurant>`, :issue:`298`.
41+
3942
* Drop Python 3.6
4043
By :user:`Josh Moore <joshmoore>`, :issue:`318`.
4144

numcodecs/bitround.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
3+
4+
from .abc import Codec
5+
from .compat import ensure_ndarray_like, ndarray_copy
6+
7+
# The size in bits of the mantissa/significand for the various floating types
8+
# You cannot keep more bits of data than you have available
9+
# https://en.wikipedia.org/wiki/IEEE_754
10+
max_bits = {
11+
"float16": 10,
12+
"float32": 23,
13+
"float64": 52,
14+
}
15+
16+
17+
class BitRound(Codec):
18+
"""Floating-point bit rounding codec
19+
20+
Drops a specified number of bits from the floating point mantissa,
21+
leaving an array more amenable to compression. The number of bits to keep should
22+
be determined by an information analysis of the data to be compressed.
23+
The approach is based on the paper by Klöwer et al. 2021
24+
(https://www.nature.com/articles/s43588-021-00156-2). See
25+
https://github.com/zarr-developers/numcodecs/issues/298 for discussion
26+
and the original implementation in Julia referred to at
27+
https://github.com/milankl/BitInformation.jl
28+
29+
Parameters
30+
----------
31+
32+
keepbits: int
33+
The number of bits of the mantissa to keep. The range allowed
34+
depends on the dtype input data. If keepbits is
35+
equal to the maximum allowed for the data type, this is equivalent
36+
to no transform.
37+
"""
38+
39+
codec_id = 'bitround'
40+
41+
def __init__(self, keepbits: int):
42+
if keepbits < 0:
43+
raise ValueError("keepbits must be zero or positive")
44+
self.keepbits = keepbits
45+
46+
def encode(self, buf):
47+
"""Create int array by rounding floating-point data
48+
49+
The itemsize will be preserved, but the output should be much more
50+
compressible.
51+
"""
52+
a = ensure_ndarray_like(buf)
53+
if not a.dtype.kind == "f" or a.dtype.itemsize > 8:
54+
raise TypeError("Only float arrays (16-64bit) can be bit-rounded")
55+
bits = max_bits[str(a.dtype)]
56+
# cast float to int type of same width (preserve endianness)
57+
a_int_dtype = np.dtype(a.dtype.str.replace("f", "i"))
58+
all_set = np.array(-1, dtype=a_int_dtype)
59+
if self.keepbits == bits:
60+
return a
61+
if self.keepbits > bits:
62+
raise ValueError("Keepbits too large for given dtype")
63+
b = a.view(a_int_dtype)
64+
maskbits = bits - self.keepbits
65+
mask = (all_set >> maskbits) << maskbits
66+
half_quantum1 = (1 << (maskbits - 1)) - 1
67+
b += ((b >> maskbits) & 1) + half_quantum1
68+
b &= mask
69+
return b
70+
71+
def decode(self, buf, out=None):
72+
"""Remake floats from ints
73+
74+
As with ``encode``, preserves itemsize.
75+
"""
76+
buf = ensure_ndarray_like(buf)
77+
# Cast back from `int` to `float` type (noop if a `float`ing type buffer is provided)
78+
dt = np.dtype(buf.dtype.str.replace("i", "f"))
79+
data = buf.view(dt)
80+
return ndarray_copy(data, out)

numcodecs/tests/test_bitround.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
3+
import pytest
4+
5+
from numcodecs.bitround import BitRound, max_bits
6+
7+
# adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl
8+
9+
10+
# TODO: add other dtypes
11+
@pytest.fixture(params=["float32", "float64"])
12+
def dtype(request):
13+
return request.param
14+
15+
16+
def round(data, keepbits):
17+
codec = BitRound(keepbits=keepbits)
18+
data = data.copy() # otherwise overwrites the input
19+
encoded = codec.encode(data)
20+
return codec.decode(encoded)
21+
22+
23+
def test_round_zero_to_zero(dtype):
24+
a = np.zeros((3, 2), dtype=dtype)
25+
# Don't understand Milan's original test:
26+
# How is it possible to have negative keepbits?
27+
# for k in range(-5, 50):
28+
for k in range(0, max_bits[dtype]):
29+
ar = round(a, k)
30+
np.testing.assert_equal(a, ar)
31+
32+
33+
def test_round_one_to_one(dtype):
34+
a = np.ones((3, 2), dtype=dtype)
35+
for k in range(0, max_bits[dtype]):
36+
ar = round(a, k)
37+
np.testing.assert_equal(a, ar)
38+
39+
40+
def test_round_minus_one_to_minus_one(dtype):
41+
a = -np.ones((3, 2), dtype=dtype)
42+
for k in range(0, max_bits[dtype]):
43+
ar = round(a, k)
44+
np.testing.assert_equal(a, ar)
45+
46+
47+
def test_no_rounding(dtype):
48+
a = np.random.random_sample((300, 200)).astype(dtype)
49+
keepbits = max_bits[dtype]
50+
ar = round(a, keepbits)
51+
np.testing.assert_equal(a, ar)
52+
53+
54+
APPROX_KEEPBITS = {"float32": 11, "float64": 18}
55+
56+
57+
def test_approx_equal(dtype):
58+
a = np.random.random_sample((300, 200)).astype(dtype)
59+
ar = round(a, APPROX_KEEPBITS[dtype])
60+
# Mimic julia behavior - https://docs.julialang.org/en/v1/base/math/#Base.isapprox
61+
rtol = np.sqrt(np.finfo(np.float32).eps)
62+
# This gets us much closer but still failing for ~6% of the array
63+
# It does pass if we add 1 to keepbits (11 instead of 10)
64+
# Is there an off-by-one issue here?
65+
np.testing.assert_allclose(a, ar, rtol=rtol)
66+
67+
68+
def test_idempotence(dtype):
69+
a = np.random.random_sample((300, 200)).astype(dtype)
70+
for k in range(20):
71+
ar = round(a, k)
72+
ar2 = round(a, k)
73+
np.testing.assert_equal(ar, ar2)
74+
75+
76+
def test_errors():
77+
with pytest.raises(ValueError):
78+
BitRound(keepbits=99).encode(np.array([0], dtype="float32"))
79+
with pytest.raises(TypeError):
80+
BitRound(keepbits=10).encode(np.array([0]))

0 commit comments

Comments
 (0)