Commit 222cff8

Merge pull request #477 from shoyer/h5netcdf-bytes-attrs
Bytes attributes are decoded to strings with engine='h5netcdf'
2 parents: 7c9a2fe + 71c30f7

3 files changed: +47 -8 lines

doc/whats-new.rst (+2)

@@ -58,6 +58,8 @@ Bug fixes
 - Fixed a bug in serializing scalar datetime variable to netCDF.
 - Fixed a bug that could occur in serialization of 0-dimensional integer arrays.
 - Fixed a bug where concatenating DataArrays was not always lazy (:issue:`464`).
+- When reading datasets with h5netcdf, bytes attributes are decoded to strings.
+  This allows conventions decoding to work properly on Python 3 (:issue:`451`).
 
 v0.5.1 (15 June 2015)
 ---------------------
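
Why the decode matters: on Python 3, bytes and str never compare equal, so conventions decoding (which compares attribute values against ordinary strings such as 'days since ...') silently fails for attributes that come back from h5py as bytes. A minimal illustration, using a made-up attribute value rather than anything from the codebase:

    # Hypothetical attribute value; h5py can return string attributes as bytes.
    units = b'days since 2000-01-01'

    print(units == 'days since 2000-01-01')                   # False on Python 3 (True on Python 2)
    print(units.decode('utf-8') == 'days since 2000-01-01')   # True, which is what the decode restores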

xray/backends/h5netcdf_.py (+25 -7)

@@ -2,13 +2,33 @@
 from .. import Variable
 from ..conventions import cf_encoder
 from ..core import indexing
-from ..core.utils import FrozenOrderedDict, close_on_error
-from ..core.pycompat import iteritems, basestring, unicode_type, OrderedDict
+from ..core.utils import FrozenOrderedDict, close_on_error, Frozen
+from ..core.pycompat import iteritems, bytes_type, unicode_type, OrderedDict
 
 from .common import AbstractWritableDataStore
 from .netCDF4_ import _nc4_group, _nc4_values_and_dtype
 
 
+def maybe_decode_bytes(txt):
+    if isinstance(txt, bytes_type):
+        return txt.decode('utf-8')
+    else:
+        return txt
+
+
+def _read_attributes(h5netcdf_var):
+    # GH451
+    # to ensure conventions decoding works properly on Python 3, decode all
+    # bytes attributes to strings
+    attrs = OrderedDict()
+    for k in h5netcdf_var.ncattrs():
+        v = h5netcdf_var.getncattr(k)
+        if k not in ['_FillValue', 'missing_value']:
+            v = maybe_decode_bytes(v)
+        attrs[k] = v
+    return attrs
+
+
 class H5NetCDFStore(AbstractWritableDataStore):
     """Store for reading and writing data via h5netcdf
     """
@@ -33,8 +53,7 @@ def store(self, variables, attributes):
     def open_store_variable(self, var):
         dimensions = var.dimensions
         data = indexing.LazilyIndexedArray(var)
-        attributes = OrderedDict((k, var.getncattr(k))
-                                 for k in var.ncattrs())
+        attrs = _read_attributes(var)
 
         # netCDF4 specific encoding
         encoding = dict(var.filters())
@@ -44,15 +63,14 @@ def open_store_variable(self, var):
         # save source so __repr__ can detect if it's local or not
         encoding['source'] = self._filename
 
-        return Variable(dimensions, data, attributes, encoding)
+        return Variable(dimensions, data, attrs, encoding)
 
     def get_variables(self):
         return FrozenOrderedDict((k, self.open_store_variable(v))
                                  for k, v in iteritems(self.ds.variables))
 
     def get_attrs(self):
-        return FrozenOrderedDict((k, self.ds.getncattr(k))
-                                 for k in self.ds.ncattrs())
+        return Frozen(_read_attributes(self.ds))
 
     def get_dimensions(self):
         return self.ds.dimensions
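
The decoding rule itself is easy to check in isolation. A hedged, standalone sketch of the helper added above, assuming `bytes_type` resolves to the built-in `bytes` on Python 3 (no h5netcdf required):

    # Standalone re-statement of maybe_decode_bytes, for illustration only.
    def maybe_decode_bytes(txt):
        if isinstance(txt, bytes):
            return txt.decode('utf-8')
        return txt

    assert maybe_decode_bytes(b'degrees_north') == 'degrees_north'   # bytes -> str
    assert maybe_decode_bytes('already a str') == 'already a str'    # str passes through
    assert maybe_decode_bytes(1.5) == 1.5                            # numeric attrs untouched

Note that `_read_attributes` deliberately skips `_FillValue` and `missing_value`, presumably so those values keep the variable's on-disk type for later masking rather than being coerced to text.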

xray/test/test_backends.py (+20 -1)

@@ -661,7 +661,7 @@ def test_engine(self):
         with self.assertRaisesRegexp(ValueError, 'can only read'):
             open_dataset(BytesIO(netcdf_bytes), engine='foobar')
 
-    def test_cross_engine_read_write(self):
+    def test_cross_engine_read_write_netcdf3(self):
         data = create_test_data()
         valid_engines = set()
         if has_netCDF4:
@@ -704,6 +704,25 @@ def test_complex(self):
         with self.roundtrip(expected) as actual:
             self.assertDatasetEqual(expected, actual)
 
+    def test_cross_engine_read_write_netcdf4(self):
+        data = create_test_data().drop('dim3')
+        data.attrs['foo'] = 'bar'
+        valid_engines = ['netcdf4', 'h5netcdf']
+        for write_engine in valid_engines:
+            with create_tmp_file() as tmp_file:
+                data.to_netcdf(tmp_file, engine=write_engine)
+                for read_engine in valid_engines:
+                    with open_dataset(tmp_file, engine=read_engine) as actual:
+                        self.assertDatasetIdentical(data, actual)
+
+    def test_read_byte_attrs_as_unicode(self):
+        with create_tmp_file() as tmp_file:
+            with nc4.Dataset(tmp_file, 'w') as nc:
+                nc.foo = b'bar'
+            actual = open_dataset(tmp_file)
+            expected = Dataset(attrs={'foo': 'bar'})
+            self.assertDatasetIdentical(expected, actual)
+
 
 @requires_dask
 @requires_netCDF4
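
An end-to-end usage sketch mirroring `test_read_byte_attrs_as_unicode`, as a user of the h5netcdf engine would see the fix. It assumes netCDF4, h5netcdf, and xray (the package name at the time of this commit) are installed; the file name is made up:

    import netCDF4 as nc4
    import xray

    # Write a bytes-valued global attribute with netCDF4-python.
    with nc4.Dataset('example.nc', 'w') as nc:
        nc.foo = b'bar'

    # Read it back through the h5netcdf engine; with this change the
    # attribute arrives as a str instead of bytes.
    with xray.open_dataset('example.nc', engine='h5netcdf') as ds:
        print(type(ds.attrs['foo']))   # <class 'str'> on Python 3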
