Skip to content

Bytes attributes are decoded to strings with engine='h5netcdf' #477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ Bug fixes
- Fixed a bug in serializing scalar datetime variable to netCDF.
- Fixed a bug that could occur in serialization of 0-dimensional integer arrays.
- Fixed a bug where concatenating DataArrays was not always lazy (:issue:`464`).
- When reading datasets with h5netcdf, bytes attributes are decoded to strings.
This allows conventions decoding to work properly on Python 3 (:issue:`451`).

v0.5.1 (15 June 2015)
---------------------
Expand Down
32 changes: 25 additions & 7 deletions xray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,33 @@
from .. import Variable
from ..conventions import cf_encoder
from ..core import indexing
from ..core.utils import FrozenOrderedDict, close_on_error
from ..core.pycompat import iteritems, basestring, unicode_type, OrderedDict
from ..core.utils import FrozenOrderedDict, close_on_error, Frozen
from ..core.pycompat import iteritems, bytes_type, unicode_type, OrderedDict

from .common import AbstractWritableDataStore
from .netCDF4_ import _nc4_group, _nc4_values_and_dtype


def maybe_decode_bytes(txt):
    """Return *txt* decoded from UTF-8 if it is a bytes object.

    Any non-bytes value (already-unicode strings, numbers, arrays) is
    passed through unchanged.
    """
    if not isinstance(txt, bytes_type):
        return txt
    return txt.decode('utf-8')


def _read_attributes(h5netcdf_var):
    """Read all attributes of an h5netcdf variable/group into an OrderedDict.

    GH451: to ensure conventions decoding works properly on Python 3,
    bytes attributes are decoded to unicode strings.  The fill/missing
    value attributes are left untouched so their type keeps matching the
    variable's data.
    """
    skip_decoding = ('_FillValue', 'missing_value')
    attrs = OrderedDict()
    for name in h5netcdf_var.ncattrs():
        value = h5netcdf_var.getncattr(name)
        if name in skip_decoding:
            attrs[name] = value
        else:
            attrs[name] = maybe_decode_bytes(value)
    return attrs


class H5NetCDFStore(AbstractWritableDataStore):
"""Store for reading and writing data via h5netcdf
"""
Expand All @@ -33,8 +53,7 @@ def store(self, variables, attributes):
def open_store_variable(self, var):
dimensions = var.dimensions
data = indexing.LazilyIndexedArray(var)
attributes = OrderedDict((k, var.getncattr(k))
for k in var.ncattrs())
attrs = _read_attributes(var)

# netCDF4 specific encoding
encoding = dict(var.filters())
Expand All @@ -44,15 +63,14 @@ def open_store_variable(self, var):
# save source so __repr__ can detect if it's local or not
encoding['source'] = self._filename

return Variable(dimensions, data, attributes, encoding)
return Variable(dimensions, data, attrs, encoding)

def get_variables(self):
    """Return a frozen mapping of variable name -> lazily-wrapped Variable."""
    wrapped = ((name, self.open_store_variable(raw))
               for name, raw in iteritems(self.ds.variables))
    return FrozenOrderedDict(wrapped)

def get_attrs(self):
return FrozenOrderedDict((k, self.ds.getncattr(k))
for k in self.ds.ncattrs())
return Frozen(_read_attributes(self.ds))

def get_dimensions(self):
    """Return the dimensions mapping of the underlying h5netcdf dataset."""
    return self.ds.dimensions
Expand Down
21 changes: 20 additions & 1 deletion xray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def test_engine(self):
with self.assertRaisesRegexp(ValueError, 'can only read'):
open_dataset(BytesIO(netcdf_bytes), engine='foobar')

def test_cross_engine_read_write(self):
def test_cross_engine_read_write_netcdf3(self):
data = create_test_data()
valid_engines = set()
if has_netCDF4:
Expand Down Expand Up @@ -704,6 +704,25 @@ def test_complex(self):
with self.roundtrip(expected) as actual:
self.assertDatasetEqual(expected, actual)

def test_cross_engine_read_write_netcdf4(self):
    """Datasets written by one netCDF4 engine must read back identically
    with the other (netcdf4 <-> h5netcdf)."""
    original = create_test_data().drop('dim3')
    original.attrs['foo'] = 'bar'
    engines = ['netcdf4', 'h5netcdf']
    for writer in engines:
        with create_tmp_file() as tmp_file:
            original.to_netcdf(tmp_file, engine=writer)
            for reader in engines:
                with open_dataset(tmp_file, engine=reader) as actual:
                    self.assertDatasetIdentical(original, actual)

def test_read_byte_attrs_as_unicode(self):
    """GH451: a bytes attribute written with netCDF4 should be read back
    as a unicode string so conventions decoding works on Python 3."""
    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as nc:
            nc.foo = b'bar'
        expected = Dataset(attrs={'foo': 'bar'})
        # Use a context manager so the dataset's file handle is released;
        # the original left it dangling open, unlike the other tests here.
        with open_dataset(tmp_file) as actual:
            self.assertDatasetIdentical(expected, actual)


@requires_dask
@requires_netCDF4
Expand Down