diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fb57a7f8bd838..d33aa614f8067 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -41,6 +41,8 @@ import pandas.algos as algos import pandas.tslib as tslib +from tables.exceptions import NoSuchNodeError, NodeError + from contextlib import contextmanager from distutils.version import LooseVersion @@ -1455,6 +1457,7 @@ def infer(self, handler): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() + new_self._handle = handler._handle new_self.set_table(table) new_self.get_attr() new_self.read_metadata(handler) @@ -1511,6 +1514,10 @@ def cvalues(self): """ return my cython values """ return self.values + @property + def handle(self): + return self._handle + def __iter__(self): return iter(self.values) @@ -1534,6 +1541,7 @@ def validate_names(self): pass def validate_and_set(self, handler, append, **kwargs): + self._handle = handler._handle self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -2043,13 +2051,35 @@ def convert(self, values, nan_rep, encoding): def get_attr(self): """ get the data for this colummn """ self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: + try: + data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + self.values = map(tuple, data.tolist()) + else: + self.values = data.tolist() + + except NoSuchNodeError: + pass self.dtype = getattr(self.attrs, self.dtype_attr, None) self.meta = getattr(self.attrs, self.meta_attr, None) self.set_kind() def set_attr(self): """ set the data for this colummn """ - setattr(self.attrs, self.kind_attr, self.values) + #setattr(self.attrs, self.kind_attr, self.values) + try: + self.handle.create_carray(self.attrs._v_node._v_parent, + self.kind_attr, + obj=np.array(self.values)) + except NodeError as e: + self.handle.remove_node(self.attrs._v_node._v_parent, + self.kind_attr) + self.handle.create_carray(self.attrs._v_node._v_parent, + self.kind_attr, + obj=np.array(self.values)) + except Exception as e: # for debugging + raise setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: setattr(self.attrs, self.dtype_attr, self.dtype) @@ -3020,12 +3050,50 @@ def set_info(self): """ update our table index info """ self.attrs.info = self.info + def set_non_index_axes(self): + """ Write the axes to carrays """ + def f(dim, flds): + name = "non_index_axes_%d" % dim + try: + self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) + except ValueError as e: + # Should probably make this check: + #if e.message == "unknown type: 'object'": + # raise ValueError("axis {} has dtype 'object' which cannot be saved to carray".format(dim)) + raise + except NodeError as e: + self._handle.remove_node(self.attrs._v_node, name) + self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) + return dim, flds + + replacement = [f(dim, flds) for dim, flds in self.non_index_axes] + self.attrs.non_index_axes = replacement + + def get_non_index_axes(self): + """Load the non-index axes from their carrays. This is a pass-through + for tables stored prior to v0.17""" + def f(dim, flds): + if isinstance(flds, string_types): + flds = self._handle.get_node(self.attrs._v_node, flds)[:] + if len(flds.shape) > 1 and flds.shape[1] > 1: + flds = map(tuple, flds.tolist()) + else: + flds = flds.tolist() + return dim, flds + else: + return dim, flds #if not a string presumably pre v17 list + non_index_axes = getattr(self.attrs, 'non_index_axes', []) + new = [f(dim, flds) for dim, flds in non_index_axes] + return new + def set_attrs(self): """ set our table type & indexables """ self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - self.attrs.non_index_axes = self.non_index_axes + + #self.attrs.non_index_axes = self.non_index_axes + self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep self.attrs.encoding = self.encoding @@ -3035,8 +3103,7 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = getattr( - self.attrs, 'non_index_axes', None) or [] + self.non_index_axes = self.get_non_index_axes() self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( diff --git a/pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 b/pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 new file mode 100644 index 0000000000000..bfbb9171f1bae Binary files /dev/null and b/pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 differ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index fd8c28bfe0f85..a4b0ea413c08c 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4606,6 +4606,22 @@ def test_read_nokey(self): df.to_hdf(path, 'df2', mode='a') self.assertRaises(ValueError, read_hdf, path) + def test_legacy_non_index_axes(self): + filename = tm.get_data_path('legacy_hdf/legacy_non_index_axes_0.17.1.h5') + with HDFStore(filename, 'r') as store: + df_legacy = store.get("df") + + index = pd.date_range(start = Timestamp("2015-11-01 0:00"), freq = "H", periods = 3, tz = None) + columns = MultiIndex(levels=[['A', 'B', 'C', 'D'], + [1, 2, 3]], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['alpha', 'num']) + data = np.array([index.asi8+i for i in range(10)]) + df_new = DataFrame(data.T, columns=columns, index=index) + + tm.assert_frame_equal(df_legacy, df_new) + #df_new.to_hdf(filename, "df", format = "table") class TestHDFComplexValues(Base): # GH10447 @@ -5025,5 +5041,7 @@ def _test_sort(obj): if __name__ == '__main__': import nose + #nose.runmodule(argv=[__file__, '-vvs'], + # exit=False) nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)