From 633be7897bf3b8b0244e24d14ad5c331c33020af Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 14:17:43 +0200 Subject: [PATCH 01/25] added two test cases for storing wide dataframes in table format --- pandas/tests/io/test_pytables.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fc17b5f85b68c..a8d09d852f520 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5704,3 +5704,26 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) + + def test_wide_table_format(self): + # test storing wide dataframes with in table format + + df = DataFrame(np.random.random((10,10000))) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_append_wide_table_format(self): + # test append to hdf with wide dataframe + + df1 = DataFrame(np.random.random((10,10000))) + df2 = DataFrame(np.random.random((10,10000))) + + with ensure_clean_path(self.path) as path: + df1.to_hdf(path, 'df', format='table') + df2.to_hdf(path, 'df', append=True) + reread = read_hdf(path) + assert_frame_equal(pd.concat([df1, df2]), reread) + From 3ba10ef781cf83cbf4f51568c6e660eb6648af7d Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 11:00:38 +0200 Subject: [PATCH 02/25] Added support for wide tables with format 'table'. Columns are saved as vlarray with object atom. pre-cleanup. --- pandas/io/pytables.py | 90 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 82c80a13372d7..5bc9ccd000a15 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -46,6 +46,8 @@ from pandas.core.config import get_option from pandas.core.computation.pytables import Expr, maybe_expression +from tables.exceptions import NoSuchNodeError, NodeError + from pandas._libs import tslib, algos, lib from distutils.version import LooseVersion @@ -1560,6 +1562,7 @@ def infer(self, handler): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() + new_self._handle = handler._handle new_self.set_table(table) new_self.get_attr() new_self.read_metadata(handler) @@ -1616,6 +1619,10 @@ def cvalues(self): """ return my cython values """ return self.values + @property + def handle(self): + return self._handle + def __iter__(self): return iter(self.values) @@ -1639,6 +1646,7 @@ def validate_names(self): pass def validate_and_set(self, handler, append, **kwargs): + self._handle = handler._handle self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -2162,13 +2170,40 @@ def convert(self, values, nan_rep, encoding): def get_attr(self): """ get the data for this colummn """ self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: + try: + data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + data = np.array(data, dtype='object') + if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + self.values = list(map(tuple, data.tolist())) + else: + self.values = data.tolist() + except NoSuchNodeError: + pass + self.dtype = getattr(self.attrs, self.dtype_attr, None) self.meta = getattr(self.attrs, self.meta_attr, None) self.set_kind() def set_attr(self): """ set the data for this colummn """ - setattr(self.attrs, self.kind_attr, self.values) + #setattr(self.attrs, self.kind_attr, self.values) + def write_attr_node(): + arr = np.array(self.values, dtype='object') + vlarray = self.handle.create_vlarray(self.attrs._v_node._v_parent, + self.kind_attr, _tables().ObjectAtom(), + filters = self.table.filters) + for fld in arr: + vlarray.append(fld) + + try: + write_attr_node() + + except NodeError: + self.handle.remove_node(self.attrs._v_node._v_parent, + self.kind_attr) + write_attr_node() + setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: setattr(self.attrs, self.dtype_attr, self.dtype) @@ -3152,12 +3187,55 @@ def set_info(self): """ update our table index info """ self.attrs.info = self.info + def set_non_index_axes(self): + """ Write the axes to carrays """ + def write_attr_node(name, flds): + arr = np.array(flds, dtype='object') + vlarray = self._handle.create_vlarray(self.attrs._v_node, + name, _tables().ObjectAtom(), + filters=self._filters) + for fld in arr: + vlarray.append(fld) + + def f(dim, flds): + name = "non_index_axes_%d" % dim + try: + write_attr_node(name, flds) + except NodeError: + self._handle.remove_node(self.attrs._v_node, + self.kind_attr) + write_attr_node(name, flds) + return dim, name + + replacement = [f(dim, flds) for dim, flds in self.non_index_axes] + self.attrs.non_index_axes = replacement + + + def get_non_index_axes(self): + """Load the non-index axes from their carrays. This is a pass-through + for tables stored prior to v0.xx""" + def f(dim, flds): + if isinstance(flds, string_types): + flds = self._handle.get_node(self.attrs._v_node, flds)[:] + flds = np.array(flds, dtype='object') + if len(flds.shape) > 1 and flds.shape[1] > 1: + flds = list(map(tuple, flds.tolist())) + else: + flds = flds.tolist() + return dim, flds + else: + return dim, flds #if not a string presumably pre v17 list + non_index_axes = getattr(self.attrs, 'non_index_axes', []) + new = [f(dim, flds) for dim, flds in non_index_axes] + return new + def set_attrs(self): """ set our table type & indexables """ self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - self.attrs.non_index_axes = self.non_index_axes + #self.attrs.non_index_axes = self.non_index_axes + self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep self.attrs.encoding = self.encoding @@ -3167,8 +3245,10 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = getattr( - self.attrs, 'non_index_axes', None) or [] +# ============================================================================= +# self.non_index_axes = getattr( +# self.attrs, 'non_index_axes', None) or [] +# ============================================================================= self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( @@ -3186,6 +3266,7 @@ def get_attrs(self): ] self.metadata = getattr( self.attrs, 'metadata', None) or [] + self.non_index_axes = self.get_non_index_axes() def validate_version(self, where=None): """ are we trying to operate on an old version? """ @@ -3464,6 +3545,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, info['names'] = list(a.names) info['type'] = a.__class__.__name__ + #self.non_index_axes.append((i, a)) self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) From 4c20cddfdefcff551fa5700e3759a41339fb0efd Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 14:04:17 +0200 Subject: [PATCH 03/25] cleanup --- pandas/io/pytables.py | 58 ++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5bc9ccd000a15..b8f584a416749 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2169,10 +2169,13 @@ def convert(self, values, nan_rep, encoding): def get_attr(self): """ get the data for this colummn """ + # reading tables prior to 0.x.x self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: try: - data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + data = self.handle.get_node(self.attrs._v_node._v_parent, + self.kind_attr)[:] data = np.array(data, dtype='object') if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex self.values = list(map(tuple, data.tolist())) @@ -2187,22 +2190,14 @@ def get_attr(self): def set_attr(self): """ set the data for this colummn """ - #setattr(self.attrs, self.kind_attr, self.values) - def write_attr_node(): - arr = np.array(self.values, dtype='object') - vlarray = self.handle.create_vlarray(self.attrs._v_node._v_parent, - self.kind_attr, _tables().ObjectAtom(), - filters = self.table.filters) - for fld in arr: - vlarray.append(fld) + group, key = self.attrs._v_node._v_parent, self.kind_attr + if key in group: + self.handle.remove_node(group, key) - try: - write_attr_node() - - except NodeError: - self.handle.remove_node(self.attrs._v_node._v_parent, - self.kind_attr) - write_attr_node() + vlarray = self.handle.create_vlarray(group, key, + _tables().ObjectAtom()) + for fld in self.values: + vlarray.append(fld) setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: @@ -3189,28 +3184,22 @@ def set_info(self): def set_non_index_axes(self): """ Write the axes to carrays """ - def write_attr_node(name, flds): - arr = np.array(flds, dtype='object') - vlarray = self._handle.create_vlarray(self.attrs._v_node, - name, _tables().ObjectAtom(), - filters=self._filters) - for fld in arr: - vlarray.append(fld) + group = self.attrs._v_node def f(dim, flds): - name = "non_index_axes_%d" % dim - try: - write_attr_node(name, flds) - except NodeError: - self._handle.remove_node(self.attrs._v_node, - self.kind_attr) - write_attr_node(name, flds) - return dim, name + key = "non_index_axes_%d" % dim + if key in group: + self.handle.remove_node(group, key) + + vlarray = self._handle.create_vlarray(group, key, + _tables().ObjectAtom()) + for fld in flds: + vlarray.append(fld) + return dim, key replacement = [f(dim, flds) for dim, flds in self.non_index_axes] self.attrs.non_index_axes = replacement - def get_non_index_axes(self): """Load the non-index axes from their carrays. This is a pass-through for tables stored prior to v0.xx""" @@ -3234,7 +3223,6 @@ def set_attrs(self): self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - #self.attrs.non_index_axes = self.non_index_axes self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep @@ -3245,10 +3233,6 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ -# ============================================================================= -# self.non_index_axes = getattr( -# self.attrs, 'non_index_axes', None) or [] -# ============================================================================= self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( From 2ecc05e3cda08f4bd9e28085ee83b347d4e332b0 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 17:13:26 +0200 Subject: [PATCH 04/25] Linting, cleanup and replaced string_types with str --- pandas/io/pytables.py | 15 +++++++-------- pandas/tests/io/test_pytables.py | 7 +++---- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fa92a202ed906..9b8b1c7d72df9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -38,12 +38,11 @@ from pandas.core.index import ensure_index from pandas.core.internals import BlockManager, _block_shape, make_block -from tables.exceptions import NoSuchNodeError, NodeError +from tables.exceptions import NoSuchNodeError from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing - # versioning attribute _version = '0.15.2' @@ -2248,7 +2247,8 @@ def get_attr(self): data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] data = np.array(data, dtype='object') - if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + # check for multiindex + if len(data.shape) > 1 and data.shape[1] > 1: self.values = list(map(tuple, data.tolist())) else: self.values = data.tolist() @@ -2260,7 +2260,7 @@ def get_attr(self): self.set_kind() def set_attr(self): - """ set the data for this colummn """ + """ set the data for this column """ group, key = self.attrs._v_node._v_parent, self.kind_attr if key in group: self.handle.remove_node(group, key) @@ -3281,7 +3281,7 @@ def f(dim, flds): self.handle.remove_node(group, key) vlarray = self._handle.create_vlarray(group, key, - _tables().ObjectAtom()) + _tables().ObjectAtom()) for fld in flds: vlarray.append(fld) return dim, key @@ -3293,7 +3293,7 @@ def get_non_index_axes(self): """Load the non-index axes from their carrays. This is a pass-through for tables stored prior to v0.xx""" def f(dim, flds): - if isinstance(flds, string_types): + if isinstance(flds, str): flds = self._handle.get_node(self.attrs._v_node, flds)[:] flds = np.array(flds, dtype='object') if len(flds.shape) > 1 and flds.shape[1] > 1: @@ -3302,7 +3302,7 @@ def f(dim, flds): flds = flds.tolist() return dim, flds else: - return dim, flds #if not a string presumably pre v17 list + return dim, flds # if not a string presumably pre v0.xx list non_index_axes = getattr(self.attrs, 'non_index_axes', []) new = [f(dim, flds) for dim, flds in non_index_axes] return new @@ -3623,7 +3623,6 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, info['names'] = list(a.names) info['type'] = a.__class__.__name__ - #self.non_index_axes.append((i, a)) self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 8a50a392250aa..4d98d1a70ba8a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5152,7 +5152,7 @@ def test_dst_transitions(self): def test_wide_table_format(self): # test storing wide dataframes with in table format - df = DataFrame(np.random.random((10,10000))) + df = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: df.to_hdf(path, 'df', format='table') @@ -5162,12 +5162,11 @@ def test_wide_table_format(self): def test_append_wide_table_format(self): # test append to hdf with wide dataframe - df1 = DataFrame(np.random.random((10,10000))) - df2 = DataFrame(np.random.random((10,10000))) + df1 = DataFrame(np.random.random((10, 10000))) + df2 = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: df1.to_hdf(path, 'df', format='table') df2.to_hdf(path, 'df', append=True) reread = read_hdf(path) assert_frame_equal(pd.concat([df1, df2]), reread) - From 6451f8c54b7f3f920a7ee51c852d6821b2f72312 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:03:43 +0200 Subject: [PATCH 05/25] Fixed tables import --- pandas/io/pytables.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9b8b1c7d72df9..b300229e4f981 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -38,8 +38,6 @@ from pandas.core.index import ensure_index from pandas.core.internals import BlockManager, _block_shape, make_block -from tables.exceptions import NoSuchNodeError - from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing @@ -2252,7 +2250,7 @@ def get_attr(self): self.values = list(map(tuple, data.tolist())) else: self.values = data.tolist() - except NoSuchNodeError: + except _table_mod.exceptions.NoSuchNodeError: pass self.dtype = getattr(self.attrs, self.dtype_attr, None) From 059bbc1427f882a874031938af152030c9de0833 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:04:22 +0200 Subject: [PATCH 06/25] changed test to only check compression filter on table data, not columns --- pandas/tests/io/test_pytables.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4d98d1a70ba8a..4d0943f0a1a74 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -836,8 +836,10 @@ def test_complibs_default_settings(self): assert node.filters.complevel == 0 assert node.filters.complib is None for node in h5file.walk_nodes(where='/dfc', classname='Leaf'): - assert node.filters.complevel == 9 - assert node.filters.complib == 'blosc' + # only check table, skip column + if node.name == 'table': + assert node.filters.complevel == 9 + assert node.filters.complib == 'blosc' def test_complibs(self): # GH14478 From 1c1f872335bf169b8d2449a2424332f333027706 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:56:08 +0200 Subject: [PATCH 07/25] added tests for reading columns from legacy tables. Rearranged position of tests. --- pandas/tests/io/test_pytables.py | 74 ++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4d0943f0a1a74..51d9d959f0255 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4728,8 +4728,60 @@ def test_read_py2_hdf_file_in_py3(self, datapath): mode='r') as store: result = store['p'] assert_frame_equal(result, expected) + + def test_wide_table_format(self): + # GH 26135 + # test storing wide dataframes with in table format + + df = DataFrame(np.random.random((10, 10000))) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + def test_append_wide_table_format(self): + # GH 26135 + # test append to hdf with wide dataframe + df1 = DataFrame(np.random.random((10, 10000))) + df2 = DataFrame(np.random.random((10, 10000))) + + with ensure_clean_path(self.path) as path: + df1.to_hdf(path, 'df', format='table') + df2.to_hdf(path, 'df', append=True) + reread = read_hdf(path) + assert_frame_equal(pd.concat([df1, df2]), reread) + + @xfail_non_writeable + def test_legacy_table_table_format_read(self, datapath): + # GH 26135 + # test read of legacy table with table format and column + # saved as pytables metadata + + column_numeric = [1, 2, 3, 4] + column_str_1 = ['A', 'B', 'C', 'D'] + column_str_2 = ['Ä', 'Ö', 'Â', 'é'] + column_dt = pd.date_range('19700101', '19700104') + column_multi_1 = pd.MultiIndex.from_tuples( + zip(column_numeric, column_str_1)) + column_multi_2 = pd.MultiIndex.from_tuples( + zip(column_str_2, column_dt)) + + columns = [column_numeric, column_str_1, column_str_2, column_dt, + column_multi_1, column_multi_2] + + data = np.arange(0, 16).reshape(4,4) + + with ensure_clean_store( + datapath('io', 'data', 'legacy_hdf', + 'legacy_table_table_format.h5'), + mode='r') as store: + for i, column in enumerate(columns): + table_name = 'table_{}'.format(i) + df = pd.DataFrame(data, columns=column) + tm.assert_frame_equal(store[table_name], df) + class TestHDFComplexValues(Base): # GH10447 @@ -5150,25 +5202,3 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) - - def test_wide_table_format(self): - # test storing wide dataframes with in table format - - df = DataFrame(np.random.random((10, 10000))) - - with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table') - reread = read_hdf(path, 'df') - assert_frame_equal(df, reread) - - def test_append_wide_table_format(self): - # test append to hdf with wide dataframe - - df1 = DataFrame(np.random.random((10, 10000))) - df2 = DataFrame(np.random.random((10, 10000))) - - with ensure_clean_path(self.path) as path: - df1.to_hdf(path, 'df', format='table') - df2.to_hdf(path, 'df', append=True) - reread = read_hdf(path) - assert_frame_equal(pd.concat([df1, df2]), reread) From e4d81bf270e8752d32dbf0983fc6d9caa7fe23ef Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 15:07:48 +0200 Subject: [PATCH 08/25] Linting --- pandas/tests/io/test_pytables.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 51d9d959f0255..2a5e31604bd88 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4728,7 +4728,7 @@ def test_read_py2_hdf_file_in_py3(self, datapath): mode='r') as store: result = store['p'] assert_frame_equal(result, expected) - + def test_wide_table_format(self): # GH 26135 # test storing wide dataframes with in table format @@ -4752,27 +4752,27 @@ def test_append_wide_table_format(self): df2.to_hdf(path, 'df', append=True) reread = read_hdf(path) assert_frame_equal(pd.concat([df1, df2]), reread) - + @xfail_non_writeable def test_legacy_table_table_format_read(self, datapath): # GH 26135 - # test read of legacy table with table format and column + # test read of legacy table with table format and column # saved as pytables metadata - + column_numeric = [1, 2, 3, 4] column_str_1 = ['A', 'B', 'C', 'D'] column_str_2 = ['Ä', 'Ö', 'Â', 'é'] column_dt = pd.date_range('19700101', '19700104') column_multi_1 = pd.MultiIndex.from_tuples( - zip(column_numeric, column_str_1)) + zip(column_numeric, column_str_1)) column_multi_2 = pd.MultiIndex.from_tuples( - zip(column_str_2, column_dt)) - + zip(column_str_2, column_dt)) + columns = [column_numeric, column_str_1, column_str_2, column_dt, column_multi_1, column_multi_2] - - data = np.arange(0, 16).reshape(4,4) - + + data = np.arange(0, 16).reshape(4, 4) + with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', 'legacy_table_table_format.h5'), @@ -4781,7 +4781,8 @@ def test_legacy_table_table_format_read(self, datapath): table_name = 'table_{}'.format(i) df = pd.DataFrame(data, columns=column) tm.assert_frame_equal(store[table_name], df) - + + class TestHDFComplexValues(Base): # GH10447 From 37efd629f27719771dd47f6dcfed4aa0b8607cd3 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 15:28:49 +0200 Subject: [PATCH 09/25] added legacy hdf file for tests --- .../legacy_hdf/legacy_table_table_format.h5 | Bin 0 -> 64688 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 new file mode 100644 index 0000000000000000000000000000000000000000..44c8e795c0b8d80542d3e7a96e9636586025f712 GIT binary patch literal 64688 zcmeHQ3v44vS}tdH_p;uZ+3X9#Q)UsuE0*BQ?koW<$l0CQ%bY@DGP6Stt%x&;XOr8E z2L`KnZicTY{eoRZMs zE9`!8`TW$y3zHKXUkw=^XTw7YRZZfLXc!TL8b9^kOQSE0pT9Ws(&W^4j9!|t)ju;d zO!Xeouzq|>QJ{}KYS3?diid#D*Fh@$zgH;0<8zvB7+fjD0?BQr!)$4>pg31S#er9r zmgine%-on=x}GVR^kOk{O(m=R9#aXIIR|+ckrGeI#oTOmezq`EyqU}B4%f@mtNGb$s^p~L zdJjA6-N@t%i_2NaVa1|M=fu$P*`YJedJpP)ne5#1{9^W1P^VFTzkKPXZ&W;Jvr_GlEWy! zm?_?r<*L1RIqKyzuU=o8&CBbY!nNpzmEKTCB!Z0lbV^7~iG%rK>~W;`DF1%hs;be~ z*QYD?({)Zn#!dnyMwkGP(%mTJUwioXUNLm?){CQ4Q={*dbqv0@UOY4OtZuMbzJ>g_ zk9sG>{)6bR?O&2rA7vURHoQ0$tbLgc1Nor7v}?Z98!zak;2tujWM|2|uksQ1--hQ` z&itrsCUvk{Q9P~=j1?jH@xh@HY?es=!EuV(1+U-uCN@UwOOxTHjA0CU3ntp?dYPC!f+f_MK0C`itDlM)?;MnwuWF z_ur4NM5OP8<}C^NTDCfVR)1I;PZ8m3jgmF3fSWAyI)mHfmX-e znO_&a7VQ{2D5f*~=G)Bp@{a!KX0REflU7n<~OdkbT zP$GHIeSYIO@lUx%6a`hjoAKNkdBc9=`d5HF6|&b_uE`ap7t3PqY4F`kR2yc0|qK3==(10h_-#KzzgmXqUgZrWSq4!`MGDoeurQ zURht{?k{e@%=Ppa)qIQHU)(1B!z(>B+lZjr6X;!|znG$VBrdOq1C+5x#FENmUH_x~e`iuHe9k0K* zaTfg2eg5Lz&|mc6ANCiwpThIq{|3P0^cTJLb%~Z(c89(;^1L_nhs|GH`&RHxv;N}h zw*jB+2&BUQdxZive=$jX#B^wvzgS741!MojbVlee_NKq+XXmqByz}kg3zlPczp+eu z2z-NDBCvjA{rqD2dG4`0WPfpB)V-W@*iX^NbKsZm^OGyYKV{qi{RcG;+G+k_Kbc<$ zkCB~vbU*pD=Dls@^vc7}PO<49yPcAxR}o|S`urC@S5U&}w@{C^Lyt!EioM45$Xo9L z{o};<*axJ~x=)XoAbeur?KDnOlny~}Q-I?IX;Rb`y4~CiPS{>s`7WekeLC0EHD_ng zWvhU%TY4E;%x}%^17P%2xE@(iFd}!4Q18!Wj&c7A^)4^ld)KZ@|3RjW9zU($o5H>q z*`>^l%#!kS>fb-)`Xs*O^`OW3!hU+3`D$!ZUR)kAx}aT;r@KmzbG(85{skU5xZQ^T z5z4uHv7x9YgQNcY<2Bh0364wbeP=^aS7q4jJtJ3|lWT^*gZcievBR*R4vjzg*6hS` zHiLElYOL|NDU!Jy8W5WgmBC8h73CK0&AXqPp)-)Ba1d?{T~l(p^g` z@F(*wNKycb)vkG-2aQ`E&aTOke>X$5Rs| zAW|eR3}5>1Z{Y67C3OQ4@r3r{YW7Dk?pzc89QHFDXxgs9{?_$=qFwGiJpf9?0O&vD z4-mr0(MJ2G74tQQ&%Q**BWn4i>ULQ7>uJB=F;4su`W=cR*zPs(QRGnu>x-Ss;Gb21 z>c8LnXk35r3aV79HE|Nuaaq?*z5j7yMu)8ustkVg((IMYk~6KsTKY`@M|m}^PG~&1 z?w{i8PiN%ezm!}eAO-8Z1wrBt4;^-Ks@UK!S?JH#e;2KEm6VlD>e+|Z&2|L z@tqAjO$8OQhirJv_v!DkVIV(6#jE5ecw@J&)a>V*8o*U+<(rq>Z;fAzNMl35s!3pvBTi+>tOrsT6a_7QY3zqaKfg8h zUP3>WhzQ!eVw%Qbt^FI?c^E`Iuh_o$+dR=fRx@ft`hy&1zNm3rMkNZXpVqx!5VZ06 zMmRn-D?mf?^Jd8Q@kSB+k^*qd&va&drG6M0{GMF5&oi#v!2PV#eH!@}^m)b&;_EW?93a$Vpaah{mgkXPCwXD! ze}Xa3*x&R#qgg-Y?dQcG!5y=Q6Y_W zi;5cNsGna<f=Tp#|D*+{E!EdEJ{;x{CmCeR;jr10dKq%72hTQz#F@D zrFJ`1v!@)>jq<*={%p|Y{-sdvYj%TOWaK?~(C>TXV?PQ!_J>->x#5+%fdV{lI8B|! zXcyw{>$SbtdQWC4=%?>gP3FUxO1KrMMQNN?T`eR_4J>ngE z9Bi88f2@YfoOb8$2lzYMBR^8p?S{kvlWhMTG%nrl@8y-cYCM{_NoV5EroBeH-6=nhx^K@4$pO~u-4p*^ zBmODZh@ha#bMrhq!#^DV-6Zntkezmr_^*@CjGk_{To`;54>T4&S4jc3j~N<1YL8q{ z``oIwSpT#AUA~3K!Sh~z{$lA(;PZ8m3jgmF3fTO`IPnqFpdJokNo>c4ZHsFH7XKXHWgk9~e9NPZ&UAg>ecr;LT4;1YK@z}X++6L-k| z;^5D?mvb)5`NU6gPIp4T`7Wb>J7dS0_8RqlE9Pr?-0ZI5c^P?`&>6w|F3R(veW;%d zMmpPt488cpY0n?kZ>;<@_+=WvvHmUS>xNz-{wY(>0YbkKbYQ=6@54ya!I zGsT;^jP6K%J6H2^FA?1i>hpU@RPRP6Uszn0@06)}PCGU+G<$Q2#+Vyq( zJWIcz$#{hCdG3-Pgk6ZM)qa9%CQs{Sp9ELv|;~cH@TT3Jly3cRz ze+-l=hx{Wc!1f74_FD%(3i6DQ-FTdSt8#(tapo0zp539Z&G_sM{b8HuT=-S+PqXu! z>0bvv+Yv~G|Mv<7Y<}wi@e$LZ)qX46KkL7V7L5H9(;4ABXK&7P`q}x+7aP9;zPL*Y z(yoVANe_WNfY-9kRc;PvgWL%lM12Kd@mSpXGe)x4rR#UK*Tru?bD8 z_P?LR?Hu%bo?A8jbAHeBmt+aspTYW`=VjGR`AJdvwa`TL@;M3QcR=+=K3P6&{}wnj z1>lIc1gIC?ypiOSs^;=>SJud^$ zf2ZRvJnj+|bYQKfH%5-ow3$#bcy%xxR19AoDpsxz^=#F_-s6Kv3iVE>IvxU$;~K{) zf~N`6CjFE;zV=yu+V1<#>v!I`^VXfWBYjnp$UaZrdUHZ`{bNr)rTzVPKK1D@%4DRE z$ySxi_19Ik)Zy1|yXbw``$4b6|BwP91wsmh6zCNS9Q-}z&ct6ip1%DFPSpMKTH zL7kG{&y_!QR6dK0X7}m8`-g-NAhte_6ZY3BI|MyS0iIV#6T?^NJ{SHPC(KvnKS!FJ zT*tbu=JW(zwhH*Vxj#h~^V`AoRQTO*MM2(QApGt(gY5O0cg8e_SVIZI5J9`L2Tan{bvirwYl&&bS#rJwGu4?SCpzZ%X@HiNSBacXR zI$nBkebb!2Gycorz6a0y^9u?r`pw5S{~pWZA>B2r+58qrIQ9!(Tg=WY^!oMVa?)!t z`>Jz&JN%q1mZMw0YZ=_}I2iJU2VbzgI`S79Nf>)Ip)(Ht{_5er`@KN^-59#zvF`6H z>3PNo6CkMa)la^4S})fS%_U*CA7F7sGNqTVk)5>|) z!N0^!t<$|3xfJyGYHyPZTBe=@1eLEl&9@v+jeiDo86$aN?2%yX$KSs14kdmEcOCk-IxFQ(9Y4U;I4o8J!?`@)wuHZpMif?X*@CT^E=pX8u%O3 zsBlGsK^>QM-Q<1v!+rVd33J4B{l5GaiU->+TcU!U9UBJnH>h}<_|Ar%rh-kfhirJv z_tSr6!$3a!1uNtycw@J&)O`0l=_=p6TZ9se(!@VKDt5cDVoZ1ag*|A`xBdtmQhkS1rYW%G$U@8G)3 zZwHU>hVzMvg1tMR=>FaB)qeor#90v{DHj7b`#rc6`JJ$!Q zma`c-Um{D@)D;h>i6K16hjcrk8aL5E2@CA?DP54G!DagMMZI@zk#Q8Ui;#2 zuY~b8&FbJ&LwB7i?HnFAjpMRC>mq|T@BjVb`1IfpwvX5M!7s*+WxhG+dy|KoXWaY( z+8+b(m}e|^)p%$6=KdQuR-yYd`m!_rHZ|)Doca$EC8_I}ID4??8O#5QTU-Zl$iJY^ zGp-X~m#OCfVY~=*;CaTW1g|~-~EpLF76nk5$D0($D-46&9v8O-?YZ> z2zs13PW%!2A8KsW+rNkX?)M1A3s-2s+5Nw#iT*Ld1PH3!=1#(YUJBty1yke|Y1uKd`^=jo0c@yB*4YNu#`Ptv?%d Rxj!Dtea&>Ri|Xb6{{#4sdMy9| literal 0 HcmV?d00001 From 05aac5b385fd7c8fffded138025cc2873cf740f6 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Tue, 23 Apr 2019 10:52:05 +0200 Subject: [PATCH 10/25] Numpy in windows creates int32 arrays by default. Need to cast to int64 explicitly to match stored table. --- pandas/tests/io/test_pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2a5e31604bd88..e2c13d5a5e83b 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4771,7 +4771,7 @@ def test_legacy_table_table_format_read(self, datapath): columns = [column_numeric, column_str_1, column_str_2, column_dt, column_multi_1, column_multi_2] - data = np.arange(0, 16).reshape(4, 4) + data = np.arange(0, 16).reshape(4, 4).astype('int64') with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', From c539d9db6a122aba46e0a5fc6aaa2e5e9e0d6ba1 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 14:17:43 +0200 Subject: [PATCH 11/25] added two test cases for storing wide dataframes in table format --- pandas/tests/io/test_pytables.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index df18518cc701a..a8d74b4ed42d2 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5148,3 +5148,26 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) + + def test_wide_table_format(self): + # test storing wide dataframes with in table format + + df = DataFrame(np.random.random((10,10000))) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_append_wide_table_format(self): + # test append to hdf with wide dataframe + + df1 = DataFrame(np.random.random((10,10000))) + df2 = DataFrame(np.random.random((10,10000))) + + with ensure_clean_path(self.path) as path: + df1.to_hdf(path, 'df', format='table') + df2.to_hdf(path, 'df', append=True) + reread = read_hdf(path) + assert_frame_equal(pd.concat([df1, df2]), reread) + From c553ee5b40a58fbb963588bf3ab12c10e4f74102 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 11:00:38 +0200 Subject: [PATCH 12/25] Added support for wide tables with format 'table'. Columns are saved as vlarray with object atom. pre-cleanup. --- pandas/io/pytables.py | 92 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 85cc738667a34..743b4dd508871 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -41,6 +41,8 @@ from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing +from tables.exceptions import NoSuchNodeError, NodeError + # versioning attribute _version = '0.15.2' @@ -1611,6 +1613,7 @@ def infer(self, handler): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() + new_self._handle = handler._handle new_self.set_table(table) new_self.get_attr() new_self.read_metadata(handler) @@ -1668,6 +1671,10 @@ def cvalues(self): """ return my cython values """ return self.values + @property + def handle(self): + return self._handle + def __iter__(self): return iter(self.values) @@ -1691,6 +1698,7 @@ def validate_names(self): pass def validate_and_set(self, handler, append): + self._handle = handler._handle self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -2232,13 +2240,40 @@ def convert(self, values, nan_rep, encoding, errors): def get_attr(self): """ get the data for this column """ self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: + try: + data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + data = np.array(data, dtype='object') + if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + self.values = list(map(tuple, data.tolist())) + else: + self.values = data.tolist() + except NoSuchNodeError: + pass + self.dtype = getattr(self.attrs, self.dtype_attr, None) self.meta = getattr(self.attrs, self.meta_attr, None) self.set_kind() def set_attr(self): - """ set the data for this column """ - setattr(self.attrs, self.kind_attr, self.values) + """ set the data for this colummn """ + #setattr(self.attrs, self.kind_attr, self.values) + def write_attr_node(): + arr = np.array(self.values, dtype='object') + vlarray = self.handle.create_vlarray(self.attrs._v_node._v_parent, + self.kind_attr, _tables().ObjectAtom(), + filters = self.table.filters) + for fld in arr: + vlarray.append(fld) + + try: + write_attr_node() + + except NodeError: + self.handle.remove_node(self.attrs._v_node._v_parent, + self.kind_attr) + write_attr_node() + setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: setattr(self.attrs, self.dtype_attr, self.dtype) @@ -3240,12 +3275,55 @@ def set_info(self): """ update our table index info """ self.attrs.info = self.info + def set_non_index_axes(self): + """ Write the axes to carrays """ + def write_attr_node(name, flds): + arr = np.array(flds, dtype='object') + vlarray = self._handle.create_vlarray(self.attrs._v_node, + name, _tables().ObjectAtom(), + filters=self._filters) + for fld in arr: + vlarray.append(fld) + + def f(dim, flds): + name = "non_index_axes_%d" % dim + try: + write_attr_node(name, flds) + except NodeError: + self._handle.remove_node(self.attrs._v_node, + self.kind_attr) + write_attr_node(name, flds) + return dim, name + + replacement = [f(dim, flds) for dim, flds in self.non_index_axes] + self.attrs.non_index_axes = replacement + + + def get_non_index_axes(self): + """Load the non-index axes from their carrays. This is a pass-through + for tables stored prior to v0.xx""" + def f(dim, flds): + if isinstance(flds, string_types): + flds = self._handle.get_node(self.attrs._v_node, flds)[:] + flds = np.array(flds, dtype='object') + if len(flds.shape) > 1 and flds.shape[1] > 1: + flds = list(map(tuple, flds.tolist())) + else: + flds = flds.tolist() + return dim, flds + else: + return dim, flds #if not a string presumably pre v17 list + non_index_axes = getattr(self.attrs, 'non_index_axes', []) + new = [f(dim, flds) for dim, flds in non_index_axes] + return new + def set_attrs(self): """ set our table type & indexables """ self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - self.attrs.non_index_axes = self.non_index_axes + #self.attrs.non_index_axes = self.non_index_axes + self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep self.attrs.encoding = self.encoding @@ -3256,8 +3334,10 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = getattr( - self.attrs, 'non_index_axes', None) or [] +# ============================================================================= +# self.non_index_axes = getattr( +# self.attrs, 'non_index_axes', None) or [] +# ============================================================================= self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( @@ -3276,6 +3356,7 @@ def get_attrs(self): ] self.metadata = getattr( self.attrs, 'metadata', None) or [] + self.non_index_axes = self.get_non_index_axes() def validate_version(self, where=None): """ are we trying to operate on an old version? """ @@ -3557,6 +3638,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, info['names'] = list(a.names) info['type'] = a.__class__.__name__ + #self.non_index_axes.append((i, a)) self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) From 872552b7a561880c9c2e82a5925c1b3e6d52a285 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 14:04:17 +0200 Subject: [PATCH 13/25] cleanup --- pandas/io/pytables.py | 60 ++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 743b4dd508871..577d7e55a8946 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2238,11 +2238,14 @@ def convert(self, values, nan_rep, encoding, errors): return self def get_attr(self): - """ get the data for this column """ + """ get the data for this colummn """ + # reading tables prior to 0.x.x self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: try: - data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + data = self.handle.get_node(self.attrs._v_node._v_parent, + self.kind_attr)[:] data = np.array(data, dtype='object') if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex self.values = list(map(tuple, data.tolist())) @@ -2257,22 +2260,14 @@ def get_attr(self): def set_attr(self): """ set the data for this colummn """ - #setattr(self.attrs, self.kind_attr, self.values) - def write_attr_node(): - arr = np.array(self.values, dtype='object') - vlarray = self.handle.create_vlarray(self.attrs._v_node._v_parent, - self.kind_attr, _tables().ObjectAtom(), - filters = self.table.filters) - for fld in arr: - vlarray.append(fld) + group, key = self.attrs._v_node._v_parent, self.kind_attr + if key in group: + self.handle.remove_node(group, key) - try: - write_attr_node() - - except NodeError: - self.handle.remove_node(self.attrs._v_node._v_parent, - self.kind_attr) - write_attr_node() + vlarray = self.handle.create_vlarray(group, key, + _tables().ObjectAtom()) + for fld in self.values: + vlarray.append(fld) setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: @@ -3277,28 +3272,22 @@ def set_info(self): def set_non_index_axes(self): """ Write the axes to carrays """ - def write_attr_node(name, flds): - arr = np.array(flds, dtype='object') - vlarray = self._handle.create_vlarray(self.attrs._v_node, - name, _tables().ObjectAtom(), - filters=self._filters) - for fld in arr: - vlarray.append(fld) + group = self.attrs._v_node def f(dim, flds): - name = "non_index_axes_%d" % dim - try: - write_attr_node(name, flds) - except NodeError: - self._handle.remove_node(self.attrs._v_node, - self.kind_attr) - write_attr_node(name, flds) - return dim, name + key = "non_index_axes_%d" % dim + if key in group: + self.handle.remove_node(group, key) + + vlarray = self._handle.create_vlarray(group, key, + _tables().ObjectAtom()) + for fld in flds: + vlarray.append(fld) + return dim, key replacement = [f(dim, flds) for dim, flds in self.non_index_axes] self.attrs.non_index_axes = replacement - def get_non_index_axes(self): """Load the non-index axes from their carrays. This is a pass-through for tables stored prior to v0.xx""" @@ -3322,7 +3311,6 @@ def set_attrs(self): self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - #self.attrs.non_index_axes = self.non_index_axes self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep @@ -3334,10 +3322,6 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ -# ============================================================================= -# self.non_index_axes = getattr( -# self.attrs, 'non_index_axes', None) or [] -# ============================================================================= self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( From ee3cdba10c34ae01cd2fb98ec65b8462c217364b Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Thu, 18 Apr 2019 17:13:26 +0200 Subject: [PATCH 14/25] Linting, cleanup and replaced string_types with str --- pandas/io/pytables.py | 14 +++++++------- pandas/tests/io/test_pytables.py | 7 +++---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 577d7e55a8946..1d8b611591515 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -41,7 +41,7 @@ from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing -from tables.exceptions import NoSuchNodeError, NodeError +from tables.exceptions import NoSuchNodeError # versioning attribute _version = '0.15.2' @@ -2247,7 +2247,8 @@ def get_attr(self): data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] data = np.array(data, dtype='object') - if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + # check for multiindex + if len(data.shape) > 1 and data.shape[1] > 1: self.values = list(map(tuple, data.tolist())) else: self.values = data.tolist() @@ -2259,7 +2260,7 @@ def get_attr(self): self.set_kind() def set_attr(self): - """ set the data for this colummn """ + """ set the data for this column """ group, key = self.attrs._v_node._v_parent, self.kind_attr if key in group: self.handle.remove_node(group, key) @@ -3280,7 +3281,7 @@ def f(dim, flds): self.handle.remove_node(group, key) vlarray = self._handle.create_vlarray(group, key, - _tables().ObjectAtom()) + _tables().ObjectAtom()) for fld in flds: vlarray.append(fld) return dim, key @@ -3292,7 +3293,7 @@ def get_non_index_axes(self): """Load the non-index axes from their carrays. This is a pass-through for tables stored prior to v0.xx""" def f(dim, flds): - if isinstance(flds, string_types): + if isinstance(flds, str): flds = self._handle.get_node(self.attrs._v_node, flds)[:] flds = np.array(flds, dtype='object') if len(flds.shape) > 1 and flds.shape[1] > 1: @@ -3301,7 +3302,7 @@ def f(dim, flds): flds = flds.tolist() return dim, flds else: - return dim, flds #if not a string presumably pre v17 list + return dim, flds # if not a string presumably pre v0.xx list non_index_axes = getattr(self.attrs, 'non_index_axes', []) new = [f(dim, flds) for dim, flds in non_index_axes] return new @@ -3622,7 +3623,6 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, info['names'] = list(a.names) info['type'] = a.__class__.__name__ - #self.non_index_axes.append((i, a)) self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a8d74b4ed42d2..6b93d76b33457 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5152,7 +5152,7 @@ def test_dst_transitions(self): def test_wide_table_format(self): # test storing wide dataframes with in table format - df = DataFrame(np.random.random((10,10000))) + df = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: df.to_hdf(path, 'df', format='table') @@ -5162,12 +5162,11 @@ def test_wide_table_format(self): def test_append_wide_table_format(self): # test append to hdf with wide dataframe - df1 = DataFrame(np.random.random((10,10000))) - df2 = DataFrame(np.random.random((10,10000))) + df1 = DataFrame(np.random.random((10, 10000))) + df2 = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: df1.to_hdf(path, 'df', format='table') df2.to_hdf(path, 'df', append=True) reread = read_hdf(path) assert_frame_equal(pd.concat([df1, df2]), reread) - From a2c27648e42ed407938d9a00252a1501974e8f2a Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:03:43 +0200 Subject: [PATCH 15/25] Fixed tables import --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1d8b611591515..19b660a5ea038 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2252,7 +2252,7 @@ def get_attr(self): self.values = list(map(tuple, data.tolist())) else: self.values = data.tolist() - except NoSuchNodeError: + except _table_mod.exceptions.NoSuchNodeError: pass self.dtype = getattr(self.attrs, self.dtype_attr, None) From f8c94cb53399507dd7698b33ee1a060551886997 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:04:22 +0200 Subject: [PATCH 16/25] changed test to only check compression filter on table data, not columns --- pandas/tests/io/test_pytables.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 6b93d76b33457..0fca62ac3aec4 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -836,8 +836,10 @@ def test_complibs_default_settings(self): assert node.filters.complevel == 0 assert node.filters.complib is None for node in h5file.walk_nodes(where='/dfc', classname='Leaf'): - assert node.filters.complevel == 9 - assert node.filters.complib == 'blosc' + # only check table, skip column + if node.name == 'table': + assert node.filters.complevel == 9 + assert node.filters.complib == 'blosc' def test_complibs(self): # GH14478 From 8484293fd409e907c66f78a151a8b8b03494b19b Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 14:56:08 +0200 Subject: [PATCH 17/25] added tests for reading columns from legacy tables. Rearranged position of tests. --- pandas/tests/io/test_pytables.py | 74 ++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 0fca62ac3aec4..cdbc745c07643 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4728,8 +4728,60 @@ def test_read_py2_hdf_file_in_py3(self, datapath): mode='r') as store: result = store['p'] assert_frame_equal(result, expected) + + def test_wide_table_format(self): + # GH 26135 + # test storing wide dataframes with in table format + + df = DataFrame(np.random.random((10, 10000))) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + def test_append_wide_table_format(self): + # GH 26135 + # test append to hdf with wide dataframe + df1 = DataFrame(np.random.random((10, 10000))) + df2 = DataFrame(np.random.random((10, 10000))) + + with ensure_clean_path(self.path) as path: + df1.to_hdf(path, 'df', format='table') + df2.to_hdf(path, 'df', append=True) + reread = read_hdf(path) + assert_frame_equal(pd.concat([df1, df2]), reread) + + @xfail_non_writeable + def test_legacy_table_table_format_read(self, datapath): + # GH 26135 + # test read of legacy table with table format and column + # saved as pytables metadata + + column_numeric = [1, 2, 3, 4] + column_str_1 = ['A', 'B', 'C', 'D'] + column_str_2 = ['Ä', 'Ö', 'Â', 'é'] + column_dt = pd.date_range('19700101', '19700104') + column_multi_1 = pd.MultiIndex.from_tuples( + zip(column_numeric, column_str_1)) + column_multi_2 = pd.MultiIndex.from_tuples( + zip(column_str_2, column_dt)) + + columns = [column_numeric, column_str_1, column_str_2, column_dt, + column_multi_1, column_multi_2] + + data = np.arange(0, 16).reshape(4,4) + + with ensure_clean_store( + datapath('io', 'data', 'legacy_hdf', + 'legacy_table_table_format.h5'), + mode='r') as store: + for i, column in enumerate(columns): + table_name = 'table_{}'.format(i) + df = pd.DataFrame(data, columns=column) + tm.assert_frame_equal(store[table_name], df) + class TestHDFComplexValues(Base): # GH10447 @@ -5150,25 +5202,3 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) - - def test_wide_table_format(self): - # test storing wide dataframes with in table format - - df = DataFrame(np.random.random((10, 10000))) - - with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table') - reread = read_hdf(path, 'df') - assert_frame_equal(df, reread) - - def test_append_wide_table_format(self): - # test append to hdf with wide dataframe - - df1 = DataFrame(np.random.random((10, 10000))) - df2 = DataFrame(np.random.random((10, 10000))) - - with ensure_clean_path(self.path) as path: - df1.to_hdf(path, 'df', format='table') - df2.to_hdf(path, 'df', append=True) - reread = read_hdf(path) - assert_frame_equal(pd.concat([df1, df2]), reread) From c3db771296617b588ce707635732384ad585e404 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 15:07:48 +0200 Subject: [PATCH 18/25] Linting --- pandas/tests/io/test_pytables.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index cdbc745c07643..bc48e13215dcc 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4728,7 +4728,7 @@ def test_read_py2_hdf_file_in_py3(self, datapath): mode='r') as store: result = store['p'] assert_frame_equal(result, expected) - + def test_wide_table_format(self): # GH 26135 # test storing wide dataframes with in table format @@ -4752,27 +4752,27 @@ def test_append_wide_table_format(self): df2.to_hdf(path, 'df', append=True) reread = read_hdf(path) assert_frame_equal(pd.concat([df1, df2]), reread) - + @xfail_non_writeable def test_legacy_table_table_format_read(self, datapath): # GH 26135 - # test read of legacy table with table format and column + # test read of legacy table with table format and column # saved as pytables metadata - + column_numeric = [1, 2, 3, 4] column_str_1 = ['A', 'B', 'C', 'D'] column_str_2 = ['Ä', 'Ö', 'Â', 'é'] column_dt = pd.date_range('19700101', '19700104') column_multi_1 = pd.MultiIndex.from_tuples( - zip(column_numeric, column_str_1)) + zip(column_numeric, column_str_1)) column_multi_2 = pd.MultiIndex.from_tuples( - zip(column_str_2, column_dt)) - + zip(column_str_2, column_dt)) + columns = [column_numeric, column_str_1, column_str_2, column_dt, column_multi_1, column_multi_2] - - data = np.arange(0, 16).reshape(4,4) - + + data = np.arange(0, 16).reshape(4, 4) + with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', 'legacy_table_table_format.h5'), @@ -4781,7 +4781,8 @@ def test_legacy_table_table_format_read(self, datapath): table_name = 'table_{}'.format(i) df = pd.DataFrame(data, columns=column) tm.assert_frame_equal(store[table_name], df) - + + class TestHDFComplexValues(Base): # GH10447 From 95b193c7351ca6d42380f7bf4ba3ac563dd0c223 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Sat, 20 Apr 2019 15:28:49 +0200 Subject: [PATCH 19/25] added legacy hdf file for tests --- .../legacy_hdf/legacy_table_table_format.h5 | Bin 0 -> 64688 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_table_format.h5 new file mode 100644 index 0000000000000000000000000000000000000000..44c8e795c0b8d80542d3e7a96e9636586025f712 GIT binary patch literal 64688 zcmeHQ3v44vS}tdH_p;uZ+3X9#Q)UsuE0*BQ?koW<$l0CQ%bY@DGP6Stt%x&;XOr8E z2L`KnZicTY{eoRZMs zE9`!8`TW$y3zHKXUkw=^XTw7YRZZfLXc!TL8b9^kOQSE0pT9Ws(&W^4j9!|t)ju;d zO!Xeouzq|>QJ{}KYS3?diid#D*Fh@$zgH;0<8zvB7+fjD0?BQr!)$4>pg31S#er9r zmgine%-on=x}GVR^kOk{O(m=R9#aXIIR|+ckrGeI#oTOmezq`EyqU}B4%f@mtNGb$s^p~L zdJjA6-N@t%i_2NaVa1|M=fu$P*`YJedJpP)ne5#1{9^W1P^VFTzkKPXZ&W;Jvr_GlEWy! zm?_?r<*L1RIqKyzuU=o8&CBbY!nNpzmEKTCB!Z0lbV^7~iG%rK>~W;`DF1%hs;be~ z*QYD?({)Zn#!dnyMwkGP(%mTJUwioXUNLm?){CQ4Q={*dbqv0@UOY4OtZuMbzJ>g_ zk9sG>{)6bR?O&2rA7vURHoQ0$tbLgc1Nor7v}?Z98!zak;2tujWM|2|uksQ1--hQ` z&itrsCUvk{Q9P~=j1?jH@xh@HY?es=!EuV(1+U-uCN@UwOOxTHjA0CU3ntp?dYPC!f+f_MK0C`itDlM)?;MnwuWF z_ur4NM5OP8<}C^NTDCfVR)1I;PZ8m3jgmF3fSWAyI)mHfmX-e znO_&a7VQ{2D5f*~=G)Bp@{a!KX0REflU7n<~OdkbT zP$GHIeSYIO@lUx%6a`hjoAKNkdBc9=`d5HF6|&b_uE`ap7t3PqY4F`kR2yc0|qK3==(10h_-#KzzgmXqUgZrWSq4!`MGDoeurQ zURht{?k{e@%=Ppa)qIQHU)(1B!z(>B+lZjr6X;!|znG$VBrdOq1C+5x#FENmUH_x~e`iuHe9k0K* zaTfg2eg5Lz&|mc6ANCiwpThIq{|3P0^cTJLb%~Z(c89(;^1L_nhs|GH`&RHxv;N}h zw*jB+2&BUQdxZive=$jX#B^wvzgS741!MojbVlee_NKq+XXmqByz}kg3zlPczp+eu z2z-NDBCvjA{rqD2dG4`0WPfpB)V-W@*iX^NbKsZm^OGyYKV{qi{RcG;+G+k_Kbc<$ zkCB~vbU*pD=Dls@^vc7}PO<49yPcAxR}o|S`urC@S5U&}w@{C^Lyt!EioM45$Xo9L z{o};<*axJ~x=)XoAbeur?KDnOlny~}Q-I?IX;Rb`y4~CiPS{>s`7WekeLC0EHD_ng zWvhU%TY4E;%x}%^17P%2xE@(iFd}!4Q18!Wj&c7A^)4^ld)KZ@|3RjW9zU($o5H>q z*`>^l%#!kS>fb-)`Xs*O^`OW3!hU+3`D$!ZUR)kAx}aT;r@KmzbG(85{skU5xZQ^T z5z4uHv7x9YgQNcY<2Bh0364wbeP=^aS7q4jJtJ3|lWT^*gZcievBR*R4vjzg*6hS` zHiLElYOL|NDU!Jy8W5WgmBC8h73CK0&AXqPp)-)Ba1d?{T~l(p^g` z@F(*wNKycb)vkG-2aQ`E&aTOke>X$5Rs| zAW|eR3}5>1Z{Y67C3OQ4@r3r{YW7Dk?pzc89QHFDXxgs9{?_$=qFwGiJpf9?0O&vD z4-mr0(MJ2G74tQQ&%Q**BWn4i>ULQ7>uJB=F;4su`W=cR*zPs(QRGnu>x-Ss;Gb21 z>c8LnXk35r3aV79HE|Nuaaq?*z5j7yMu)8ustkVg((IMYk~6KsTKY`@M|m}^PG~&1 z?w{i8PiN%ezm!}eAO-8Z1wrBt4;^-Ks@UK!S?JH#e;2KEm6VlD>e+|Z&2|L z@tqAjO$8OQhirJv_v!DkVIV(6#jE5ecw@J&)a>V*8o*U+<(rq>Z;fAzNMl35s!3pvBTi+>tOrsT6a_7QY3zqaKfg8h zUP3>WhzQ!eVw%Qbt^FI?c^E`Iuh_o$+dR=fRx@ft`hy&1zNm3rMkNZXpVqx!5VZ06 zMmRn-D?mf?^Jd8Q@kSB+k^*qd&va&drG6M0{GMF5&oi#v!2PV#eH!@}^m)b&;_EW?93a$Vpaah{mgkXPCwXD! ze}Xa3*x&R#qgg-Y?dQcG!5y=Q6Y_W zi;5cNsGna<f=Tp#|D*+{E!EdEJ{;x{CmCeR;jr10dKq%72hTQz#F@D zrFJ`1v!@)>jq<*={%p|Y{-sdvYj%TOWaK?~(C>TXV?PQ!_J>->x#5+%fdV{lI8B|! zXcyw{>$SbtdQWC4=%?>gP3FUxO1KrMMQNN?T`eR_4J>ngE z9Bi88f2@YfoOb8$2lzYMBR^8p?S{kvlWhMTG%nrl@8y-cYCM{_NoV5EroBeH-6=nhx^K@4$pO~u-4p*^ zBmODZh@ha#bMrhq!#^DV-6Zntkezmr_^*@CjGk_{To`;54>T4&S4jc3j~N<1YL8q{ z``oIwSpT#AUA~3K!Sh~z{$lA(;PZ8m3jgmF3fTO`IPnqFpdJokNo>c4ZHsFH7XKXHWgk9~e9NPZ&UAg>ecr;LT4;1YK@z}X++6L-k| z;^5D?mvb)5`NU6gPIp4T`7Wb>J7dS0_8RqlE9Pr?-0ZI5c^P?`&>6w|F3R(veW;%d zMmpPt488cpY0n?kZ>;<@_+=WvvHmUS>xNz-{wY(>0YbkKbYQ=6@54ya!I zGsT;^jP6K%J6H2^FA?1i>hpU@RPRP6Uszn0@06)}PCGU+G<$Q2#+Vyq( zJWIcz$#{hCdG3-Pgk6ZM)qa9%CQs{Sp9ELv|;~cH@TT3Jly3cRz ze+-l=hx{Wc!1f74_FD%(3i6DQ-FTdSt8#(tapo0zp539Z&G_sM{b8HuT=-S+PqXu! z>0bvv+Yv~G|Mv<7Y<}wi@e$LZ)qX46KkL7V7L5H9(;4ABXK&7P`q}x+7aP9;zPL*Y z(yoVANe_WNfY-9kRc;PvgWL%lM12Kd@mSpXGe)x4rR#UK*Tru?bD8 z_P?LR?Hu%bo?A8jbAHeBmt+aspTYW`=VjGR`AJdvwa`TL@;M3QcR=+=K3P6&{}wnj z1>lIc1gIC?ypiOSs^;=>SJud^$ zf2ZRvJnj+|bYQKfH%5-ow3$#bcy%xxR19AoDpsxz^=#F_-s6Kv3iVE>IvxU$;~K{) zf~N`6CjFE;zV=yu+V1<#>v!I`^VXfWBYjnp$UaZrdUHZ`{bNr)rTzVPKK1D@%4DRE z$ySxi_19Ik)Zy1|yXbw``$4b6|BwP91wsmh6zCNS9Q-}z&ct6ip1%DFPSpMKTH zL7kG{&y_!QR6dK0X7}m8`-g-NAhte_6ZY3BI|MyS0iIV#6T?^NJ{SHPC(KvnKS!FJ zT*tbu=JW(zwhH*Vxj#h~^V`AoRQTO*MM2(QApGt(gY5O0cg8e_SVIZI5J9`L2Tan{bvirwYl&&bS#rJwGu4?SCpzZ%X@HiNSBacXR zI$nBkebb!2Gycorz6a0y^9u?r`pw5S{~pWZA>B2r+58qrIQ9!(Tg=WY^!oMVa?)!t z`>Jz&JN%q1mZMw0YZ=_}I2iJU2VbzgI`S79Nf>)Ip)(Ht{_5er`@KN^-59#zvF`6H z>3PNo6CkMa)la^4S})fS%_U*CA7F7sGNqTVk)5>|) z!N0^!t<$|3xfJyGYHyPZTBe=@1eLEl&9@v+jeiDo86$aN?2%yX$KSs14kdmEcOCk-IxFQ(9Y4U;I4o8J!?`@)wuHZpMif?X*@CT^E=pX8u%O3 zsBlGsK^>QM-Q<1v!+rVd33J4B{l5GaiU->+TcU!U9UBJnH>h}<_|Ar%rh-kfhirJv z_tSr6!$3a!1uNtycw@J&)O`0l=_=p6TZ9se(!@VKDt5cDVoZ1ag*|A`xBdtmQhkS1rYW%G$U@8G)3 zZwHU>hVzMvg1tMR=>FaB)qeor#90v{DHj7b`#rc6`JJ$!Q zma`c-Um{D@)D;h>i6K16hjcrk8aL5E2@CA?DP54G!DagMMZI@zk#Q8Ui;#2 zuY~b8&FbJ&LwB7i?HnFAjpMRC>mq|T@BjVb`1IfpwvX5M!7s*+WxhG+dy|KoXWaY( z+8+b(m}e|^)p%$6=KdQuR-yYd`m!_rHZ|)Doca$EC8_I}ID4??8O#5QTU-Zl$iJY^ zGp-X~m#OCfVY~=*;CaTW1g|~-~EpLF76nk5$D0($D-46&9v8O-?YZ> z2zs13PW%!2A8KsW+rNkX?)M1A3s-2s+5Nw#iT*Ld1PH3!=1#(YUJBty1yke|Y1uKd`^=jo0c@yB*4YNu#`Ptv?%d Rxj!Dtea&>Ri|Xb6{{#4sdMy9| literal 0 HcmV?d00001 From 3684fa63935fbc40b2bcac5d48cc8ef38c7d0fa1 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Tue, 23 Apr 2019 10:52:05 +0200 Subject: [PATCH 20/25] Numpy in windows creates int32 arrays by default. Need to cast to int64 explicitly to match stored table. --- pandas/tests/io/test_pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index bc48e13215dcc..09fdd15a18046 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4771,7 +4771,7 @@ def test_legacy_table_table_format_read(self, datapath): columns = [column_numeric, column_str_1, column_str_2, column_dt, column_multi_1, column_multi_2] - data = np.arange(0, 16).reshape(4, 4) + data = np.arange(0, 16).reshape(4, 4).astype('int64') with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', From 54c1657311d7161e2c9f5cb4aa010bf508c087f1 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Fri, 17 May 2019 17:18:49 +0200 Subject: [PATCH 21/25] celanup --- pandas/io/pytables.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 19b660a5ea038..e8c6f7efacf12 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -41,8 +41,6 @@ from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing -from tables.exceptions import NoSuchNodeError - # versioning attribute _version = '0.15.2' From 96d0ec6d615a252760faf844b009e5eb7d4af4b0 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Fri, 26 Jul 2019 11:14:06 +0200 Subject: [PATCH 22/25] Included unsaved changes for rebase. Fixed typo. Corrected Docstring. --- pandas/io/pytables.py | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 94674b4801957..84df5999c594c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2436,7 +2436,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): return self def get_attr(self): - """ get the data for this colummn """ + """ get the data for this column """ # reading tables prior to 0.x.x self.values = getattr(self.attrs, self.kind_attr, None) @@ -3518,7 +3518,7 @@ def set_info(self): self.attrs.info = self.info def set_non_index_axes(self): - """ Write the axes to carrays """ + """ Write the axes to vlarrays """ group = self.attrs._v_node def f(dim, flds): @@ -3536,8 +3536,7 @@ def f(dim, flds): self.attrs.non_index_axes = replacement def get_non_index_axes(self): - """Load the non-index axes from their carrays. This is a pass-through - for tables stored prior to v0.xx""" + """Load the non-index axes from their vlarrays.""" def f(dim, flds): if isinstance(flds, str): flds = self._handle.get_node(self.attrs._v_node, flds)[:] @@ -3569,28 +3568,6 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ -<<<<<<< HEAD - self.data_columns = getattr( - self.attrs, 'data_columns', None) or [] - self.info = getattr( - self.attrs, 'info', None) or dict() - self.nan_rep = getattr(self.attrs, 'nan_rep', None) - self.encoding = _ensure_encoding( - getattr(self.attrs, 'encoding', None)) - self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict')) - self.levels = getattr( - self.attrs, 'levels', None) or [] - self.index_axes = [ - a.infer(self) for a in self.indexables if a.is_an_indexable - ] - self.values_axes = [ - a.infer(self) for a in self.indexables if not a.is_an_indexable - ] - self.metadata = getattr( - self.attrs, 'metadata', None) or [] - self.non_index_axes = self.get_non_index_axes() -======= - self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or [] self.data_columns = getattr(self.attrs, "data_columns", None) or [] self.info = getattr(self.attrs, "info", None) or dict() self.nan_rep = getattr(self.attrs, "nan_rep", None) @@ -3602,7 +3579,8 @@ def get_attrs(self): a.infer(self) for a in self.indexables if not a.is_an_indexable ] self.metadata = getattr(self.attrs, "metadata", None) or [] ->>>>>>> upstream/master + self.non_index_axes = self.get_non_index_axes() + def validate_version(self, where=None): """ are we trying to operate on an old version? """ From 4d0466e5006a2d01fde70301709ea3221ea9e8ba Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Fri, 26 Jul 2019 11:26:14 +0200 Subject: [PATCH 23/25] Black refromatting --- pandas/io/pytables.py | 20 +++++------ pandas/tests/io/pytables/test_pytables.py | 42 +++++++++++++---------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 84df5999c594c..93d27a8bd4374 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2442,9 +2442,10 @@ def get_attr(self): if self.values is None: try: - data = self.handle.get_node(self.attrs._v_node._v_parent, - self.kind_attr)[:] - data = np.array(data, dtype='object') + data = self.handle.get_node( + self.attrs._v_node._v_parent, self.kind_attr + )[:] + data = np.array(data, dtype="object") # check for multiindex if len(data.shape) > 1 and data.shape[1] > 1: self.values = list(map(tuple, data.tolist())) @@ -2463,8 +2464,7 @@ def set_attr(self): if key in group: self.handle.remove_node(group, key) - vlarray = self.handle.create_vlarray(group, key, - _tables().ObjectAtom()) + vlarray = self.handle.create_vlarray(group, key, _tables().ObjectAtom()) for fld in self.values: vlarray.append(fld) @@ -3526,8 +3526,7 @@ def f(dim, flds): if key in group: self.handle.remove_node(group, key) - vlarray = self._handle.create_vlarray(group, key, - _tables().ObjectAtom()) + vlarray = self._handle.create_vlarray(group, key, _tables().ObjectAtom()) for fld in flds: vlarray.append(fld) return dim, key @@ -3537,10 +3536,11 @@ def f(dim, flds): def get_non_index_axes(self): """Load the non-index axes from their vlarrays.""" + def f(dim, flds): if isinstance(flds, str): flds = self._handle.get_node(self.attrs._v_node, flds)[:] - flds = np.array(flds, dtype='object') + flds = np.array(flds, dtype="object") if len(flds.shape) > 1 and flds.shape[1] > 1: flds = list(map(tuple, flds.tolist())) else: @@ -3548,7 +3548,8 @@ def f(dim, flds): return dim, flds else: return dim, flds # if not a string presumably pre v0.xx list - non_index_axes = getattr(self.attrs, 'non_index_axes', []) + + non_index_axes = getattr(self.attrs, "non_index_axes", []) new = [f(dim, flds) for dim, flds in non_index_axes] return new @@ -3581,7 +3582,6 @@ def get_attrs(self): self.metadata = getattr(self.attrs, "metadata", None) or [] self.non_index_axes = self.get_non_index_axes() - def validate_version(self, where=None): """ are we trying to operate on an old version? """ if where is not None: diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 121f33f2b3031..3491e84e37dfe 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -4935,8 +4935,8 @@ def test_wide_table_format(self): df = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table') - reread = read_hdf(path, 'df') + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") assert_frame_equal(df, reread) def test_append_wide_table_format(self): @@ -4947,8 +4947,8 @@ def test_append_wide_table_format(self): df2 = DataFrame(np.random.random((10, 10000))) with ensure_clean_path(self.path) as path: - df1.to_hdf(path, 'df', format='table') - df2.to_hdf(path, 'df', append=True) + df1.to_hdf(path, "df", format="table") + df2.to_hdf(path, "df", append=True) reread = read_hdf(path) assert_frame_equal(pd.concat([df1, df2]), reread) @@ -4958,25 +4958,29 @@ def test_legacy_table_table_format_read(self, datapath): # saved as pytables metadata column_numeric = [1, 2, 3, 4] - column_str_1 = ['A', 'B', 'C', 'D'] - column_str_2 = ['Ä', 'Ö', 'Â', 'é'] - column_dt = pd.date_range('19700101', '19700104') - column_multi_1 = pd.MultiIndex.from_tuples( - zip(column_numeric, column_str_1)) - column_multi_2 = pd.MultiIndex.from_tuples( - zip(column_str_2, column_dt)) - - columns = [column_numeric, column_str_1, column_str_2, column_dt, - column_multi_1, column_multi_2] + column_str_1 = ["A", "B", "C", "D"] + column_str_2 = ["Ä", "Ö", "Â", "é"] + column_dt = pd.date_range("19700101", "19700104") + column_multi_1 = pd.MultiIndex.from_tuples(zip(column_numeric, column_str_1)) + column_multi_2 = pd.MultiIndex.from_tuples(zip(column_str_2, column_dt)) + + columns = [ + column_numeric, + column_str_1, + column_str_2, + column_dt, + column_multi_1, + column_multi_2, + ] - data = np.arange(0, 16).reshape(4, 4).astype('int64') + data = np.arange(0, 16).reshape(4, 4).astype("int64") with ensure_clean_store( - datapath('io', 'data', 'legacy_hdf', - 'legacy_table_table_format.h5'), - mode='r') as store: + datapath("io", "data", "legacy_hdf", "legacy_table_table_format.h5"), + mode="r", + ) as store: for i, column in enumerate(columns): - table_name = 'table_{}'.format(i) + table_name = "table_{}".format(i) df = pd.DataFrame(data, columns=column) tm.assert_frame_equal(store[table_name], df) From af10a712ea58632fd57824f71907c04e533444c4 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Fri, 26 Jul 2019 11:59:39 +0200 Subject: [PATCH 24/25] Fix for blosc compression test case --- pandas/tests/io/pytables/test_pytables.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 3491e84e37dfe..f98e28dbcb8b7 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -874,8 +874,10 @@ def test_complibs_default_settings(self): assert node.filters.complevel == 0 assert node.filters.complib is None for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): - assert node.filters.complevel == 9 - assert node.filters.complib == "blosc" + # only check table, skip column + if node.name == 'table': + assert node.filters.complevel == 9 + assert node.filters.complib == 'blosc' def test_complibs(self): # GH14478 From f725d20bd121c54c44b2b2418d1759541855cf36 Mon Sep 17 00:00:00 2001 From: Peter Tillmann Date: Fri, 26 Jul 2019 12:02:54 +0200 Subject: [PATCH 25/25] black reformating --- pandas/tests/io/pytables/test_pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index f98e28dbcb8b7..c337cfd8e944d 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -875,9 +875,9 @@ def test_complibs_default_settings(self): assert node.filters.complib is None for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): # only check table, skip column - if node.name == 'table': + if node.name == "table": assert node.filters.complevel == 9 - assert node.filters.complib == 'blosc' + assert node.filters.complib == "blosc" def test_complibs(self): # GH14478