From 11de0d24b368f29083067de95c0707afa4368b0f Mon Sep 17 00:00:00 2001 From: hugo Date: Sat, 22 Aug 2015 15:34:23 -0400 Subject: [PATCH 1/4] wip --- pandas/io/pytables.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8ef6363f836ae..dc7d5ba45837c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2100,6 +2100,7 @@ def get_attr(self): def set_attr(self): """ set the data for this colummn """ + import pdb;pdb.set_trace() setattr(self.attrs, self.kind_attr, self.values) setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: @@ -3061,12 +3062,19 @@ def set_info(self): """ update our table index info """ self.attrs.info = self.info + def set_non_index_axes(self): + for dim, flds in self.non_index_axes: + name = "non_index_axes_%d" % dim + self._handle.create_carray(self.attrs._v_node._v_pathname, name, obj=np.array(flds)) + def set_attrs(self): """ set our table type & indexables """ self.attrs.table_type = str(self.table_type) self.attrs.index_cols = self.index_cols() self.attrs.values_cols = self.values_cols() - self.attrs.non_index_axes = self.non_index_axes + + #self.attrs.non_index_axes = self.non_index_axes + self.set_non_index_axes() self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep self.attrs.encoding = self.encoding From 143985aa3140d902c42015f86af2f61ffefd50ed Mon Sep 17 00:00:00 2001 From: hugo Date: Sat, 22 Aug 2015 16:03:13 -0400 Subject: [PATCH 2/4] wip --- pandas/io/pytables.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index dc7d5ba45837c..80037f0b891ef 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -11,7 +11,8 @@ import itertools import warnings import os - +from six import string_types +from tables.exceptions import NoSuchNodeError import numpy as np from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index, Timestamp) @@ -1478,6 +1479,7 @@ def infer(self, handler): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() + new_self._handle = handler._handle new_self.set_table(table) new_self.get_attr() new_self.read_metadata(handler) @@ -1557,6 +1559,7 @@ def validate_names(self): pass def validate_and_set(self, handler, append, **kwargs): + self._handle = handler._handle self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -2094,14 +2097,22 @@ def convert(self, values, nan_rep, encoding): def get_attr(self): """ get the data for this colummn """ self.values = getattr(self.attrs, self.kind_attr, None) + if self.values is None: + try: + self.values = self._handle.get_node(self.attrs._v_node._v_parent, + self.kind_attr)[:].tolist() + except NoSuchNodeError: + pass self.dtype = getattr(self.attrs, self.dtype_attr, None) self.meta = getattr(self.attrs, self.meta_attr, None) self.set_kind() def set_attr(self): """ set the data for this colummn """ - import pdb;pdb.set_trace() - setattr(self.attrs, self.kind_attr, self.values) + #setattr(self.attrs, self.kind_attr, self.values) + self._handle.create_carray(self.attrs._v_node._v_parent, + self.kind_attr, + obj=np.array(self.values)) setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: setattr(self.attrs, self.dtype_attr, self.dtype) @@ -3063,9 +3074,21 @@ def set_info(self): self.attrs.info = self.info def set_non_index_axes(self): + replacement = [] for dim, flds in self.non_index_axes: name = "non_index_axes_%d" % dim - self._handle.create_carray(self.attrs._v_node._v_pathname, name, obj=np.array(flds)) + self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) + replacement.append((dim, name)) + self.attrs.non_index_axes = replacement + + def get_non_index_axes(self): + non_index_axes = getattr(self.attrs, 'non_index_axes', []) + new = [] + for dim, flds in non_index_axes: + if isinstance(flds, string_types): + flds = self._handle.get_node(self.attrs._v_node, flds)[:].tolist() + new.append((dim, flds)) + return new def set_attrs(self): """ set our table type & indexables """ @@ -3084,8 +3107,7 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = getattr( - self.attrs, 'non_index_axes', None) or [] + self.non_index_axes = self.get_non_index_axes() self.data_columns = getattr( self.attrs, 'data_columns', None) or [] self.info = getattr( From d548e0f3f2de2244d6196bde05f05b9912101830 Mon Sep 17 00:00:00 2001 From: Michael Booth Date: Mon, 7 Dec 2015 10:37:07 -0500 Subject: [PATCH 3/4] Merge jreback's suggested changes. Also handle rewrite and MultiIndex. And, make sure that non_index_axes is preserved if it was written by legacy code. --- pandas/io/pytables.py | 67 +++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 96089910e4c6f..d33aa614f8067 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -11,8 +11,7 @@ import itertools import warnings import os -from six import string_types -from tables.exceptions import NoSuchNodeError + import numpy as np import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, @@ -42,6 +41,8 @@ import pandas.algos as algos import pandas.tslib as tslib +from tables.exceptions import NoSuchNodeError, NodeError + from contextlib import contextmanager from distutils.version import LooseVersion @@ -1513,6 +1514,10 @@ def cvalues(self): """ return my cython values """ return self.values + @property + def handle(self): + return self._handle + def __iter__(self): return iter(self.values) @@ -2048,8 +2053,12 @@ def get_attr(self): self.values = getattr(self.attrs, self.kind_attr, None) if self.values is None: try: - self.values = self._handle.get_node(self.attrs._v_node._v_parent, - self.kind_attr)[:].tolist() + data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:] + if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex + self.values = map(tuple, data.tolist()) + else: + self.values = data.tolist() + except NoSuchNodeError: pass self.dtype = getattr(self.attrs, self.dtype_attr, None) @@ -2059,9 +2068,18 @@ def get_attr(self): def set_attr(self): """ set the data for this colummn """ #setattr(self.attrs, self.kind_attr, self.values) - self._handle.create_carray(self.attrs._v_node._v_parent, + try: + self.handle.create_carray(self.attrs._v_node._v_parent, self.kind_attr, obj=np.array(self.values)) + except NodeError as e: + self.handle.remove_node(self.attrs._v_node._v_parent, + self.kind_attr) + self.handle.create_carray(self.attrs._v_node._v_parent, + self.kind_attr, + obj=np.array(self.values)) + except Exception as e: # for debugging + raise setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: setattr(self.attrs, self.dtype_attr, self.dtype) @@ -3033,20 +3051,39 @@ def set_info(self): self.attrs.info = self.info def set_non_index_axes(self): - replacement = [] - for dim, flds in self.non_index_axes: - name = "non_index_axes_%d" % dim - self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) - replacement.append((dim, name)) + """ Write the axes to carrays """ + def f(dim, flds): + name = "non_index_axes_%d" % dim + try: + self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) + except ValueError as e: + # Should probably make this check: + #if e.message == "unknown type: 'object'": + # raise ValueError("axis {} has dtype 'object' which cannot be saved to carray".format(dim)) + raise + except NodeError as e: + self._handle.remove_node(self.attrs._v_node, name) + self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds)) + return dim, flds + + replacement = [f(dim, flds) for dim, flds in self.non_index_axes] self.attrs.non_index_axes = replacement def get_non_index_axes(self): - non_index_axes = getattr(self.attrs, 'non_index_axes', []) - new = [] - for dim, flds in non_index_axes: + """Load the non-index axes from their carrays. This is a pass-through + for tables stored prior to v0.17""" + def f(dim, flds): if isinstance(flds, string_types): - flds = self._handle.get_node(self.attrs._v_node, flds)[:].tolist() - new.append((dim, flds)) + flds = self._handle.get_node(self.attrs._v_node, flds)[:] + if len(flds.shape) > 1 and flds.shape[1] > 1: + flds = map(tuple, flds.tolist()) + else: + flds = flds.tolist() + return dim, flds + else: + return dim, flds #if not a string presumably pre v17 list + non_index_axes = getattr(self.attrs, 'non_index_axes', []) + new = [f(dim, flds) for dim, flds in non_index_axes] return new def set_attrs(self): From 23aacfe9a994158463cc439777b21887f7f6daf4 Mon Sep 17 00:00:00 2001 From: Michael Booth Date: Mon, 7 Dec 2015 12:18:07 -0500 Subject: [PATCH 4/4] Add test (and data) for reading legacy-format of non_index_axes. --- .../legacy_non_index_axes_0.17.1.h5 | Bin 0 -> 84608 bytes pandas/io/tests/test_pytables.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 b/pandas/io/tests/data/legacy_hdf/legacy_non_index_axes_0.17.1.h5 new file mode 100644 index 0000000000000000000000000000000000000000..bfbb9171f1bae9b8b9e83b1a59f72fbc7ec1d274 GIT binary patch literal 84608 zcmeI2O>7&-6~|{uNv3Gpmh({vO_Q=I94Sqrkdi-?n-z<$7|4=9vbF*gpjYH7W=t&^ zemIIzpiU4JZjlB##(YSvxy3@4cD%&6_u~YyEg`_T;|!*W-$>SWFG55&bB)&+K*S8AlgMUh2=< z2UDxmWt)FHVjdhdq_}R>mXBEb$*3u~Vap4-Oh%EU*q78+t7}8%DJBI-?0QvW0naV0AD3a@*3r7NT3X z{E7LQe4ff#evp1e{=2!w@9{tbdA1%?r!%>Hc5y+=^U@Ld^hAnsEI+MNhgKTPFFjkB zd3rvRFP>b?oth~W`{_?kq^;gTZL@owj@Sja&Yh%dr*(Q|zmc7CR3!s4t$9xQ$r8!$RzFJ<#fUH#bE^4%pN@vfU! z5RblU@h+j`UETG|ZnM~Wsm@+y;_6;XW&T$i?wS{JwS+Qp(@_V)`WL)Lvr_YoKD&zO zrzX-zCMLsicCXgc&KoqV<1_C~qUYA@9`%fzy{Nm8P;*qv^+vzqm%a63sa9?3Vy*vx zt~dGdYCRnv&!*z}F)cb4uTREX^{Kc!p#=hahQkH7+V+}(LEc}!(QPk3X2n*jwbJ=w zN*gr>6Y9FQD_s1VUo?Gm*FEYZ+k30q-guV!Sf!4Q=f`Ho;*`@pv^nnY;kavaQRs^ICwTugG3_{M1$v?%=$ZD`yq3#j)53J?Kd#H#ghQtf9$Cd-t+5Wv zm#IeVkC(~+czVk;z4YHCI#%n9jd!-hYAC51ya`iNIjoRbB z`L)KHTdiE=;iQknv#Ashp=iu;*>t=_AiY*)&5QG`>{VMXl~b3A2dzl@Mw&ra=}QFiAfgI)#qO&!jYJ6Yz#_r24>dNKP%zjLlWooWkaocb|zGy8eOQlS1nz%2xRnX>t%2Ti%` z`eRgV4D`JBkZ*8!3&k@NZ%<}3 z^RvZNabf0EhU?jv_?5!+D)8grJtnM|-lVrbq}T1|VSSFH((cLZ{5+{`S|gm7_W7Bw zt@YEz-Rp<<_w%$&jeQ$n#1+;Tj; zUFq@ic-V1-ORz7RHvvc8zZH+N9Qb5xtL*feI-VVmBmvx{+o00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900`VY1ROP}92)MZ9rB18(NEVN zAO;eTqdJP^0ThXR;#6N8TptLAib*y%w=Ylc)-hc)W(aHs#`x27!|+lty9K$ zi&Aq^ag*sm>#wN?jKw9}Ucy>-)R2nNPa#{FCkIyd((fVqmG-mTAFr7#)%=!Q@x4ZJ zwbn?s+?A@Ae6qayq<$j#gc_$i>lu3|=4bMG%Co&8T~0T;3fYC(3_%Fwx%HSjoyq01 ziwjylN^;VEdLpIu9JODUb+5xK4;#sPM2p#V80otXM;|fr$BnqUdf4QrZ63*;O+eP+ zJ9e1oYt17}S}UDzRxWxRysB2FTR(oh=kOKioAEcia=X;3)M(1})lXl>;i$dFcWi+p`&>jMtRXvIIsVD` z#b3{k8Tt3D_#jc|l1S8;-%I1IC_gh-nD z?)$hc7=2=UIrc-oM#s08TQ*{9{pj{`$F1C_>FwpL*lkauv~sy;-Tt@ReWrUJ!*OpP z*_YAhYmJsyPM&tFZLdk?f!-dyt}@Go+3YFPpRL!kLFA~ow)wU-yxaBA88?{=x3k;z zV8a9TW!LLLtA1JQVfXVT@vQgUJsO&?h4$JC9ky$$NgXkh72h1Xzv#SO-C^Er`01RV zCEuAhiYDRxVz={VbaxfUc#q?5Xw{pYw-Ylb=;#;3vE9#`?BBifmI%#T&h?-3f@SuF z*V*OHe6e>BH+wG78M;ZZzy0$Qk)~BrT!Q-3J3r&Pt8y;u_g*M-^JUIUIR2gT$`#j7 zHh6lzjQF?HI{Kj=bud4Yzy0&H={&&Q_{y%M1JpU|?&|4U+b_F~mt-k@(YS|o)bIOB z_xs8A@h#tS8?EFD&kipm#5Z{#*x|d^t%z^ob9%-vhh_!y66_FJ^k+))Aw%B(% z3+CqI*gg{*`G=o|L-&==O5bO6<_~a&xwUH>S3i zGpBu}&Zf4P>+bLE%8Q>|w%>tOqrkwM_@1ftM8X5h(+iWFswHK33 z^Fn$-zmO&$li(%a`9;C8-!+1Bue_0%RQmsE(zGD+vRgm@Jnxg-0{Qv589Nx#5w%<9 z=Nc;A3@)YUt0jMcQ*w}9vkwwrT>Eg*rfW7e`tv!lXYF(fWPXI7$n7?j`S_s6pEj@i zuz!qDBhr4yKWqp4otL}6>-k>QTl4&uK9rEX?(ZsMpZ{-(uZ~@MHj|j-Vmi;570cf_ z_&e=x?ehNv^Y=Ma7& z>-3=3osX{1?Au7*w{iXF+2y~#I=rDyPCx#)pS(VN<+kOW&%^W1;5(7x z>#zOn#qT@sZv5c)k5?lf-@g%m?b|%UZ|KlqFL z@q=eBJ@K#TL$B=pH#g_-_wnCe{bX_Nz@CHSiP6;AjZ02+$UMCK$;%FX4|4top9