Skip to content

Commit 794e060

Browse files
forbdonut authored and jreback committed
BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781) (#16790)
In Python3, reading a DataFrame with a PeriodIndex from an HDF file created in Python2 would incorrectly return a DataFrame with an Int64Index.
1 parent 5e776fb commit 794e060

File tree

4 files changed

+23
-2
lines changed

4 files changed

+23
-2
lines changed

Diff for: doc/source/whatsnew/v0.20.3.txt

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Bug Fixes
3838
~~~~~~~~~
3939
- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
4040
- Fixed a pytest marker failing downstream packages' test suites (:issue:`16680`)
41+
- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
4142

4243

4344

Diff for: pandas/io/pytables.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -2591,8 +2591,8 @@ def read_index_node(self, node, start=None, stop=None):
25912591
if 'name' in node._v_attrs:
25922592
name = _ensure_str(node._v_attrs.name)
25932593

2594-
index_class = self._alias_to_class(getattr(node._v_attrs,
2595-
'index_class', ''))
2594+
index_class = self._alias_to_class(_ensure_decoded(
2595+
getattr(node._v_attrs, 'index_class', '')))
25962596
factory = self._get_index_factory(index_class)
25972597

25982598
kwargs = {}
7.14 KB
Binary file not shown.

Diff for: pandas/tests/io/test_pytables.py

+20
Original file line numberDiff line numberDiff line change
@@ -5264,6 +5264,26 @@ def test_fspath(self):
52645264
with pd.HDFStore(path) as store:
52655265
assert os.fspath(store) == str(path)
52665266

5267+
def test_read_py2_hdf_file_in_py3(self):
5268+
# GH 16781
5269+
5270+
# tests reading a PeriodIndex DataFrame written in Python2 in Python3
5271+
5272+
# the file was generated in Python 2.7 like so:
5273+
#
5274+
# df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
5275+
# ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
5276+
# df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
5277+
5278+
expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex(
5279+
['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
5280+
5281+
with ensure_clean_store(
5282+
tm.get_data_path('periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
5283+
mode='r') as store:
5284+
result = store['p']
5285+
assert_frame_equal(result, expected)
5286+
52675287

52685288
class TestHDFComplexValues(Base):
52695289
# GH10447

0 commit comments

Comments
 (0)