Skip to content

Commit ca4a4fd

Browse files
authored
* Speed up selecting row-groups (#856)
* xfail warning test
1 parent e4b16f8 commit ca4a4fd

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

fastparquet/api.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -311,11 +311,14 @@ def __getitem__(self, item):
311311
new_rgs = self.row_groups[item]
312312
if not isinstance(new_rgs, list):
313313
new_rgs = [new_rgs]
314-
new_pf = copy.deepcopy(self)
315-
new_pf.fmd.row_groups = new_rgs
316-
new_pf._set_attrs()
317-
# would otherwise be "simple" when selecting one rg
318-
new_pf.file_scheme = self.file_scheme
314+
new_pf = object.__new__(ParquetFile)
315+
fmd = copy.copy(self.fmd)
316+
fmd.row_groups = new_rgs
317+
new_pf.__setstate__(
318+
{"fn": self.fn, "open": self.open, "fmd": fmd,
319+
"pandas_nulls": self.pandas_nulls, "_base_dtype": self._base_dtype,
320+
"tz": self.tz}
321+
)
319322
return new_pf
320323

321324
def __len__(self):

fastparquet/test/test_dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from unittest import mock
33

44
import pandas as pd
5+
import pytest
56
from numpy import empty as np_empty
67
from pandas.testing import assert_frame_equal
78

@@ -33,6 +34,7 @@ def test_empty():
3334
assert len(views) == 5
3435

3536

37+
@pytest.mark.xfail(reason="df._data is going away")
3638
def test_empty_tz_utc():
3739
with warnings.catch_warnings():
3840
warnings.simplefilter("error")

0 commit comments

Comments
 (0)