From 873f5f54d157a254d0dece48782639203b07b842 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 7 Oct 2022 19:06:44 +0200 Subject: [PATCH 1/4] REGR: to_parquet raising with bytes filename --- doc/source/whatsnew/v1.5.1.rst | 1 + pandas/io/parquet.py | 2 ++ pandas/tests/io/test_parquet.py | 10 ++++++++++ 3 files changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index 4d7576c013fd6..310094dcccdaa 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -84,6 +84,7 @@ Fixed regressions - Fixed Regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`) - Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`) - Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`) +- Fixed regrssion in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`) - Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6f297457ced41..6b7a10b7fad63 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -189,6 +189,8 @@ def write( and isinstance(path_or_handle.name, (str, bytes)) ): path_or_handle = path_or_handle.name + if isinstance(path_or_handle, bytes): + path_or_handle = path_or_handle.decode() try: if partition_cols is not None: diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 66312468b53c9..56210e21a63f1 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1179,3 +1179,13 @@ def test_close_file_handle_on_read_error(self): read_parquet(path, engine="fastparquet") # The next line raises an error on Windows if the file is still open pathlib.Path(path).unlink(missing_ok=False) + + def test_bytes_file_name(self): + # GH#48944 + df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) + with tm.ensure_clean("test.parquet") as path: + with open(path.encode(), "wb") as f: + df.to_parquet(f) + + result = read_parquet(path) + tm.assert_frame_equal(result, df) From 3e6088229cfa7a5e3b614e59c124356b4fb54724 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 7 Oct 2022 22:25:56 +0200 Subject: [PATCH 2/4] Add check --- pandas/tests/io/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 56210e21a63f1..a102c4e02f6ca 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1180,7 +1180,7 @@ def test_close_file_handle_on_read_error(self): # The next line raises an error on Windows if the file is still open pathlib.Path(path).unlink(missing_ok=False) - def test_bytes_file_name(self): + def test_bytes_file_name(self, fp): # GH#48944 df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) with tm.ensure_clean("test.parquet") as path: From e93dd0032a66cefea6b499f2d8d4b666009016e2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 10 Oct 2022 14:58:52 +0200 Subject: [PATCH 3/4] Fix typo --- doc/source/whatsnew/v1.5.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index 310094dcccdaa..72f83c8d78662 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -84,7 +84,7 @@ Fixed regressions - Fixed Regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`) - Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`) - Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`) -- Fixed regrssion in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`) +- Fixed regression in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`) - Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`) .. --------------------------------------------------------------------------- From c3b258ba2b975f00b3fc36f249b7b70e5120a76f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 11 Oct 2022 23:10:04 +0200 Subject: [PATCH 4/4] Parametrize --- pandas/tests/io/test_parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a102c4e02f6ca..9f47c220a111b 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1180,12 +1180,12 @@ def test_close_file_handle_on_read_error(self): # The next line raises an error on Windows if the file is still open pathlib.Path(path).unlink(missing_ok=False) - def test_bytes_file_name(self, fp): + def test_bytes_file_name(self, engine): # GH#48944 df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) with tm.ensure_clean("test.parquet") as path: with open(path.encode(), "wb") as f: df.to_parquet(f) - result = read_parquet(path) + result = read_parquet(path, engine=engine) tm.assert_frame_equal(result, df)