Skip to content

Commit 4558733

Browse files
TST: Add tests for faulty behavior relating to pyarrow categoricals
1 parent 1999ec8 commit 4558733

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

pandas/tests/reshape/test_pivot.py

+40
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
datetime,
44
timedelta,
55
)
6+
import io
67
from itertools import product
78
import re
89

@@ -2827,3 +2828,42 @@ def test_pivot_margins_with_none_index(self):
28272828
),
28282829
)
28292830
tm.assert_frame_equal(result, expected)
2831+
2832+
def test_pivot_with_pyarrow_categorical(self):
2833+
# GH#53051
2834+
2835+
# Create dataframe with categorical colum
2836+
df = (
2837+
pd.DataFrame([("A", 1), ("B", 2), ("C", 3)], columns=["string_column", "number_column"])
2838+
.astype({"string_column": "string", "number_column": "float32"})
2839+
.astype({"string_column": "category", "number_column": "float32"})
2840+
)
2841+
2842+
# Convert dataframe to pyarrow backend
2843+
with io.BytesIO() as buffer:
2844+
df.to_parquet(buffer)
2845+
buffer.seek(0) # Reset buffer position
2846+
df = pd.read_parquet(buffer, dtype_backend="pyarrow")
2847+
2848+
2849+
# Check that pivot works
2850+
df = df.pivot(columns=["string_column"], values=["number_column"])
2851+
2852+
# Assert that values of result are correct to prevent silent failure
2853+
multi_index = pd.MultiIndex.from_arrays(
2854+
[
2855+
["number_column", "number_column", "number_column"],
2856+
["A", "B", "C"]
2857+
],
2858+
names=(None, "string_column")
2859+
)
2860+
df_expected = pd.DataFrame(
2861+
[
2862+
[1.0, np.nan, np.nan],
2863+
[np.nan, 2.0, np.nan],
2864+
[np.nan, np.nan, 3.0]
2865+
],
2866+
columns=multi_index
2867+
)
2868+
tm.assert_frame_equal(df, df_expected, check_dtype=False, check_column_type=False)
2869+

pandas/tests/test_multilevel.py

+21
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import io
23

34
import numpy as np
45
import pytest
@@ -318,6 +319,26 @@ def test_multiindex_dt_with_nan(self):
318319
expected = Series(["a", "b", "c", "d"], name=("sub", np.nan))
319320
tm.assert_series_equal(result, expected)
320321

322+
def test_multiindex_with_pyarrow_categorical(self):
323+
# GH#53051
324+
325+
# Create dataframe with categorical colum
326+
df = (
327+
pd.DataFrame([("A", 1), ("B", 2), ("C", 3)], columns=["string_column", "number_column"])
328+
.astype({"string_column": "string", "number_column": "float32"})
329+
.astype({"string_column": "category", "number_column": "float32"})
330+
)
331+
332+
# Convert dataframe to pyarrow backend
333+
with io.BytesIO() as buffer:
334+
df.to_parquet(buffer)
335+
buffer.seek(0) # Reset buffer position
336+
df = pd.read_parquet(buffer, dtype_backend="pyarrow")
337+
338+
339+
# Check that index can be set
340+
df.set_index(["string_column", "number_column"])
341+
321342

322343
class TestSorted:
323344
"""everything you wanted to test about sorting"""

0 commit comments

Comments
 (0)