|
3 | 3 | datetime,
|
4 | 4 | timedelta,
|
5 | 5 | )
|
| 6 | +import io |
6 | 7 | from itertools import product
|
7 | 8 | import re
|
8 | 9 |
|
@@ -2827,3 +2828,42 @@ def test_pivot_margins_with_none_index(self):
|
2827 | 2828 | ),
|
2828 | 2829 | )
|
2829 | 2830 | tm.assert_frame_equal(result, expected)
|
| 2831 | + |
| 2832 | + def test_pivot_with_pyarrow_categorical(self): |
| 2833 | + # GH#53051 |
| 2834 | + |
| 2835 | + # Create a dataframe whose string column is categorical |
| 2836 | + df = ( |
| 2837 | + pd.DataFrame([("A", 1), ("B", 2), ("C", 3)], columns=["string_column", "number_column"]) |
| 2838 | + .astype({"string_column": "string", "number_column": "float32"}) |
| 2839 | + .astype({"string_column": "category", "number_column": "float32"}) |
| 2840 | + ) |
| 2841 | + |
| 2842 | + # Round-trip through an in-memory parquet buffer to reload the frame with the pyarrow dtype backend |
| 2843 | + with io.BytesIO() as buffer: |
| 2844 | + df.to_parquet(buffer) |
| 2845 | + buffer.seek(0) # Rewind so read_parquet starts at the beginning of the buffer |
| 2846 | + df = pd.read_parquet(buffer, dtype_backend="pyarrow") |
| 2847 | + |
| 2848 | + |
| 2849 | + # Pivot on the categorical column; this is the operation under test |
| 2850 | + df = df.pivot(columns=["string_column"], values=["number_column"]) |
| 2851 | + |
| 2852 | + # Compare the pivoted values with the expected frame so a wrong result cannot pass silently |
| 2853 | + multi_index = pd.MultiIndex.from_arrays( |
| 2854 | + [ |
| 2855 | + ["number_column", "number_column", "number_column"], |
| 2856 | + ["A", "B", "C"] |
| 2857 | + ], |
| 2858 | + names=(None, "string_column") |
| 2859 | + ) |
| 2860 | + df_expected = pd.DataFrame( |
| 2861 | + [ |
| 2862 | + [1.0, np.nan, np.nan], |
| 2863 | + [np.nan, 2.0, np.nan], |
| 2864 | + [np.nan, np.nan, 3.0] |
| 2865 | + ], |
| 2866 | + columns=multi_index |
| 2867 | + ) |
| 2868 | + tm.assert_frame_equal(df, df_expected, check_dtype=False, check_column_type=False) |
| 2869 | + |
0 commit comments