diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e6fafc8b1b14c..319e6999a76de 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -636,6 +636,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) +- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 647530151d5f6..11102909b37f0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -452,7 +452,7 @@ def __init__( if isinstance(values, Index): arr = values._data._pa_array.combine_chunks() else: - arr = values._pa_array.combine_chunks() + arr = extract_array(values)._pa_array.combine_chunks() categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype) codes = arr.indices.to_numpy() dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 46eee13755b2d..c868b358c8594 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -15,6 +15,7 @@ import pandas as pd from pandas import ( + ArrowDtype, Categorical, DataFrame, Grouper, @@ -2851,3 +2852,38 @@ def test_pivot_margins_with_none_index(self): ), ) tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") + def test_pivot_with_pyarrow_categorical(self): + # GH#53051 + + pa = pytest.importorskip("pyarrow") + + # Create dataframe with categorical column + df = DataFrame( + {"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]} + ).astype({"string_column": "category", "number_column": "float32"}) + + # Convert dataframe to pyarrow backend + df = df.astype( + { + "string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())), + "number_column": "float[pyarrow]", + } + ) + + # Check that pivot works + df = df.pivot(columns=["string_column"], values=["number_column"]) + + # Assert that values of result are correct to prevent silent failure + multi_index = MultiIndex.from_arrays( + [["number_column", "number_column", "number_column"], ["A", "B", "C"]], + names=(None, "string_column"), + ) + df_expected = DataFrame( + [[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]], + columns=multi_index, + ) + tm.assert_frame_equal( + df, df_expected, check_dtype=False, check_column_type=False + ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a23e6d9b3973a..502728e178e43 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -5,6 +5,7 @@ import pandas as pd from pandas import ( + ArrowDtype, DataFrame, MultiIndex, Series, @@ -318,6 +319,30 @@ def test_multiindex_dt_with_nan(self): expected = Series(["a", "b", "c", "d"], name=("sub", np.nan)) tm.assert_series_equal(result, expected) + # Ignore deprecation raised by old versions of pyarrow. Already fixed in + # newer versions + @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning") + def test_multiindex_with_pyarrow_categorical(self): + # GH#53051 + + pa = pytest.importorskip("pyarrow") + + # Create dataframe with categorical column + df = DataFrame( + {"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]} + ).astype({"string_column": "category", "number_column": "float32"}) + + # Convert dataframe to pyarrow backend + df = df.astype( + { + "string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())), + "number_column": "float[pyarrow]", + } + ) + + # Check that index can be set + df.set_index(["string_column", "number_column"]) + class TestSorted: """everything you wanted to test about sorting"""