PERF: Return RangeIndex columns instead of Index for str.partition with ArrowDtype (#57768)

mroeschke · web-flow · commit c8ca4ee95a0d · 2024-03-10T23:22:42.000-04:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -260,6 +260,7 @@ Performance improvements
 - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`)
 - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`)
 - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
+- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
 - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
 - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -321,18 +321,16 @@ def _wrap_result(
                             new_values.append(row)
                         pa_type = result._pa_array.type
                         result = ArrowExtensionArray(pa.array(new_values, type=pa_type))
-                if name is not None:
-                    labels = name
-                else:
-                    labels = range(max_len)
+                if name is None:
+                    name = range(max_len)
                 result = (
                     pa.compute.list_flatten(result._pa_array)
                     .to_numpy()
                     .reshape(len(result), max_len)
                 )
                 result = {
                     label: ArrowExtensionArray(pa.array(res))
-                    for label, res in zip(labels, result.T)
+                    for label, res in zip(name, result.T)
                 }
             elif is_object_dtype(result):
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -2268,19 +2268,23 @@ def test_str_partition():
     ser = pd.Series(["abcba", None], dtype=ArrowDtype(pa.string()))
     result = ser.str.partition("b")
     expected = pd.DataFrame(
-        [["a", "b", "cba"], [None, None, None]], dtype=ArrowDtype(pa.string())
+        [["a", "b", "cba"], [None, None, None]],
+        dtype=ArrowDtype(pa.string()),
+        columns=pd.RangeIndex(3),
     )
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_column_type=True)
 
     result = ser.str.partition("b", expand=False)
     expected = pd.Series(ArrowExtensionArray(pa.array([["a", "b", "cba"], None])))
     tm.assert_series_equal(result, expected)
 
     result = ser.str.rpartition("b")
     expected = pd.DataFrame(
-        [["abc", "b", "a"], [None, None, None]], dtype=ArrowDtype(pa.string())
+        [["abc", "b", "a"], [None, None, None]],
+        dtype=ArrowDtype(pa.string()),
+        columns=pd.RangeIndex(3),
     )
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_column_type=True)
 
     result = ser.str.rpartition("b", expand=False)
     expected = pd.Series(ArrowExtensionArray(pa.array([["abc", "b", "a"], None])))