Skip to content

Commit c8ca4ee

Browse files
authored
PERF: Return RangeIndex columns instead of Index for str.partition with ArrowDtype (#57768)
1 parent 9dc7a74 commit c8ca4ee

File tree

3 files changed

+12
-9
lines changed

3 files changed

+12
-9
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ Performance improvements
260260
- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`)
261261
- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`)
262262
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
263+
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57768`)
263264
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
264265
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
265266
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)

Diff for: pandas/core/strings/accessor.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -321,18 +321,16 @@ def _wrap_result(
321321
new_values.append(row)
322322
pa_type = result._pa_array.type
323323
result = ArrowExtensionArray(pa.array(new_values, type=pa_type))
324-
if name is not None:
325-
labels = name
326-
else:
327-
labels = range(max_len)
324+
if name is None:
325+
name = range(max_len)
328326
result = (
329327
pa.compute.list_flatten(result._pa_array)
330328
.to_numpy()
331329
.reshape(len(result), max_len)
332330
)
333331
result = {
334332
label: ArrowExtensionArray(pa.array(res))
335-
for label, res in zip(labels, result.T)
333+
for label, res in zip(name, result.T)
336334
}
337335
elif is_object_dtype(result):
338336

Diff for: pandas/tests/extension/test_arrow.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -2268,19 +2268,23 @@ def test_str_partition():
22682268
ser = pd.Series(["abcba", None], dtype=ArrowDtype(pa.string()))
22692269
result = ser.str.partition("b")
22702270
expected = pd.DataFrame(
2271-
[["a", "b", "cba"], [None, None, None]], dtype=ArrowDtype(pa.string())
2271+
[["a", "b", "cba"], [None, None, None]],
2272+
dtype=ArrowDtype(pa.string()),
2273+
columns=pd.RangeIndex(3),
22722274
)
2273-
tm.assert_frame_equal(result, expected)
2275+
tm.assert_frame_equal(result, expected, check_column_type=True)
22742276

22752277
result = ser.str.partition("b", expand=False)
22762278
expected = pd.Series(ArrowExtensionArray(pa.array([["a", "b", "cba"], None])))
22772279
tm.assert_series_equal(result, expected)
22782280

22792281
result = ser.str.rpartition("b")
22802282
expected = pd.DataFrame(
2281-
[["abc", "b", "a"], [None, None, None]], dtype=ArrowDtype(pa.string())
2283+
[["abc", "b", "a"], [None, None, None]],
2284+
dtype=ArrowDtype(pa.string()),
2285+
columns=pd.RangeIndex(3),
22822286
)
2283-
tm.assert_frame_equal(result, expected)
2287+
tm.assert_frame_equal(result, expected, check_column_type=True)
22842288

22852289
result = ser.str.rpartition("b", expand=False)
22862290
expected = pd.Series(ArrowExtensionArray(pa.array([["abc", "b", "a"], None])))

0 commit comments

Comments
 (0)