Skip to content

Commit 00a825f

Browse files
authored
Backport PR #52470 on branch 2.0.x (BUG: describe not returning ArrowDtype) (#52495)
Backport PR #52470: BUG: describe not returning ArrowDtype
1 parent 31d1de3 commit 00a825f

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

Diff for: doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
Bug fixes
2222
~~~~~~~~~
2323
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
24+
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
2425

2526
.. ---------------------------------------------------------------------------
2627
.. _whatsnew_201.other:

Diff for: pandas/core/methods/describe.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
is_timedelta64_dtype,
3838
)
3939

40-
import pandas as pd
40+
from pandas.core.arrays.arrow.dtype import ArrowDtype
41+
from pandas.core.arrays.floating import Float64Dtype
4142
from pandas.core.reshape.concat import concat
4243

4344
from pandas.io.formats.format import format_percentiles
@@ -230,7 +231,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
230231
# GH#48340 - always return float on non-complex numeric data
231232
dtype: DtypeObj | None
232233
if is_extension_array_dtype(series):
233-
dtype = pd.Float64Dtype()
234+
if isinstance(series.dtype, ArrowDtype):
235+
import pyarrow as pa
236+
237+
dtype = ArrowDtype(pa.float64())
238+
else:
239+
dtype = Float64Dtype()
234240
elif is_numeric_dtype(series) and not is_complex_dtype(series):
235241
dtype = np.dtype("float")
236242
else:

Diff for: pandas/tests/extension/test_arrow.py

+13
Original file line numberDiff line numberDiff line change
@@ -2343,3 +2343,16 @@ def test_setitem_boolean_replace_with_mask_segfault():
23432343
expected = arr.copy()
23442344
arr[np.zeros((N,), dtype=np.bool_)] = False
23452345
assert arr._data == expected._data
2346+
2347+
2348+
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
2349+
def test_describe_numeric_data(pa_type):
2350+
# GH 52470
2351+
data = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type))
2352+
result = data.describe()
2353+
expected = pd.Series(
2354+
[3, 2, 1, 1, 1.5, 2.0, 2.5, 3],
2355+
dtype=ArrowDtype(pa.float64()),
2356+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
2357+
)
2358+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)