Skip to content

Commit 4efb33a

Browse files
committed
Merge branch 'main' into improve_error_message_for_missing_dependencies
2 parents 734a707 + 3c93d06 commit 4efb33a

File tree

7 files changed

+55
-6
lines changed

7 files changed

+55
-6
lines changed

Diff for: doc/source/whatsnew/v2.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Other enhancements
3737
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3838
- :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
3939
- :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`)
40+
- Improved ``repr`` of :class:`.NumpyExtensionArray` to account for NEP 51 (:issue:`61085`)
4041
- :meth:`Series.str.decode` has gained the ``dtype`` argument to control the dtype of the result (:issue:`60940`)
4142
- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`)
4243
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ Reshaping
782782
^^^^^^^^^
783783
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
784784
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
785+
- Bug in :meth:`DataFrame.explode` producing incorrect results for :class:`pyarrow.large_list` type (:issue:`61091`)
785786
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
786787
- Bug in :meth:`DataFrame.join` where a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None`` (:issue:`58721`)
787788
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)

Diff for: pandas/core/arrays/arrow/array.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1938,7 +1938,10 @@ def _explode(self):
19381938
"""
19391939
# child class explode method supports only list types; return
19401940
# default implementation for non list types.
1941-
if not pa.types.is_list(self.dtype.pyarrow_dtype):
1941+
if not (
1942+
pa.types.is_list(self.dtype.pyarrow_dtype)
1943+
or pa.types.is_large_list(self.dtype.pyarrow_dtype)
1944+
):
19421945
return super()._explode()
19431946
values = self
19441947
counts = pa.compute.list_value_length(values._pa_array)

Diff for: pandas/core/arrays/numpy_.py

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing import (
44
TYPE_CHECKING,
5+
Any,
56
Literal,
67
)
78

@@ -29,6 +30,8 @@
2930
from pandas.core.strings.object_array import ObjectStringArrayMixin
3031

3132
if TYPE_CHECKING:
33+
from collections.abc import Callable
34+
3235
from pandas._typing import (
3336
AxisInt,
3437
Dtype,
@@ -565,3 +568,12 @@ def _wrap_ndarray_result(self, result: np.ndarray):
565568

566569
return TimedeltaArray._simple_new(result, dtype=result.dtype)
567570
return type(self)(result)
571+
572+
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
573+
# NEP 51: https://github.com/numpy/numpy/pull/22449
574+
if self.dtype.kind in "SU":
575+
return "'{}'".format
576+
elif self.dtype == "object":
577+
return repr
578+
else:
579+
return str

Diff for: pandas/core/indexes/base.py

+4
Original file line numberDiff line numberDiff line change
@@ -4922,6 +4922,10 @@ def values(self) -> ArrayLike:
49224922
:meth:`Index.to_numpy`, depending on whether you need
49234923
a reference to the underlying data or a NumPy array.
49244924
4925+
.. versionchanged:: 3.0.0
4926+
4927+
The returned array is read-only.
4928+
49254929
Returns
49264930
-------
49274931
array: numpy.ndarray or ExtensionArray

Diff for: pandas/tests/arrays/numpy_/test_numpy.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@
2222
np.array([True, False], dtype=bool),
2323
np.array([0, 1], dtype="datetime64[ns]"),
2424
np.array([0, 1], dtype="timedelta64[ns]"),
25-
]
25+
],
2626
)
2727
def any_numpy_array(request):
2828
"""
2929
Parametrized fixture for NumPy arrays with different dtypes.
3030
3131
This excludes string and bytes.
3232
"""
33-
return request.param
33+
return request.param.copy()
3434

3535

3636
# ----------------------------------------------------------------------------
@@ -323,3 +323,30 @@ def test_factorize_unsigned():
323323
tm.assert_numpy_array_equal(res_codes, exp_codes)
324324

325325
tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique))
326+
327+
328+
# ----------------------------------------------------------------------------
329+
# Output formatting
330+
331+
332+
def test_array_repr(any_numpy_array):
333+
# GH#61085
334+
nparray = any_numpy_array
335+
arr = NumpyExtensionArray(nparray)
336+
if nparray.dtype == "object":
337+
values = "['a', 'b']"
338+
elif nparray.dtype == "float64":
339+
values = "[0.0, 1.0]"
340+
elif str(nparray.dtype).startswith("int"):
341+
values = "[0, 1]"
342+
elif nparray.dtype == "complex128":
343+
values = "[0j, (1+2j)]"
344+
elif nparray.dtype == "bool":
345+
values = "[True, False]"
346+
elif nparray.dtype == "datetime64[ns]":
347+
values = "[1970-01-01T00:00:00.000000000, 1970-01-01T00:00:00.000000001]"
348+
elif nparray.dtype == "timedelta64[ns]":
349+
values = "[0 nanoseconds, 1 nanoseconds]"
350+
expected = f"<NumpyExtensionArray>\n{values}\nLength: 2, dtype: {nparray.dtype}"
351+
result = repr(arr)
352+
assert result == expected, f"{result} vs {expected}"

Diff for: pandas/tests/series/methods/test_explode.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,9 @@ def test_explode_scalars_can_ignore_index():
145145

146146

147147
@pytest.mark.parametrize("ignore_index", [True, False])
148-
def test_explode_pyarrow_list_type(ignore_index):
149-
# GH 53602
148+
@pytest.mark.parametrize("list_type", ["list_", "large_list"])
149+
def test_explode_pyarrow_list_type(ignore_index, list_type):
150+
# GH 53602, 61091
150151
pa = pytest.importorskip("pyarrow")
151152

152153
data = [
@@ -156,7 +157,7 @@ def test_explode_pyarrow_list_type(ignore_index):
156157
[2, 3],
157158
None,
158159
]
159-
ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
160+
ser = pd.Series(data, dtype=pd.ArrowDtype(getattr(pa, list_type)(pa.int64())))
160161
result = ser.explode(ignore_index=ignore_index)
161162
expected = pd.Series(
162163
data=[None, None, 1, None, 2, 3, None],

0 commit comments

Comments
 (0)