Merge branch 'main' into improve_error_message_for_missing_dependencies

chilin0525 · chilin0525 · commit 4efb33aa4aa0 · 2025-03-13T07:11:26.000+08:00
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
@@ -37,6 +37,7 @@ Other enhancements
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype``  (:issue:`60663`)
+- Improved ``repr`` of :class:`.NumpyExtensionArray` to account for NEP51 (:issue:`61085`)
 - The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`)
 - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -782,6 +782,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
+- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -1938,7 +1938,10 @@ def _explode(self):
         """
         # child class explode method supports only list types; return
         # default implementation for non list types.
-        if not pa.types.is_list(self.dtype.pyarrow_dtype):
+        if not (
+            pa.types.is_list(self.dtype.pyarrow_dtype)
+            or pa.types.is_large_list(self.dtype.pyarrow_dtype)
+        ):
             return super()._explode()
         values = self
         counts = pa.compute.list_value_length(values._pa_array)
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -2,6 +2,7 @@
 
 from typing import (
     TYPE_CHECKING,
+    Any,
     Literal,
 )
 
@@ -29,6 +30,8 @@
 from pandas.core.strings.object_array import ObjectStringArrayMixin
 
 if TYPE_CHECKING:
+    from collections.abc import Callable
+
     from pandas._typing import (
         AxisInt,
         Dtype,
@@ -565,3 +568,12 @@ def _wrap_ndarray_result(self, result: np.ndarray):
 
             return TimedeltaArray._simple_new(result, dtype=result.dtype)
         return type(self)(result)
+
+    def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
+        # NEP 51: https://github.com/numpy/numpy/pull/22449
+        if self.dtype.kind in "SU":
+            return "'{}'".format
+        elif self.dtype == "object":
+            return repr
+        else:
+            return str
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4922,6 +4922,10 @@ def values(self) -> ArrayLike:
            :meth:`Index.to_numpy`, depending on whether you need
            a reference to the underlying data or a NumPy array.
 
+        .. versionchanged:: 3.0.0
+
+           The returned array is read-only.
+
         Returns
         -------
         array: numpy.ndarray or ExtensionArray
diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py
@@ -22,15 +22,15 @@
         np.array([True, False], dtype=bool),
         np.array([0, 1], dtype="datetime64[ns]"),
         np.array([0, 1], dtype="timedelta64[ns]"),
-    ]
+    ],
 )
 def any_numpy_array(request):
     """
     Parametrized fixture for NumPy arrays with different dtypes.
 
     This excludes string and bytes.
     """
-    return request.param
+    return request.param.copy()
 
 
 # ----------------------------------------------------------------------------
@@ -323,3 +323,30 @@ def test_factorize_unsigned():
     tm.assert_numpy_array_equal(res_codes, exp_codes)
 
     tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique))
+
+
+# ----------------------------------------------------------------------------
+# Output formatting
+
+
+def test_array_repr(any_numpy_array):
+    # GH#61085
+    nparray = any_numpy_array
+    arr = NumpyExtensionArray(nparray)
+    if nparray.dtype == "object":
+        values = "['a', 'b']"
+    elif nparray.dtype == "float64":
+        values = "[0.0, 1.0]"
+    elif str(nparray.dtype).startswith("int"):
+        values = "[0, 1]"
+    elif nparray.dtype == "complex128":
+        values = "[0j, (1+2j)]"
+    elif nparray.dtype == "bool":
+        values = "[True, False]"
+    elif nparray.dtype == "datetime64[ns]":
+        values = "[1970-01-01T00:00:00.000000000, 1970-01-01T00:00:00.000000001]"
+    elif nparray.dtype == "timedelta64[ns]":
+        values = "[0 nanoseconds, 1 nanoseconds]"
+    expected = f"<NumpyExtensionArray>\n{values}\nLength: 2, dtype: {nparray.dtype}"
+    result = repr(arr)
+    assert result == expected, f"{result} vs {expected}"
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -145,8 +145,9 @@ def test_explode_scalars_can_ignore_index():
 
 
 @pytest.mark.parametrize("ignore_index", [True, False])
-def test_explode_pyarrow_list_type(ignore_index):
-    # GH 53602
+@pytest.mark.parametrize("list_type", ["list_", "large_list"])
+def test_explode_pyarrow_list_type(ignore_index, list_type):
+    # GH 53602, 61091
     pa = pytest.importorskip("pyarrow")
 
     data = [
@@ -156,7 +157,7 @@ def test_explode_pyarrow_list_type(ignore_index):
         [2, 3],
         None,
     ]
-    ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+    ser = pd.Series(data, dtype=pd.ArrowDtype(getattr(pa, list_type)(pa.int64())))
     result = ser.explode(ignore_index=ignore_index)
     expected = pd.Series(
         data=[None, None, 1, None, 2, 3, None],