
MNT: Bump dev pin on NumPy #60987


Merged · 16 commits · Apr 3, 2025
16 changes: 16 additions & 0 deletions asv_bench/benchmarks/indexing_engines.py
@@ -67,6 +67,14 @@ class NumericEngineIndexing:
def setup(self, engine_and_dtype, index_type, unique, N):
engine, dtype = engine_and_dtype

if (
index_type == "non_monotonic"
and dtype in [np.int16, np.int8, np.uint8]
and unique
):
# Values overflow
raise NotImplementedError

if index_type == "monotonic_incr":
if unique:
arr = np.arange(N * 3, dtype=dtype)
@@ -115,6 +123,14 @@ def setup(self, engine_and_dtype, index_type, unique, N):
engine, dtype = engine_and_dtype
dtype = dtype.lower()

if (
index_type == "non_monotonic"
and dtype in ["int16", "int8", "uint8"]
and unique
):
# Values overflow
raise NotImplementedError

if index_type == "monotonic_incr":
if unique:
arr = np.arange(N * 3, dtype=dtype)
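For context on the new guard: the benchmark builds its index with `np.arange(N * 3, dtype=dtype)`, and the small integer dtypes cannot represent that many distinct values, so the unique non-monotonic cases would silently lose uniqueness. A minimal sketch of the failure mode (the value of `N` here is illustrative, not taken from the benchmark):

```python
import numpy as np

# A small integer dtype cannot hold 3 * N distinct values, so casting the
# benchmark-sized range wraps around instead of staying unique.
N = 10**5  # hypothetical size for illustration
print(np.iinfo(np.int8).max)            # 127 -- far fewer than 3 * N values
arr = np.arange(N * 3).astype(np.int8)  # wraps modulo 256
print(np.unique(arr).size)              # 256, not 3 * N -- uniqueness lost
```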
4 changes: 2 additions & 2 deletions doc/source/getting_started/comparison/comparison_with_r.rst
@@ -383,7 +383,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.

.. ipython:: python

a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4)
a = np.array(list(range(1, 24)) + [np.nan]).reshape(2, 3, 4)
pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])

meltlist
@@ -402,7 +402,7 @@ In Python, this list would be a list of tuples, so

.. ipython:: python

a = list(enumerate(list(range(1, 5)) + [np.NAN]))
a = list(enumerate(list(range(1, 5)) + [np.nan]))
pd.DataFrame(a)

For more details and examples see :ref:`the Intro to Data Structures
1 change: 1 addition & 0 deletions doc/source/user_guide/basics.rst
@@ -2063,6 +2063,7 @@ or a passed ``Series``), then it will be preserved in DataFrame operations.
different numeric dtypes will **NOT** be combined. The following example will give you a taste.

.. ipython:: python
:okwarning:

df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32")
df1
2 changes: 2 additions & 0 deletions doc/source/user_guide/enhancingperf.rst
@@ -171,6 +171,7 @@ can be improved by passing an ``np.ndarray``.
In [4]: %%cython
...: cimport numpy as np
...: import numpy as np
...: np.import_array()
...: cdef double f_typed(double x) except? -2:
...: return x * (x - 1)
...: cpdef double integrate_f_typed(double a, double b, int N):
@@ -225,6 +226,7 @@ and ``wraparound`` checks can yield more performance.
...: cimport cython
...: cimport numpy as np
...: import numpy as np
...: np.import_array()
...: cdef np.float64_t f_typed(np.float64_t x) except? -2:
...: return x * (x - 1)
...: cpdef np.float64_t integrate_f_typed(np.float64_t a, np.float64_t b, np.int64_t N):
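The `np.import_array()` calls added to both Cython examples initialize NumPy's C-API for the compiled module. Any module that does `cimport numpy` needs this call before using the C-level API, and NumPy 2 appears to enforce the initialization more strictly than 1.x did, which is presumably why the documented examples needed it added.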
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.11.0.rst
@@ -73,6 +73,7 @@ Dtypes
Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore, different numeric dtypes will **NOT** be combined. The following example will give you a taste.

.. ipython:: python
:okwarning:

df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
df1
2 changes: 1 addition & 1 deletion environment.yml
@@ -23,7 +23,7 @@ dependencies:

# required dependencies
- python-dateutil
- numpy<2
- numpy<3

# optional dependencies
- beautifulsoup4>=4.11.2
4 changes: 2 additions & 2 deletions pandas/compat/numpy/__init__.py
@@ -36,8 +36,8 @@
r".*In the future `np\.long` will be defined as.*",
FutureWarning,
)
np_long = np.long # type: ignore[attr-defined]
np_ulong = np.ulong # type: ignore[attr-defined]
np_long = np.long
np_ulong = np.ulong
except AttributeError:
np_long = np.int_
np_ulong = np.uint
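The `type: ignore[attr-defined]` comments can be dropped here because NumPy 2.0 reintroduced `np.long` and `np.ulong` as real, typed aliases; the `except AttributeError` branch still covers the 1.x releases where the attributes are absent.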
2 changes: 1 addition & 1 deletion pandas/core/accessor.py
@@ -351,7 +351,7 @@ def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]:
AttributeError: The series must contain integer data only.
>>> df = pd.Series([1, 2, 3])
>>> df.int_accessor.sum()
6"""
np.int64(6)"""


@doc(_register_accessor, klass="Series", examples=_register_series_examples)
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -415,7 +415,7 @@ def unique(values):

>>> pd.unique(pd.array([1 + 1j, 2, 3]))
<NumpyExtensionArray>
[(1+1j), (2+0j), (3+0j)]
[np.complex128(1+1j), np.complex128(2+0j), np.complex128(3+0j)]
Member:
I merged #54268 a while back to make these reprs render like the Python scalars (the pre-NumPy 2.0 behavior). It appears that PR didn't touch all the relevant repr methods.

Member Author:
I'll do a precursor PR to fix the reprs for arrays, and then revert some of the outputs here.

Your comment only applies to EA reprs?

Member:
I would say any repr in pandas shouldn't be showing the NEP 51 style repr for scalars.

Member:
> I would say any repr in pandas shouldn't be showing the NEP 51 style repr for scalars.

The main motivation behind NEP 51 is that the Python numerical types behave differently from the NumPy scalars. They give a few examples, such as:

> Python programmers sometimes write `obj is True` and will be surprised when an object that shows as `True` fails to pass the test. It is much easier to understand this behavior when the value is shown as `np.True_`.

So I think we should be showing the NEP 51 style repr for scalars.

@mroeschke you have approved this PR so I assume that was a typo?

Length: 3, dtype: complex128
"""
return unique_with_mask(values)
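A quick illustration of the identity pitfall the NEP quote in the thread above describes (a hypothetical snippet, not part of this diff):

```python
import numpy as np

# Under NumPy >= 2 the scalar repr makes the type explicit, which is the
# distinction NEP 51 argues users need to see.
val = np.array([True])[0]  # a NumPy bool scalar, not the builtin True
print(repr(val))           # np.True_ on NumPy 2 (printed as True before)
print(val is True)         # False -- identity against the builtin fails
print(bool(val) is True)   # True -- explicit conversion restores identity
```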
10 changes: 5 additions & 5 deletions pandas/core/arrays/base.py
@@ -1072,7 +1072,7 @@ def interpolate(
... limit_area="inside",
... )
<NumpyExtensionArray>
[0.0, 1.0, 2.0, 3.0]
[np.float64(0.0), np.float64(1.0), np.float64(2.0), np.float64(3.0)]
Length: 4, dtype: float64

Interpolating values in a FloatingArray:
Expand Down Expand Up @@ -1962,7 +1962,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
... return lambda x: "*" + str(x) + "*" if boxed else repr(x) + "*"
>>> MyExtensionArray(np.array([1, 2, 3, 4]))
<MyExtensionArray>
[1*, 2*, 3*, 4*]
[np.int64(1)*, np.int64(2)*, np.int64(3)*, np.int64(4)*]
Member:
NEP 51 is about the repr of NumPy scalars, to help users distinguish them from the Python builtin types and clarify their behavior; array representations are not affected, since they already include `dtype=` when necessary.

So we should probably change the example here to not use the NEP 51 repr in the output?

Member:
I agree with Simon. Historically I don't think we have ever favored exposing NumPy scalars directly to end users. If that has happened as an implementation detail, that is one thing, but explicitly showing that to an end user runs counter to what we have done in the past.

Member Author:
Updated.

Length: 4, dtype: int64
"""
if boxed:
@@ -2176,11 +2176,11 @@ def _reduce(
Examples
--------
>>> pd.array([1, 2, 3])._reduce("min")
1
np.int64(1)
>>> pd.array([1, 2, 3])._reduce("max")
3
np.int64(3)
>>> pd.array([1, 2, 3])._reduce("sum")
6
np.int64(6)
>>> pd.array([1, 2, 3])._reduce("mean")
2.0
>>> pd.array([1, 2, 3])._reduce("median")
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
@@ -275,7 +275,7 @@ def _unbox_scalar(
--------
>>> arr = pd.array(np.array(["1970-01-01"], "datetime64[ns]"))
>>> arr._unbox_scalar(arr[0])
numpy.datetime64('1970-01-01T00:00:00.000000000')
np.datetime64('1970-01-01T00:00:00.000000000')
"""
raise AbstractMethodError(self)

3 changes: 2 additions & 1 deletion pandas/core/arrays/interval.py
@@ -1775,7 +1775,8 @@ def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
[(0, 1], (1, 2]]
Length: 2, dtype: interval[int64, right]
>>> idx.to_tuples()
array([(0, 1), (1, 2)], dtype=object)
array([(np.int64(0), np.int64(1)), (np.int64(1), np.int64(2))],
dtype=object)

For :class:`pandas.IntervalIndex`:

12 changes: 6 additions & 6 deletions pandas/core/arrays/masked.py
@@ -1470,17 +1470,17 @@ def all(
skips NAs):

>>> pd.array([True, True, pd.NA]).all()
True
np.True_
>>> pd.array([1, 1, pd.NA]).all()
True
np.True_
>>> pd.array([True, False, pd.NA]).all()
False
np.False_
>>> pd.array([], dtype="boolean").all()
True
np.True_
>>> pd.array([pd.NA], dtype="boolean").all()
True
np.True_
>>> pd.array([pd.NA], dtype="Float64").all()
True
np.True_

With ``skipna=False``, the result can be NA if this is logically
required (whether ``pd.NA`` is True or False influences the result):
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
@@ -80,7 +80,7 @@ class NumpyExtensionArray( # type: ignore[misc]
--------
>>> pd.arrays.NumpyExtensionArray(np.array([0, 1, 2, 3]))
<NumpyExtensionArray>
[0, 1, 2, 3]
[np.int64(0), np.int64(1), np.int64(2), np.int64(3)]
Length: 4, dtype: int64
"""

4 changes: 2 additions & 2 deletions pandas/core/arrays/sparse/accessor.py
@@ -297,7 +297,7 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate):
--------
>>> df = pd.DataFrame({"a": [1, 2, 0, 0], "b": [3, 0, 0, 4]}, dtype="Sparse[int]")
>>> df.sparse.density
0.5
np.float64(0.5)
"""

def _validate(self, data) -> None:
@@ -459,7 +459,7 @@ def density(self) -> float:
--------
>>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0, 1])})
>>> df.sparse.density
0.5
np.float64(0.5)
"""
tmp = np.mean([column.array.density for _, column in self._parent.items()])
return tmp
8 changes: 4 additions & 4 deletions pandas/core/base.py
@@ -558,7 +558,7 @@ def array(self) -> ExtensionArray:

>>> pd.Series([1, 2, 3]).array
<NumpyExtensionArray>
[1, 2, 3]
[np.int64(1), np.int64(2), np.int64(3)]
Length: 3, dtype: int64

For extension types, like Categorical, the actual ExtensionArray
@@ -804,9 +804,9 @@ def argmax(
dtype: float64

>>> s.argmax()
2
np.int64(2)
>>> s.argmin()
0
Member Author (comment on lines 806 to 809):
There are a few of these where I'm wondering if we should be returning Python scalars instead of NumPy. Should issues be opened for these?

cc @pandas-dev/pandas-core

Member:
I think generally we always want to return Python scalars (IIRC we got a lot of issues about this in iteration and iteration-like APIs in the past).

Member Author:
Even just wrapping the result of Series._reduce with maybe_box_native breaks 692 tests. From a cursory look, they're tests that expect a NumPy scalar back. A lot, however, are something like op(data).any().any() so that they work with both DataFrame and Series. I plan to bring this up in the next dev meeting.

Member:
I agree we should always return Python scalars. I'm surprised at the number of failures that expect NumPy scalars.

Contributor:
I'd think you need a deprecation on this, because people may have code that depends on the result being a NumPy scalar. I think the tests we have in pandas-stubs for typing may depend on this.

Member Author:
We could put it behind a future option, maybe something like `always_return_python_scalars`.

np.int64(0)

The maximum cereal calories is the third element and
the minimum cereal calories is the first element,
Expand Down Expand Up @@ -1360,7 +1360,7 @@ def factorize(
dtype: int64

>>> ser.searchsorted(4)
3
np.int64(3)

>>> ser.searchsorted([0, 4])
array([0, 3])
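As a minimal sketch of the unwrapping idea floated in the thread above, assuming `.item()` as a stand-in for the `maybe_box_native` helper mentioned there:

```python
import numpy as np
import pandas as pd

s = pd.Series([100, 110, 120])
res = s.argmax()
print(repr(res))         # np.int64(2) -- a NumPy scalar under NumPy 2 reprs
print(repr(res.item()))  # 2 -- unwrapped to a plain Python int
```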
3 changes: 2 additions & 1 deletion pandas/core/common.py
@@ -246,7 +246,8 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
with warnings.catch_warnings():
# Can remove warning filter once NumPy 1.24 is min version
if not np_version_gte1p24:
warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
# np.VisibleDeprecationWarning only in np.exceptions in 2.0
warnings.simplefilter("ignore", np.VisibleDeprecationWarning) # type: ignore[attr-defined]
result = np.asarray(values, dtype=dtype)
except ValueError:
# Using try/except since it's more performant than checking is_list_like
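For background on the new `type: ignore[attr-defined]`: `np.VisibleDeprecationWarning` moved to `np.exceptions` (available since NumPy 1.25) and is no longer a top-level attribute in 2.0, so the access needs the ignore against NumPy 2 stubs even though the branch only runs on old NumPy. A hedged sketch of version-agnostic access:

```python
import numpy as np

# np.exceptions exists from NumPy 1.25 onward; fall back to the legacy
# top-level attribute on older releases.
try:
    VisibleDeprecationWarning = np.exceptions.VisibleDeprecationWarning
except AttributeError:
    VisibleDeprecationWarning = np.VisibleDeprecationWarning
```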
2 changes: 1 addition & 1 deletion pandas/core/construction.py
@@ -177,7 +177,7 @@ def array(

>>> pd.array(["a", "b"], dtype=str)
<NumpyExtensionArray>
['a', 'b']
[np.str_('a'), np.str_('b')]
Length: 2, dtype: str32

This would instead return the new ExtensionArray dedicated for string
6 changes: 3 additions & 3 deletions pandas/core/dtypes/missing.py
@@ -428,9 +428,9 @@ def array_equivalent(
Examples
--------
>>> array_equivalent(np.array([1, 2, np.nan]), np.array([1, 2, np.nan]))
True
np.True_
>>> array_equivalent(np.array([1, np.nan, 2]), np.array([1, 2, np.nan]))
False
np.False_
"""
left, right = np.asarray(left), np.asarray(right)

@@ -626,7 +626,7 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
>>> na_value_for_dtype(np.dtype("bool"))
False
>>> na_value_for_dtype(np.dtype("datetime64[ns]"))
numpy.datetime64('NaT')
np.datetime64('NaT')
"""

if isinstance(dtype, ExtensionDtype):
4 changes: 2 additions & 2 deletions pandas/core/generic.py
@@ -887,7 +887,7 @@ def squeeze(self, axis: Axis | None = None) -> Scalar | Series | DataFrame:
dtype: int64

>>> even_primes.squeeze()
2
np.int64(2)

Squeezing objects with more than one value in every axis does nothing:

@@ -7954,7 +7954,7 @@ def asof(self, where, subset=None):
dtype: float64

>>> s.asof(20)
2.0
np.float64(2.0)

For a sequence `where`, a Series is returned. The first value is
NaN, because the first element of `where` is before the first
5 changes: 3 additions & 2 deletions pandas/core/groupby/groupby.py
@@ -546,7 +546,8 @@ def groups(self) -> dict[Hashable, Index]:
2023-02-15 4
dtype: int64
>>> ser.resample("MS").groups
{Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4}
{Timestamp('2023-01-01 00:00:00'): np.int64(2),
Timestamp('2023-02-01 00:00:00'): np.int64(4)}
"""
if isinstance(self.keys, list) and len(self.keys) == 1:
warnings.warn(
@@ -613,7 +614,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
toucan 1 5 6
eagle 7 8 9
>>> df.groupby(by=["a"]).indices
{1: array([0, 1]), 7: array([2])}
{np.int64(1): array([0, 1]), np.int64(7): array([2])}

For Resampler:

16 changes: 8 additions & 8 deletions pandas/core/indexing.py
@@ -265,7 +265,7 @@ def iloc(self) -> _iLocIndexer:
With scalar integers.

>>> df.iloc[0, 1]
2
np.int64(2)

With lists of integers.

@@ -375,7 +375,7 @@ def loc(self) -> _LocIndexer:
Single label for row and column

>>> df.loc["cobra", "shield"]
2
np.int64(2)

Slice with labels for row and single label for column. As mentioned
above, note that both the start and stop of the slice are included.
@@ -666,18 +666,18 @@ def at(self) -> _AtIndexer:
Get value at specified row/column pair

>>> df.at[4, "B"]
2
np.int64(2)

Set value at specified row/column pair

>>> df.at[4, "B"] = 10
>>> df.at[4, "B"]
10
np.int64(10)

Get value within a Series

>>> df.loc[5].at["B"]
4
np.int64(5)
"""
return _AtIndexer("at", self)

@@ -715,18 +715,18 @@ def iat(self) -> _iAtIndexer:
Get value at specified row/column pair

>>> df.iat[1, 2]
1
np.int64(1)

Set value at specified row/column pair

>>> df.iat[1, 2] = 10
>>> df.iat[1, 2]
10
np.int64(10)

Get value within a series

>>> df.loc[0].iat[1]
2
np.int64(2)
"""
return _iAtIndexer("iat", self)
