pandas-dev
diff --git a/Diff for: ‎.github/CODEOWNERS
-2 b/Diff for: ‎.github/CODEOWNERS
-2
diff --git a/Diff for: ‎.github/workflows/wheels.yml
+1-1 b/Diff for: ‎.github/workflows/wheels.yml
+1-1
diff --git a/Diff for: ‎asv_bench/benchmarks/indexing_engines.py
+16 b/Diff for: ‎asv_bench/benchmarks/indexing_engines.py
+16
diff --git a/Diff for: ‎ci/meta.yaml
-1 b/Diff for: ‎ci/meta.yaml
-1
diff --git a/Diff for: ‎doc/source/development/debugging_extensions.rst
+1-1 b/Diff for: ‎doc/source/development/debugging_extensions.rst
+1-1
diff --git a/Diff for: ‎doc/source/getting_started/comparison/comparison_with_r.rst
+2-2 b/Diff for: ‎doc/source/getting_started/comparison/comparison_with_r.rst
+2-2
diff --git a/Diff for: ‎doc/source/user_guide/basics.rst
+8-8 b/Diff for: ‎doc/source/user_guide/basics.rst
+8-8
diff --git a/Diff for: ‎doc/source/user_guide/enhancingperf.rst
+2 b/Diff for: ‎doc/source/user_guide/enhancingperf.rst
+2
diff --git a/Diff for: ‎doc/source/whatsnew/v0.11.0.rst
+2-2 b/Diff for: ‎doc/source/whatsnew/v0.11.0.rst
+2-2
diff --git a/Diff for: ‎doc/source/whatsnew/v3.0.0.rst
+5 b/Diff for: ‎doc/source/whatsnew/v3.0.0.rst
+5
diff --git a/Diff for: ‎environment.yml
+1-1 b/Diff for: ‎environment.yml
+1-1
diff --git a/Diff for: ‎pandas/_libs/algos.pyx
+9-3 b/Diff for: ‎pandas/_libs/algos.pyx
+9-3
diff --git a/Diff for: ‎pandas/_libs/tslibs/period.pyx
+2-5 b/Diff for: ‎pandas/_libs/tslibs/period.pyx
+2-5
diff --git a/Diff for: ‎pandas/compat/numpy/__init__.py
+2-2 b/Diff for: ‎pandas/compat/numpy/__init__.py
+2-2
diff --git a/Diff for: ‎pandas/core/accessor.py
+1-1 b/Diff for: ‎pandas/core/accessor.py
+1-1
diff --git a/Diff for: ‎pandas/core/arrays/base.py
+9-9 b/Diff for: ‎pandas/core/arrays/base.py
+9-9
diff --git a/Diff for: ‎pandas/core/arrays/datetimelike.py
+1-1 b/Diff for: ‎pandas/core/arrays/datetimelike.py
+1-1
diff --git a/Diff for: ‎pandas/core/arrays/interval.py
+2-1 b/Diff for: ‎pandas/core/arrays/interval.py
+2-1
@@ -10,10 +10,8 @@ doc/source/development            @noatamir
 
 # pandas
 pandas/_libs/                     @WillAyd
-pandas/_libs/tslibs/*             @MarcoGorelli
 pandas/_typing.py                 @Dr-Irv
 pandas/core/groupby/*             @rhshadrach
-pandas/core/tools/datetimes.py    @MarcoGorelli
 pandas/io/excel/*                 @rhshadrach
 pandas/io/formats/style.py        @attack68
 pandas/io/formats/style_render.py @attack68
 
@@ -153,7 +153,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.23.1
+        uses: pypa/cibuildwheel@v2.23.2
         with:
          package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
 
@@ -67,6 +67,14 @@ class NumericEngineIndexing:
     def setup(self, engine_and_dtype, index_type, unique, N):
         engine, dtype = engine_and_dtype
 
+        if (
+            index_type == "non_monotonic"
+            and dtype in [np.int16, np.int8, np.uint8]
+            and unique
+        ):
+            # Values overflow
+            raise NotImplementedError
+
         if index_type == "monotonic_incr":
             if unique:
                 arr = np.arange(N * 3, dtype=dtype)
@@ -115,6 +123,14 @@ def setup(self, engine_and_dtype, index_type, unique, N):
         engine, dtype = engine_and_dtype
         dtype = dtype.lower()
 
+        if (
+            index_type == "non_monotonic"
+            and dtype in ["int16", "int8", "uint8"]
+            and unique
+        ):
+            # Values overflow
+            raise NotImplementedError
+
         if index_type == "monotonic_incr":
             if unique:
                 arr = np.arange(N * 3, dtype=dtype)
 
@@ -89,4 +89,3 @@ extra:
     - datapythonista
     - phofl
     - lithomas1
-    - marcogorelli
@@ -23,7 +23,7 @@ By default building pandas from source will generate a release build. To generat
 
 .. note::
 
-   conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases. If using conda, you may need to set ``CFLAGS="$CFLAGS -O0"`` and ``CPPFLAGS="$CPPFLAGS -O0"`` to ensure optimizations are turned off for debugging
+   conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases, and these flags may work against a development environment. If using conda, you should unset these environment variables via ``export CFLAGS=`` and ``export CPPFLAGS=``.
 
 By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types.
 
 
@@ -383,7 +383,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.
 
 .. ipython:: python
 
-   a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4)
+   a = np.array(list(range(1, 24)) + [np.nan]).reshape(2, 3, 4)
    pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])
 
 meltlist
@@ -402,7 +402,7 @@ In Python, this list would be a list of tuples, so
 
 .. ipython:: python
 
-   a = list(enumerate(list(range(1, 5)) + [np.NAN]))
+   a = list(enumerate(list(range(1, 5)) + [np.nan]))
    pd.DataFrame(a)
 
 For more details and examples see :ref:`the Intro to Data Structures
 
@@ -36,7 +36,7 @@ of elements to display is five, but you may pass a custom number.
 Attributes and underlying data
 ------------------------------
 
-pandas objects have a number of attributes enabling you to access the metadata
+pandas objects have a number of attributes enabling you to access the metadata.
 
 * **shape**: gives the axis dimensions of the object, consistent with ndarray
 * Axis labels
@@ -59,7 +59,7 @@ NumPy's type system to add support for custom arrays
 (see :ref:`basics.dtypes`).
 
 To get the actual data inside a :class:`Index` or :class:`Series`, use
-the ``.array`` property
+the ``.array`` property.
 
 .. ipython:: python
 
@@ -88,18 +88,18 @@ NumPy doesn't have a dtype to represent timezone-aware datetimes, so there
 are two possibly useful representations:
 
 1. An object-dtype :class:`numpy.ndarray` with :class:`Timestamp` objects, each
-   with the correct ``tz``
+   with the correct ``tz``.
 2. A ``datetime64[ns]`` -dtype :class:`numpy.ndarray`, where the values have
-   been converted to UTC and the timezone discarded
+   been converted to UTC and the timezone discarded.
 
-Timezones may be preserved with ``dtype=object``
+Timezones may be preserved with ``dtype=object``:
 
 .. ipython:: python
 
    ser = pd.Series(pd.date_range("2000", periods=2, tz="CET"))
    ser.to_numpy(dtype=object)
 
-Or thrown away with ``dtype='datetime64[ns]'``
+Or thrown away with ``dtype='datetime64[ns]'``:
 
 .. ipython:: python
 
@@ -2064,12 +2064,12 @@ different numeric dtypes will **NOT** be combined. The following example will gi
 
 .. ipython:: python
 
-   df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32")
+   df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float64")
    df1
    df1.dtypes
    df2 = pd.DataFrame(
        {
-           "A": pd.Series(np.random.randn(8), dtype="float16"),
+           "A": pd.Series(np.random.randn(8), dtype="float32"),
            "B": pd.Series(np.random.randn(8)),
            "C": pd.Series(np.random.randint(0, 255, size=8), dtype="uint8"),  # [0,255] (range of uint8)
        }
 
@@ -171,6 +171,7 @@ can be improved by passing an ``np.ndarray``.
    In [4]: %%cython
       ...: cimport numpy as np
       ...: import numpy as np
+      ...: np.import_array()
       ...: cdef double f_typed(double x) except? -2:
       ...:     return x * (x - 1)
       ...: cpdef double integrate_f_typed(double a, double b, int N):
@@ -225,6 +226,7 @@ and ``wraparound`` checks can yield more performance.
       ...: cimport cython
       ...: cimport numpy as np
       ...: import numpy as np
+      ...: np.import_array()
       ...: cdef np.float64_t f_typed(np.float64_t x) except? -2:
       ...:     return x * (x - 1)
       ...: cpdef np.float64_t integrate_f_typed(np.float64_t a, np.float64_t b, np.int64_t N):
 
@@ -74,10 +74,10 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
 
 .. ipython:: python
 
-   df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
+   df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float64')
    df1
    df1.dtypes
-   df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'),
+   df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float32'),
                        'B': pd.Series(np.random.randn(8)),
                        'C': pd.Series(range(8), dtype='uint8')})
    df2
 
@@ -674,6 +674,7 @@ Timezones
 
 Numeric
 ^^^^^^^
+- Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
 - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
 - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
 
@@ -773,6 +774,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
+- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`)
 - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
 - Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
@@ -824,6 +826,7 @@ Other
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
 - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
+- Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)
 - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`)
@@ -833,12 +836,14 @@ Other
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`)
 - Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
+- Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. (:issue:`60494`)
 - Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
 - Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`)
+- Bug in :meth:`Series.describe` where median percentile was always included when the ``percentiles`` argument was passed (:issue:`60550`)
 - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
 - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
 
@@ -23,7 +23,7 @@ dependencies:
 
   # required dependencies
   - python-dateutil
-  - numpy<2
+  - numpy<3
 
   # optional dependencies
   - beautifulsoup4>=4.11.2
 
@@ -353,10 +353,9 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
         float64_t[:, ::1] result
         uint8_t[:, :] mask
         int64_t nobs = 0
-        float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy
+        float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy, val
 
     N, K = (<object>mat).shape
-
     if minp is None:
         minpv = 1
     else:
@@ -389,8 +388,15 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
                 else:
                     divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
 
+                    # clip `covxy / divisor` to ensure coeff is within bounds
                     if divisor != 0:
-                        result[xi, yi] = result[yi, xi] = covxy / divisor
+                        val = covxy / divisor
+                        if not cov:
+                            if val > 1.0:
+                                val = 1.0
+                            elif val < -1.0:
+                                val = -1.0
+                        result[xi, yi] = result[yi, xi] = val
                     else:
                         result[xi, yi] = result[yi, xi] = NaN
 
 
@@ -1752,9 +1752,6 @@ cdef class _Period(PeriodMixin):
     def __cinit__(self, int64_t ordinal, BaseOffset freq):
         self.ordinal = ordinal
         self.freq = freq
-        # Note: this is more performant than PeriodDtype.from_date_offset(freq)
-        #  because from_date_offset cannot be made a cdef method (until cython
-        #  supported cdef classmethods)
         self._dtype = PeriodDtypeBase(freq._period_dtype_code, freq.n)
 
     @classmethod
@@ -1913,7 +1910,7 @@ cdef class _Period(PeriodMixin):
 
         Parameters
         ----------
-        freq : str, BaseOffset
+        freq : str, DateOffset
             The target frequency to convert the Period object to.
             If a string is provided,
             it must be a valid :ref:`period alias <timeseries.period_aliases>`.
@@ -2599,7 +2596,7 @@ cdef class _Period(PeriodMixin):
 
         Parameters
         ----------
-        freq : str, BaseOffset
+        freq : str, DateOffset
             Frequency to use for the returned period.
 
         See Also
 
@@ -36,8 +36,8 @@
                 r".*In the future `np\.long` will be defined as.*",
                 FutureWarning,
             )
-            np_long = np.long  # type: ignore[attr-defined]
-            np_ulong = np.ulong  # type: ignore[attr-defined]
+            np_long = np.long
+            np_ulong = np.ulong
     except AttributeError:
         np_long = np.int_
         np_ulong = np.uint
 
@@ -351,7 +351,7 @@ def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]:
 AttributeError: The series must contain integer data only.
 >>> df = pd.Series([1, 2, 3])
 >>> df.int_accessor.sum()
-6"""
+np.int64(6)"""
 
 
 @doc(_register_accessor, klass="Series", examples=_register_series_examples)
 
@@ -941,7 +941,7 @@ def argmin(self, skipna: bool = True) -> int:
         --------
         >>> arr = pd.array([3, 1, 2, 5, 4])
         >>> arr.argmin()
-        1
+        np.int64(1)
         """
         # Implementer note: You have two places to override the behavior of
         # argmin.
@@ -975,7 +975,7 @@ def argmax(self, skipna: bool = True) -> int:
         --------
         >>> arr = pd.array([3, 1, 2, 5, 4])
         >>> arr.argmax()
-        3
+        np.int64(3)
         """
         # Implementer note: You have two places to override the behavior of
         # argmax.
@@ -1959,10 +1959,10 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
         --------
         >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray):
         ...     def _formatter(self, boxed=False):
-        ...         return lambda x: "*" + str(x) + "*" if boxed else repr(x) + "*"
+        ...         return lambda x: "*" + str(x) + "*"
         >>> MyExtensionArray(np.array([1, 2, 3, 4]))
         <MyExtensionArray>
-        [1*, 2*, 3*, 4*]
+        [*1*, *2*, *3*, *4*]
         Length: 4, dtype: int64
         """
         if boxed:
@@ -2176,15 +2176,15 @@ def _reduce(
         Examples
         --------
         >>> pd.array([1, 2, 3])._reduce("min")
-        1
+        np.int64(1)
         >>> pd.array([1, 2, 3])._reduce("max")
-        3
+        np.int64(3)
         >>> pd.array([1, 2, 3])._reduce("sum")
-        6
+        np.int64(6)
         >>> pd.array([1, 2, 3])._reduce("mean")
-        2.0
+        np.float64(2.0)
         >>> pd.array([1, 2, 3])._reduce("median")
-        2.0
+        np.float64(2.0)
         """
         meth = getattr(self, name, None)
         if meth is None:
 
@@ -275,7 +275,7 @@ def _unbox_scalar(
         --------
         >>> arr = pd.array(np.array(["1970-01-01"], "datetime64[ns]"))
         >>> arr._unbox_scalar(arr[0])
-        numpy.datetime64('1970-01-01T00:00:00.000000000')
+        np.datetime64('1970-01-01T00:00:00.000000000')
         """
         raise AbstractMethodError(self)
 
 
@@ -1775,7 +1775,8 @@ def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
         [(0, 1], (1, 2]]
         Length: 2, dtype: interval[int64, right]
         >>> idx.to_tuples()
-        array([(0, 1), (1, 2)], dtype=object)
+        array([(np.int64(0), np.int64(1)), (np.int64(1), np.int64(2))],
+              dtype=object)
 
         For :class:`pandas.IntervalIndex`: