Skip to content

Commit 32dcf9c

Browse files
committed
Merge branch 'main' into np-array-copy-keyword
2 parents f66cd05 + 1bf86a3 commit 32dcf9c

39 files changed

+238
-49
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244

245245
Performance improvements
246246
~~~~~~~~~~~~~~~~~~~~~~~~
247+
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
247248
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
248249
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
249250
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -252,11 +253,11 @@ Performance improvements
252253
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
253254
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
254255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
255257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
256258
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
257-
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
258-
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
259259
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
260+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
260261

261262
.. ---------------------------------------------------------------------------
262263
.. _whatsnew_300.bug_fixes:
@@ -265,6 +266,7 @@ Bug fixes
265266
~~~~~~~~~
266267
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
267268
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
269+
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
268270
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
269271

270272
Categorical
@@ -324,6 +326,7 @@ MultiIndex
324326

325327
I/O
326328
^^^
329+
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
327330
-
328331
-
329332

Diff for: pandas/_libs/src/vendored/ujson/python/objToJSON.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ typedef struct __NpyArrContext {
7474
npy_intp ndim;
7575
npy_intp index[NPY_MAXDIMS];
7676
int type_num;
77-
PyArray_GetItemFunc *getitem;
7877

7978
char **rowLabels;
8079
char **columnLabels;
@@ -405,7 +404,6 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
405404
}
406405

407406
npyarr->array = (PyObject *)obj;
408-
npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem;
409407
npyarr->dataptr = PyArray_DATA(obj);
410408
npyarr->ndim = PyArray_NDIM(obj) - 1;
411409
npyarr->curdim = 0;
@@ -492,7 +490,7 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
492490
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
493491
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
494492
} else {
495-
GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array);
493+
GET_TC(tc)->itemValue = PyArray_GETITEM(arrayobj, npyarr->dataptr);
496494
}
497495

498496
npyarr->dataptr += npyarr->stride;

Diff for: pandas/core/array_algos/quantile.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import numpy as np
66

7+
from pandas.compat.numpy import np_version_gt2
78
from pandas.core.dtypes.missing import (
89
isna,
910
na_value_for_dtype,
@@ -102,7 +103,8 @@ def quantile_with_mask(
102103
interpolation=interpolation,
103104
)
104105

105-
result = np.array(result, copy=False)
106+
copy_false = None if np_version_gt2 else False
107+
result = np.array(result, copy=copy_false)
106108
result = result.T
107109

108110
return result
@@ -199,11 +201,12 @@ def _nanpercentile(
199201
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
200202
for (val, m) in zip(list(values), list(mask))
201203
]
204+
copy_false = None if np_version_gt2 else False
202205
if values.dtype.kind == "f":
203206
# preserve itemsize
204-
result = np.array(result, dtype=values.dtype, copy=False).T
207+
result = np.array(result, dtype=values.dtype, copy=copy_false).T
205208
else:
206-
result = np.array(result, copy=False).T
209+
result = np.array(result, copy=copy_false).T
207210
if (
208211
result.dtype != values.dtype
209212
and not mask.all()

Diff for: pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ def __arrow_array__(self, type=None):
659659
"""Convert myself to a pyarrow ChunkedArray."""
660660
return self._pa_array
661661

662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
662+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
663663
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
664664
return self.to_numpy(dtype=dtype)
665665

Diff for: pandas/core/arrays/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
lib,
2828
)
2929
from pandas.compat import set_function_name
30-
from pandas.compat.numpy import function as nv
30+
from pandas.compat.numpy import function as nv, np_version_gt2
3131
from pandas.errors import AbstractMethodError
3232
from pandas.util._decorators import (
3333
Appender,
@@ -710,6 +710,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
710710
return self
711711
else:
712712
return self.copy()
713+
if np_version_gt2 and not copy:
714+
copy = None
713715

714716
if isinstance(dtype, ExtensionDtype):
715717
cls = dtype.construct_array_type()

Diff for: pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,7 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
16591659
# -------------------------------------------------------------
16601660

16611661
@ravel_compat
1662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1662+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
16631663
"""
16641664
The numpy array interface.
16651665

Diff for: pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
353353
# ----------------------------------------------------------------
354354
# Array-Like / EA-Interface Methods
355355

356-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
356+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
357357
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
358358
if is_object_dtype(dtype):
359359
return np.array(list(self), dtype=object)

Diff for: pandas/core/arrays/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution:
649649
# ----------------------------------------------------------------
650650
# Array-Like / EA-Interface Methods
651651

652-
def __array__(self, dtype=None) -> np.ndarray:
652+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
653653
if dtype is None and self.tz:
654654
# The default for tz-aware is object, to preserve tz info
655655
dtype = object
656656

657-
return super().__array__(dtype=dtype)
657+
return super().__array__(dtype=dtype, copy=copy)
658658

659659
def __iter__(self) -> Iterator:
660660
"""

Diff for: pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,7 @@ def is_non_overlapping_monotonic(self) -> bool:
15641564
# ---------------------------------------------------------------------
15651565
# Conversion
15661566

1567-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1567+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
15681568
"""
15691569
Return the IntervalArray's data as a numpy array of Interval
15701570
objects (with dtype='object')

Diff for: pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
594594

595595
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
596596

597-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
597+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
598598
"""
599599
the array interface, return my values
600600
We return an object array here to preserve our scalar values

Diff for: pandas/core/arrays/numeric.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
)
1616
from pandas.errors import AbstractMethodError
1717
from pandas.util._decorators import cache_readonly
18+
from pandas.compat.numpy import np_version_gt2
1819

1920
from pandas.core.dtypes.common import (
2021
is_integer_dtype,
@@ -137,6 +138,12 @@ def _coerce_to_data_and_mask(
137138
values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
138139
):
139140
checker = dtype_cls._checker
141+
if np_version_gt2:
142+
copy_false = None
143+
if not copy:
144+
copy = None
145+
else:
146+
copy_false = False
140147

141148
mask = None
142149
inferred_type = None
@@ -208,9 +215,9 @@ def _coerce_to_data_and_mask(
208215
inferred_type not in ["floating", "mixed-integer-float"]
209216
and not mask.any()
210217
):
211-
values = np.array(original, dtype=dtype, copy=False)
218+
values = np.array(original, dtype=dtype, copy=copy_false)
212219
else:
213-
values = np.array(original, dtype="object", copy=False)
220+
values = np.array(original, dtype="object", copy=copy_false)
214221

215222
# we copy as need to coerce here
216223
if mask.any():

Diff for: pandas/core/arrays/numpy_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def dtype(self) -> NumpyEADtype:
150150
# ------------------------------------------------------------------------
151151
# NumPy Array Interface
152152

153-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
153+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
154154
return np.asarray(self._ndarray, dtype=dtype)
155155

156156
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

Diff for: pandas/core/arrays/period.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
algos as libalgos,
2020
lib,
2121
)
22+
from pandas.compat.numpy import np_version_gt2
2223
from pandas._libs.arrays import NDArrayBacked
2324
from pandas._libs.tslibs import (
2425
BaseOffset,
@@ -243,6 +244,9 @@ def __init__(
243244
if not isinstance(dtype, PeriodDtype):
244245
raise ValueError(f"Invalid dtype {dtype} for PeriodArray")
245246

247+
if np_version_gt2 and not copy:
248+
copy = None
249+
246250
if isinstance(values, ABCSeries):
247251
values = values._values
248252
if not isinstance(values, type(self)):
@@ -400,7 +404,7 @@ def freq(self) -> BaseOffset:
400404
def freqstr(self) -> str:
401405
return PeriodDtype(self.freq)._freqstr
402406

403-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
407+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
404408
if dtype == "i8":
405409
return self.asi8
406410
elif dtype == bool:

Diff for: pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
554554

555555
return cls._simple_new(arr, index, dtype)
556556

557-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
557+
def __array__(self, dtype: NpDtype | None = None, copy: bool | None = None) -> np.ndarray:
558558
fill_value = self.fill_value
559559

560560
if self.sp_index.ngaps == 0:

Diff for: pandas/core/arrays/timedeltas.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
lib,
1414
tslibs,
1515
)
16+
from pandas.compat.numpy import np_version_gt2
1617
from pandas._libs.tslibs import (
1718
NaT,
1819
NaTType,
@@ -1072,6 +1073,9 @@ def sequence_to_td64ns(
10721073
# This includes datetime64-dtype, see GH#23539, GH#29794
10731074
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")
10741075

1076+
copy_false = None if np_version_gt2 else False
1077+
if not copy:
1078+
copy = copy_false
10751079
data = np.array(data, copy=copy)
10761080

10771081
assert data.dtype.kind == "m"
@@ -1152,7 +1156,8 @@ def _objects_to_td64ns(
11521156
higher level.
11531157
"""
11541158
# coerce Index to np.ndarray, converting string-dtype if necessary
1155-
values = np.array(data, dtype=np.object_, copy=False)
1159+
copy_false = None if np_version_gt2 else False
1160+
values = np.array(data, dtype=np.object_, copy=copy_false)
11561161

11571162
result = array_to_timedelta64(values, unit=unit, errors=errors)
11581163
return result.view("timedelta64[ns]")

Diff for: pandas/core/construction.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
from numpy import ma
2121

22+
from pandas.compat.numpy import np_version_gt2
2223
from pandas._config import using_pyarrow_string_dtype
2324

2425
from pandas._libs import lib
@@ -34,6 +35,7 @@
3435
DtypeObj,
3536
T,
3637
)
38+
from pandas.compat.numpy import np_version_gt2
3739
from pandas.util._exceptions import find_stack_level
3840

3941
from pandas.core.dtypes.base import ExtensionDtype
@@ -626,6 +628,8 @@ def sanitize_array(
626628

627629
elif hasattr(data, "__array__"):
628630
# e.g. dask array GH#38645
631+
if np_version_gt2 and not copy:
632+
copy = None
629633
data = np.array(data, copy=copy)
630634
return sanitize_array(
631635
data,
@@ -735,6 +739,9 @@ def _sanitize_str_dtypes(
735739
"""
736740
Ensure we have a dtype that is supported by pandas.
737741
"""
742+
copy_false = None if np_version_gt2 else False
743+
if not copy:
744+
copy = copy_false
738745

739746
# This is to prevent mixed-type Series getting all casted to
740747
# NumPy string type, e.g. NaN --> '-1#IND'.
@@ -744,7 +751,7 @@ def _sanitize_str_dtypes(
744751
# GH#19853: If data is a scalar, result has already the result
745752
if not lib.is_scalar(data):
746753
if not np.all(isna(data)):
747-
data = np.array(data, dtype=dtype, copy=False)
754+
data = np.array(data, dtype=dtype, copy=copy_false)
748755
result = np.array(data, dtype=object, copy=copy)
749756
return result
750757

@@ -781,6 +788,8 @@ def _try_cast(
781788
np.ndarray or ExtensionArray
782789
"""
783790
is_ndarray = isinstance(arr, np.ndarray)
791+
if np_version_gt2 and not copy:
792+
copy = None
784793

785794
if dtype == object:
786795
if not is_ndarray:

Diff for: pandas/core/dtypes/cast.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,9 @@ def construct_2d_arraylike_from_scalar(
14911491
value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
14921492
) -> np.ndarray:
14931493
shape = (length, width)
1494+
copy_false = None if np_version_gt2 else False
1495+
if not copy:
1496+
copy = copy_false
14941497

14951498
if dtype.kind in "mM":
14961499
value = _maybe_box_and_unbox_datetimelike(value, dtype)
@@ -1652,7 +1655,8 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16521655
"out-of-bound Python int",
16531656
DeprecationWarning,
16541657
)
1655-
casted = np.array(arr, dtype=dtype, copy=False)
1658+
copy_false = None if np_version_gt2 else False
1659+
casted = np.array(arr, dtype=dtype, copy=copy_false)
16561660
else:
16571661
with warnings.catch_warnings():
16581662
warnings.filterwarnings("ignore", category=RuntimeWarning)

Diff for: pandas/core/dtypes/missing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import numpy as np
1414

15+
from pandas.compat.numpy import np_version_gt2
1516
from pandas._libs import lib
1617
import pandas._libs.missing as libmissing
1718
from pandas._libs.tslibs import (
@@ -564,7 +565,8 @@ def infer_fill_value(val):
564565
"""
565566
if not is_list_like(val):
566567
val = [val]
567-
val = np.array(val, copy=False)
568+
copy_false = None if np_version_gt2 else False
569+
val = np.array(val, copy=copy_false)
568570
if val.dtype.kind in "mM":
569571
return np.array("NaT", dtype=val.dtype)
570572
elif val.dtype == object:

0 commit comments

Comments
 (0)