Skip to content

MAINT: Adjust the codebase to the new np.array's copy keyword meaning #57172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 5, 2024
10 changes: 7 additions & 3 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import numpy as np

from pandas.compat.numpy import np_version_gt2

from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
Expand Down Expand Up @@ -102,7 +104,8 @@ def quantile_with_mask(
interpolation=interpolation,
)

result = np.array(result, copy=False)
copy_false = None if np_version_gt2 else False
result = np.array(result, copy=copy_false)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
copy_false = None if np_version_gt2 else False
result = np.array(result, copy=copy_false)
result = np.asarray(result)

Would this be a simpler but equivalent alternative?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it (from a quick glance at the diff).

I just added this to the 2.0 migration guide:

  1. Code using np.array(..., copy=False) can in most cases be changed to
    np.asarray(...). Older code tended to use np.array like this because
    it had less overhead than the default np.asarray copy-if-needed
    behavior. This is no longer true, and np.asarray is the preferred function.

result = result.T

return result
Expand Down Expand Up @@ -199,11 +202,12 @@ def _nanpercentile(
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
for (val, m) in zip(list(values), list(mask))
]
copy_false = None if np_version_gt2 else False
if values.dtype.kind == "f":
# preserve itemsize
result = np.array(result, dtype=values.dtype, copy=False).T
result = np.array(result, dtype=values.dtype, copy=copy_false).T
else:
result = np.array(result, copy=False).T
result = np.array(result, copy=copy_false).T
if (
result.dtype != values.dtype
and not mask.all()
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
"""Convert myself to a pyarrow ChunkedArray."""
return self._pa_array

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
return self.to_numpy(dtype=dtype)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
lib,
)
from pandas.compat import set_function_name
from pandas.compat.numpy import function as nv
from pandas.compat.numpy import (
function as nv,
np_version_gt2,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -710,6 +713,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
return self
else:
return self.copy()
if np_version_gt2 and not copy:
copy = None

if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
# -------------------------------------------------------------

@ravel_compat
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
The numpy array interface.

Expand All @@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
dtype : np.dtype or None
Specifies the dtype for the array.

copy : bool or None, optional
Unused.

Returns
-------
numpy.array
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
tzconversion,
)
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
from pandas.compat.numpy import np_version_gt2
from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_inclusive
Expand Down Expand Up @@ -649,12 +650,12 @@ def _resolution_obj(self) -> Resolution:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
if dtype is None and self.tz:
# The default for tz-aware is object, to preserve tz info
dtype = object

return super().__array__(dtype=dtype)
return super().__array__(dtype=dtype, copy=copy)

def __iter__(self) -> Iterator:
"""
Expand Down Expand Up @@ -2421,7 +2422,8 @@ def objects_to_datetime64(
assert errors in ["raise", "coerce"]

# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)
copy_false = None if np_version_gt2 else False
data = np.array(data, dtype=np.object_, copy=copy_false)

result, tz_parsed = tslib.array_to_datetime(
data,
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
# ---------------------------------------------------------------------
# Conversion

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
Return the IntervalArray's data as a numpy array of Interval
objects (with dtype='object')
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
the array interface, return my values
We return an object array here to preserve our scalar values
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
lib,
missing as libmissing,
)
from pandas.compat.numpy import np_version_gt2
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly

Expand Down Expand Up @@ -137,6 +138,12 @@ def _coerce_to_data_and_mask(
values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
):
checker = dtype_cls._checker
if np_version_gt2:
copy_false = None
if not copy:
copy = None
else:
copy_false = False

mask = None
inferred_type = None
Expand Down Expand Up @@ -208,9 +215,9 @@ def _coerce_to_data_and_mask(
inferred_type not in ["floating", "mixed-integer-float"]
and not mask.any()
):
values = np.array(original, dtype=dtype, copy=False)
values = np.array(original, dtype=dtype, copy=copy_false)
else:
values = np.array(original, dtype="object", copy=False)
values = np.array(original, dtype="object", copy=copy_false)

# we copy as need to coerce here
if mask.any():
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
# ------------------------------------------------------------------------
# NumPy Array Interface

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
return np.asarray(self._ndarray, dtype=dtype)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
get_period_field_arr,
period_asfreq_arr,
)
from pandas.compat.numpy import np_version_gt2
from pandas.util._decorators import (
cache_readonly,
doc,
Expand Down Expand Up @@ -243,6 +244,9 @@ def __init__(
if not isinstance(dtype, PeriodDtype):
raise ValueError(f"Invalid dtype {dtype} for PeriodArray")

if np_version_gt2 and not copy:
copy = None

if isinstance(values, ABCSeries):
values = values._values
if not isinstance(values, type(self)):
Expand Down Expand Up @@ -400,7 +404,9 @@ def freq(self) -> BaseOffset:
def freqstr(self) -> str:
return PeriodDtype(self.freq)._freqstr

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
if dtype == "i8":
return self.asi8
elif dtype == bool:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:

return cls._simple_new(arr, index, dtype)

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
fill_value = self.fill_value

if self.sp_index.ngaps == 0:
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
parse_timedelta_unit,
truediv_object_array,
)
from pandas.compat.numpy import function as nv
from pandas.compat.numpy import (
function as nv,
np_version_gt2,
)
from pandas.util._validators import validate_endpoints

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -1072,6 +1075,9 @@ def sequence_to_td64ns(
# This includes datetime64-dtype, see GH#23539, GH#29794
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")

copy_false = None if np_version_gt2 else False
if not copy:
copy = copy_false
data = np.array(data, copy=copy)

assert data.dtype.kind == "m"
Expand Down Expand Up @@ -1152,7 +1158,8 @@ def _objects_to_td64ns(
higher level.
"""
# coerce Index to np.ndarray, converting string-dtype if necessary
values = np.array(data, dtype=np.object_, copy=False)
copy_false = None if np_version_gt2 else False
values = np.array(data, dtype=np.object_, copy=copy_false)

result = array_to_timedelta64(values, unit=unit, errors=errors)
return result.view("timedelta64[ns]")
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
DtypeObj,
T,
)
from pandas.compat.numpy import np_version_gt2
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.base import ExtensionDtype
Expand Down Expand Up @@ -626,6 +627,8 @@ def sanitize_array(

elif hasattr(data, "__array__"):
# e.g. dask array GH#38645
if np_version_gt2 and not copy:
copy = None
data = np.array(data, copy=copy)
return sanitize_array(
data,
Expand Down Expand Up @@ -735,6 +738,9 @@ def _sanitize_str_dtypes(
"""
Ensure we have a dtype that is supported by pandas.
"""
copy_false = None if np_version_gt2 else False
if not copy:
copy = copy_false

# This is to prevent mixed-type Series getting all casted to
# NumPy string type, e.g. NaN --> '-1#IND'.
Expand All @@ -744,7 +750,7 @@ def _sanitize_str_dtypes(
# GH#19853: If data is a scalar, result has already the result
if not lib.is_scalar(data):
if not np.all(isna(data)):
data = np.array(data, dtype=dtype, copy=False)
data = np.array(data, dtype=dtype, copy=copy_false)
result = np.array(data, dtype=object, copy=copy)
return result

Expand Down Expand Up @@ -781,6 +787,8 @@ def _try_cast(
np.ndarray or ExtensionArray
"""
is_ndarray = isinstance(arr, np.ndarray)
if np_version_gt2 and not copy:
copy = None

if dtype == object:
if not is_ndarray:
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,9 @@ def construct_2d_arraylike_from_scalar(
value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
) -> np.ndarray:
shape = (length, width)
copy_false = None if np_version_gt2 else False
if not copy:
copy = copy_false

if dtype.kind in "mM":
value = _maybe_box_and_unbox_datetimelike(value, dtype)
Expand Down Expand Up @@ -1652,7 +1655,8 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
"out-of-bound Python int",
DeprecationWarning,
)
casted = np.array(arr, dtype=dtype, copy=False)
copy_false = None if np_version_gt2 else False
casted = np.array(arr, dtype=dtype, copy=copy_false)
else:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
NaT,
iNaT,
)
from pandas.compat.numpy import np_version_gt2

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
Expand Down Expand Up @@ -564,7 +565,8 @@ def infer_fill_value(val):
"""
if not is_list_like(val):
val = [val]
val = np.array(val, copy=False)
copy_false = None if np_version_gt2 else False
val = np.array(val, copy=copy_false)
if val.dtype.kind in "mM":
return np.array("NaT", dtype=val.dtype)
elif val.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1919,7 +1919,7 @@ def to_numpy(
dtype = np.dtype(dtype)
result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
if result.dtype is not dtype:
result = np.array(result, dtype=dtype, copy=False)
result = np.asarray(result, dtype=dtype)

return result

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1989,7 +1989,9 @@ def empty(self) -> bool:
# GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
__array_priority__: int = 1000

def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
values = self._values
arr = np.asarray(values, dtype=dtype)
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ def __len__(self) -> int:
"""
return len(self._data)

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""
The array interface, return my values.
"""
Expand Down
Loading