diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 328c7566d8e8d..90c2638be5eec 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -393,32 +393,29 @@ def maybe_promote(dtype, fill_value=np.nan): elif is_float(fill_value): if issubclass(dtype.type, np.bool_): - dtype = np.object_ + dtype = np.dtype(np.object_) + elif issubclass(dtype.type, np.integer): dtype = np.dtype(np.float64) - if not isna(fill_value): - fill_value = dtype.type(fill_value) elif dtype.kind == "f": - if not np.can_cast(fill_value, dtype): - # e.g. dtype is float32, need float64 - dtype = np.min_scalar_type(fill_value) + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. mst is np.float64 and dtype is np.float32 + dtype = mst elif dtype.kind == "c": mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) - if dtype.kind == "c" and not np.isnan(fill_value): - fill_value = dtype.type(fill_value) - elif is_bool(fill_value): if not issubclass(dtype.type, np.bool_): - dtype = np.object_ - else: - fill_value = np.bool_(fill_value) + dtype = np.dtype(np.object_) + elif is_integer(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) + elif issubclass(dtype.type, np.integer): if not np.can_cast(fill_value, dtype): # upcast to prevent overflow @@ -428,35 +425,20 @@ def maybe_promote(dtype, fill_value=np.nan): # Case where we disagree with numpy dtype = np.dtype(np.object_) - fill_value = dtype.type(fill_value) - - elif issubclass(dtype.type, np.floating): - # check if we can cast - if _check_lossless_cast(fill_value, dtype): - fill_value = dtype.type(fill_value) - - if dtype.kind in ["c", "f"]: - # e.g. 
if dtype is complex128 and fill_value is 1, we - # want np.complex128(1) - fill_value = dtype.type(fill_value) - elif is_complex(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) + elif issubclass(dtype.type, (np.integer, np.floating)): mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) elif dtype.kind == "c": mst = np.min_scalar_type(fill_value) - if mst > dtype and mst.kind == "c": + if mst > dtype: # e.g. mst is np.complex128 and dtype is np.complex64 dtype = mst - if dtype.kind == "c": - # make sure we have a np.complex and not python complex - fill_value = dtype.type(fill_value) - elif fill_value is None: if is_float_dtype(dtype) or is_complex_dtype(dtype): fill_value = np.nan @@ -466,37 +448,48 @@ def maybe_promote(dtype, fill_value=np.nan): elif is_datetime_or_timedelta_dtype(dtype): fill_value = dtype.type("NaT", "ns") else: - dtype = np.object_ + dtype = np.dtype(np.object_) fill_value = np.nan else: - dtype = np.object_ + dtype = np.dtype(np.object_) # in case we have a string that looked like a number if is_extension_array_dtype(dtype): pass elif issubclass(np.dtype(dtype).type, (bytes, str)): - dtype = np.object_ + dtype = np.dtype(np.object_) + fill_value = _ensure_dtype_type(fill_value, dtype) return dtype, fill_value -def _check_lossless_cast(value, dtype: np.dtype) -> bool: +def _ensure_dtype_type(value, dtype): """ - Check if we can cast the given value to the given dtype _losslesly_. + Ensure that the given value is an instance of the given dtype. + + e.g. if our dtype is np.complex64, we should have an instance of that + as opposed to a python complex object.
Parameters ---------- value : object - dtype : np.dtype + dtype : np.dtype or ExtensionDtype Returns ------- - bool + object """ - casted = dtype.type(value) - if casted == value: - return True - return False + + # Start with exceptions in which we do _not_ cast to numpy types + if is_extension_array_dtype(dtype): + return value + elif dtype == np.object_: + return value + elif isna(value): + # e.g. keep np.nan rather than try to cast to np.float32(np.nan) + return value + + return dtype.type(value) def infer_dtype_from(val, pandas_dtype=False):