diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a6b6d704737bd..3d55647236c3f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -50,6 +50,20 @@ Backwards incompatible API changes - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +.. _whatsnew_0210.dtype_conversions: + +Dtype Conversions +^^^^^^^^^^^^^^^^^ + +Example about setitem / where with bools. + + + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) + + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 21680fb0b3921..975c84b79ed0b 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,6 +19,7 @@ cimport tslib from hashtable cimport * from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta +from datetime import datetime, timedelta from datetime cimport (get_datetime64_value, _pydatetime_to_dts, pandas_datetimestruct) @@ -507,24 +508,37 @@ cdef class TimedeltaEngine(DatetimeEngine): return 'm8[ns]' cpdef convert_scalar(ndarray arr, object value): + # we don't turn integers + # into datetimes/timedeltas + + # we don't turn bools into int/float/complex + if arr.descr.type_num == NPY_DATETIME: if isinstance(value, np.ndarray): pass - elif isinstance(value, Timestamp): - return value.value + elif isinstance(value, datetime): + return Timestamp(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timestamp(value).value + raise 
ValueError("cannot set a Timestamp with a non-timestamp") + elif arr.descr.type_num == NPY_TIMEDELTA: if isinstance(value, np.ndarray): pass - elif isinstance(value, Timedelta): - return value.value + elif isinstance(value, timedelta): + return Timedelta(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timedelta(value).value + raise ValueError("cannot set a Timedelta with a non-timedelta") + + if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not + issubclass(arr.dtype.type, np.bool_)): + if util.is_bool_object(value): + raise ValueError('Cannot assign bool to float/integer series') if issubclass(arr.dtype.type, (np.integer, np.bool_)): if util.is_float_object(value) and value != value: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fd61813a57c98..11208baa6d1ed 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ import warnings from pandas._libs import tslib, lib -from pandas._libs.tslib import iNaT +from pandas._libs.tslib import iNaT, Timestamp from pandas.compat import string_types, text_type, PY3 from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -272,7 +272,7 @@ def maybe_promote(dtype, fill_value=np.nan): else: if issubclass(dtype.type, np.datetime64): try: - fill_value = lib.Timestamp(fill_value).value + fill_value = Timestamp(fill_value).value except: # the proper thing to do here would probably be to upcast # to object (but numpy 1.6.1 doesn't do this properly) @@ -333,6 +333,23 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value +def infer_dtype_from(val, pandas_dtype=False): + """ + interpret the dtype from a scalar or array. 
This is a convenience + routine to infer dtype from a scalar or an array + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar/array belongs to pandas extension types is inferred as + object + """ + if is_scalar(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar @@ -349,9 +366,9 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): # a 1-element ndarray if isinstance(val, np.ndarray): + msg = "invalid ndarray passed to _infer_dtype_from_scalar" if val.ndim != 0: - raise ValueError( - "invalid ndarray passed to _infer_dtype_from_scalar") + raise ValueError(msg) dtype = val.dtype val = val.item() @@ -408,24 +425,32 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): return dtype, val -def infer_dtype_from_array(arr): +def infer_dtype_from_array(arr, pandas_dtype=False): """ infer the dtype from a scalar or array Parameters ---------- arr : scalar or array + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, array belongs to pandas extension types + is inferred as object Returns ------- - tuple (numpy-compat dtype, array) + tuple (numpy-compat/pandas-compat dtype, array) Notes ----- - These infer to numpy dtypes exactly - with the exception that mixed / object dtypes + + if pandas_dtype=False, these infer to numpy dtypes + exactly with the exception that mixed / object dtypes are not coerced by stringifying or conversion + if pandas_dtype=True, datetime64tz-aware/categorical + types will retain their character. 
+ Examples -------- >>> np.asarray([1, '1']) @@ -442,6 +467,10 @@ def infer_dtype_from_array(arr): if not is_list_like(arr): arr = [arr] + if pandas_dtype and (is_categorical_dtype(arr) or + is_datetime64tz_dtype(arr)): + return arr.dtype, arr + # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr) if inferred in ['string', 'bytes', 'unicode', @@ -552,7 +581,7 @@ def conv(r, dtype): if isnull(r): pass elif dtype == _NS_DTYPE: - r = lib.Timestamp(r) + r = Timestamp(r) elif dtype == _TD_DTYPE: r = _coerce_scalar_to_timedelta_type(r) elif dtype == np.bool_: @@ -1026,3 +1055,31 @@ def find_common_type(types): return np.object return np.find_common_type(types, []) + + +def cast_scalar_to_array(shape, value, dtype=None): + """ + create np.ndarray of specified shape and dtype, filled with values + + Parameters + ---------- + shape : tuple + value : scalar value + dtype : np.dtype, optional + dtype to coerce + + Returns + ------- + ndarray of shape, filled with value, of specified / inferred dtype + + """ + + if dtype is None: + dtype, fill_value = infer_dtype_from_scalar(value) + else: + fill_value = value + + values = np.empty(shape, dtype=dtype) + values.fill(fill_value) + + return values diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index bfec1ec3ebe8c..7af9b504ec130 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -11,7 +11,8 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, + ABCIndexClass) from .inference import is_string_like from .inference import * # noqa @@ -1535,11 +1536,22 @@ def is_bool_dtype(arr_or_dtype): if arr_or_dtype is None: return False + try: tipo = _get_dtype_type(arr_or_dtype) except ValueError: # this isn't even a dtype return False + + if isinstance(arr_or_dtype, ABCIndexClass): + + # TODO(jreback) + # we don't have a boolean Index class + # so its 
object, we need to infer to + # guess this + return (arr_or_dtype.is_object and + arr_or_dtype.inferred_type == 'boolean') + return issubclass(tipo, np.bool_) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 743d623ee5e44..ee2db84513f06 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,7 +25,7 @@ import numpy.ma as ma from pandas.core.dtypes.cast import ( - maybe_upcast, infer_dtype_from_scalar, + maybe_upcast, maybe_cast_to_datetime, maybe_infer_to_datetimelike, maybe_convert_platform, @@ -33,6 +33,7 @@ invalidate_string_dtypes, coerce_to_dtypes, maybe_upcast_putmask, + cast_scalar_to_array, find_common_type) from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -59,6 +60,7 @@ is_named_tuple) from pandas.core.dtypes.missing import isnull, notnull + from pandas.core.common import (_try_sort, _default_index, _values_from_object, @@ -355,15 +357,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, raise_with_traceback(exc) if arr.ndim == 0 and index is not None and columns is not None: - if isinstance(data, compat.string_types) and dtype is None: - dtype = np.object_ - if dtype is None: - dtype, data = infer_dtype_from_scalar(data) - - values = np.empty((len(index), len(columns)), dtype=dtype) - values.fill(data) - mgr = self._init_ndarray(values, index, columns, dtype=dtype, - copy=False) + values = cast_scalar_to_array((len(index), len(columns)), + data, dtype=dtype) + mgr = self._init_ndarray(values, index, columns, + dtype=values.dtype, copy=False) else: raise ValueError('DataFrame constructor not properly called!') @@ -477,7 +474,7 @@ def _get_axes(N, K, index=index, columns=columns): values = _prep_ndarray(values, copy=copy) if dtype is not None: - if values.dtype != dtype: + if not is_dtype_equal(values.dtype, dtype): try: values = values.astype(dtype) except Exception as orig: @@ -2653,9 +2650,8 @@ def reindexer(value): else: # upcast the scalar - dtype, value = infer_dtype_from_scalar(value) - 
value = np.repeat(value, len(self.index)).astype(dtype) - value = maybe_cast_to_datetime(value, dtype) + value = cast_scalar_to_array(len(self.index), value) + value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly if is_extension_type(value): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e541f1532d0a0..cf66487b77020 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, - needs_i8_conversion, is_scalar, is_number, is_integer, is_bool, @@ -5301,48 +5300,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise NotImplemented("cannot align with a higher dimensional " "NDFrame") - elif is_list_like(other): - - if self.ndim == 1: - - # try to set the same dtype as ourselves - try: - new_other = np.array(other, dtype=self.dtype) - except ValueError: - new_other = np.array(other) - except TypeError: - new_other = other - - # we can end up comparing integers and m8[ns] - # which is a numpy no no - is_i8 = needs_i8_conversion(self.dtype) - if is_i8: - matches = False - else: - matches = (new_other == np.array(other)) - - if matches is False or not matches.all(): - - # coerce other to a common dtype if we can - if needs_i8_conversion(self.dtype): - try: - other = np.array(other, dtype=self.dtype) - except: - other = np.array(other) - else: - other = np.asarray(other) - other = np.asarray(other, - dtype=np.common_type(other, - new_other)) - - # we need to use the new dtype - try_quick = False - else: - other = new_other - else: - - other = np.array(other) - if isinstance(other, np.ndarray): if other.shape != self.shape: @@ -5407,7 +5364,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # reconstruct the block manager self._check_inplace_setting(other) - new_data = self._data.putmask(mask=cond, new=other, align=align, + new_data = self._data.putmask(mask=cond, 
other=other, align=align, inplace=True, axis=block_axis, transpose=self._AXIS_REVERSED) self._update_inplace(new_data) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2af4f112ca941..759501c604cc4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -22,6 +22,7 @@ _ensure_platform_int, is_integer, is_float, + is_bool, is_dtype_equal, is_object_dtype, is_categorical_dtype, @@ -608,11 +609,21 @@ def repeat(self, repeats, *args, **kwargs): @Appender(_index_shared_docs['where']) def where(self, cond, other=None): + if other is None: other = self._na_value - values = np.where(cond, self.values, other) dtype = self.dtype + values = self.values + + if is_bool(other) or is_bool_dtype(other): + + # bools force casting + values = values.astype(object) + dtype = None + + values = np.where(cond, values, other) + if self._is_numeric_dtype and np.any(isnull(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. 
@@ -1040,6 +1051,7 @@ def _convert_can_do_setop(self, other): def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ + return value def _assert_can_do_op(self, value): @@ -3615,6 +3627,7 @@ def fillna(self, value=None, downcast=None): # no need to care metadata other than name # because it can't have freq if return Index(result, name=self.name) + return self._shallow_copy() _index_shared_docs['dropna'] = """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bdae0ac7ac5e9..ca6ff06534824 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -4,7 +4,9 @@ from pandas.core.dtypes.common import ( is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, - is_integer_dtype, is_scalar) + is_bool_dtype, + is_integer_dtype, is_scalar, + is_bool) from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat @@ -63,6 +65,16 @@ def _convert_tolerance(self, tolerance): raise ValueError('tolerance argument for %s must be numeric: %r' % (type(self).__name__, tolerance)) + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + + if is_bool(value) or is_bool_dtype(value): + # force conversion to object + # so we don't lose the bools + raise TypeError + + return value + @classmethod def _assert_safe_casting(cls, data, subarr): """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 15851a17274ca..6076ca4722b16 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,9 +14,11 @@ CategoricalDtype) from pandas.core.dtypes.common import ( _TD_DTYPE, _NS_DTYPE, - _ensure_int64, _ensure_platform_int, + _ensure_int64, + _ensure_platform_int, is_integer, is_dtype_equal, + is_bool_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_datetimetz, is_sparse, is_categorical, is_categorical_dtype, @@ -33,17 +35,16 @@ _get_dtype) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, - 
maybe_convert_string_to_object, maybe_upcast, - maybe_convert_scalar, maybe_promote, + maybe_promote, infer_dtype_from_scalar, + infer_dtype_from, soft_convert_objects, maybe_convert_objects, astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, array_equivalent, - _is_na_compat, + isnull, notnull, array_equivalent, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat @@ -374,7 +375,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, else: return self.copy() - original_value = value mask = isnull(self.values) if limit is not None: if not is_integer(limit): @@ -387,24 +387,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, mask[mask.cumsum(self.ndim - 1) > limit] = False # fillna, but if we cannot coerce, then try again as an ObjectBlock - try: - values, _, value, _ = self._try_coerce_args(self.values, value) - blocks = self.putmask(mask, value, inplace=inplace) - blocks = [b.make_block(values=self._try_coerce_result(b.values)) - for b in blocks] - return self._maybe_downcast(blocks, downcast) - except (TypeError, ValueError): - - # we can't process the value, but nothing to do - if not mask.any(): - return self if inplace else self.copy() - - # we cannot coerce the underlying object, so - # make an ObjectBlock - return self.to_object_block(mgr=mgr).fillna(original_value, - limit=limit, - inplace=inplace, - downcast=False) + blocks = self.putmask(mask, value, inplace=inplace, mgr=mgr) + blocks = [b.make_block(values=self._try_coerce_result(b.values)) + for b in blocks] + return self._maybe_downcast(blocks, downcast) def _maybe_downcast(self, blocks, downcast=None): @@ -548,9 +534,6 @@ def convert(self, copy=True, **kwargs): def _can_hold_element(self, value): raise NotImplementedError() - def _try_cast(self, value): - raise NotImplementedError() - def _try_cast_result(self, result, dtype=None): """ try to cast the result to our original type, we may have roundtripped thru object 
in the mean-time @@ -590,6 +573,14 @@ def _try_operate(self, values): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ + + if np.any(notnull(other)) and not self._can_hold_element(other): + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to an {}".format( + type(other).__name__, + type(self).__name__.lower().replace('Block', ''))) + return values, False, other, False def _try_coerce_result(self, result): @@ -650,18 +641,15 @@ def replace(self, to_replace, value, inplace=False, filter=None, filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False - blocks = self.putmask(mask, value, inplace=inplace) + blocks = self.putmask(mask, value, inplace=inplace, mgr=mgr) if convert: blocks = [b.convert(by_item=True, numeric=False, copy=not inplace) for b in blocks] return blocks except (TypeError, ValueError): - # we can't process the value, but nothing to do - if not mask.any(): - return self if inplace else self.copy() - - return self.to_object_block(mgr=mgr).replace( + block = self.to_object_block(mgr) + return block.replace( to_replace=original_to_replace, value=value, inplace=inplace, filter=filter, regex=regex, convert=convert) @@ -676,6 +664,7 @@ def setitem(self, indexer, value, mgr=None): indexer is a direct slice/positional indexer; value must be a compatible shape """ + orig_value = value # coerce None values, if appropriate if value is None: @@ -683,13 +672,20 @@ def setitem(self, indexer, value, mgr=None): value = np.nan # coerce args - values, _, value, _ = self._try_coerce_args(self.values, value) - arr_value = np.array(value) + try: + values, _, value, _ = self._try_coerce_args(self.values, value) + arr_value = np.array(value) + except (ValueError, TypeError): + + # coercion has failed to the current type + # upcast to something that can hold it + block = self.coerce_to_target_dtype(value) + return block.setitem(indexer, orig_value, mgr=mgr) # cast the 
values to a type that can hold nan (if necessary) - if not self._can_hold_element(value): - dtype, _ = maybe_promote(arr_value.dtype) - values = values.astype(dtype) + if not self._can_hold_element(orig_value): + block = self.coerce_to_target_dtype(value) + return block.setitem(indexer, orig_value, mgr=mgr) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) values = transf(values) @@ -757,6 +753,8 @@ def _is_empty_indexer(indexer): else: values[indexer] = value + # TODO: replace with coerce_to_target_dtype + ##### # coerce and try to infer the dtypes of the result if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, value.dtype): @@ -795,17 +793,19 @@ def _is_empty_indexer(indexer): return [self] - def putmask(self, mask, new, align=True, inplace=False, axis=0, + def putmask(self, mask, other, align=True, inplace=False, axis=0, transpose=False, mgr=None): - """ putmask the data to the block; it is possible that we may create a - new dtype of block + """ putmask the data to the block; we may create 1 or more + split blocks, with different dtypes return the resulting block(s) + this will NOT raise to the outside context! 
+ Parameters ---------- mask : the condition to respect - new : a ndarray/object + other : a ndarray/object align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False axis : int @@ -816,49 +816,78 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, ------- a list of new blocks, the result of the putmask """ + new_values = self.values + orig_other = other - new_values = self.values if inplace else self.values.copy() - - if hasattr(new, 'reindex_axis'): - new = new.values + if hasattr(other, 'reindex_axis'): + other = other.values if hasattr(mask, 'reindex_axis'): mask = mask.values # if we are passed a scalar None, convert it here - if not is_list_like(new) and isnull(new) and not self.is_object: - new = self.fill_value + if is_scalar(other) and isnull(other) and not self.is_object: + other = self.fill_value + + # we will raise with an incompt type here + try: + _, _, other, _ = self._try_coerce_args(new_values, other) + except (ValueError, TypeError): + # coercion has failed to the current type + # upcast to object; if we are a single column + # already, convert to object + pass + + if self._can_hold_element(orig_other): + # we may have converted the other + # at this point + + new_values = self.values if inplace else self.values.copy() - if self._can_hold_element(new): if transpose: new_values = new_values.T - new = self._try_cast(new) - # If the default repeat behavior in np.putmask would go in the # wrong direction, then explictly repeat and reshape new instead - if getattr(new, 'ndim', 0) >= 1: - if self.ndim - 1 == new.ndim and axis == 1: - new = np.repeat( - new, new_values.shape[-1]).reshape(self.shape) - new = new.astype(new_values.dtype) + if getattr(other, 'ndim', 0) >= 1: + if self.ndim - 1 == other.ndim and axis == 1: + other = np.repeat( + other, new_values.shape[-1]).reshape(self.shape) + other = other.astype(new_values.dtype) + + # we require exact matches between the len 
of the + # values we are setting (or is compat). np.putmask + # doesn't check this and will simply truncate / pad + # the output, but we want sane error messages + # + # TODO: this prob needs some better checking + # for 2D cases + if ((is_list_like(other) and + np.any(mask[mask]) and + getattr(other, 'ndim', 1) == 1)): - np.putmask(new_values, mask, new) + if not (mask.shape[-1] == len(other) or + mask[mask].shape[-1] == len(other) or + len(other) == 1): + raise ValueError("cannot assign mismatch " + "length to masked array") + + np.putmask(new_values, mask, other) # maybe upcast me elif mask.any(): if transpose: mask = mask.T - if isinstance(new, np.ndarray): - new = new.T + if isinstance(other, np.ndarray): + other = other.T axis = new_values.ndim - axis - 1 # Pseudo-broadcast - if getattr(new, 'ndim', 0) >= 1: - if self.ndim - 1 == new.ndim: - new_shape = list(new.shape) + if getattr(other, 'ndim', 0) >= 1: + if self.ndim - 1 == other.ndim: + new_shape = list(other.shape) new_shape.insert(axis, 1) - new = new.reshape(tuple(new_shape)) + other = other.reshape(tuple(new_shape)) # need to go column by column new_blocks = [] @@ -867,33 +896,30 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, m = mask[i] v = new_values[i] - # need a new block if m.any(): - if isinstance(new, np.ndarray): - n = np.squeeze(new[i % new.shape[0]]) - else: - n = np.array(new) - - # type of the new block - dtype, _ = maybe_promote(n.dtype) + # need a new block + block = make_block( + _block_shape(v, ndim=self.ndim), + placement=[ref_loc]) + block = block.putmask_a_column(m, orig_other, + inplace=inplace) - # we need to explicitly astype here to make a copy - n = n.astype(dtype) - - nv = _putmask_smart(v, m, n) else: nv = v if inplace else v.copy() + nv = nv[np.newaxis] - # Put back the dimension that was taken from it and make - # a block out of the result. 
- block = self.make_block(values=nv[np.newaxis], - placement=[ref_loc], fastpath=True) + # Put back the dimension that was taken + # from it and make + # a block out of the result. + block = self.make_block( + values=nv, placement=[ref_loc], fastpath=True) new_blocks.append(block) else: - nv = _putmask_smart(new_values, mask, new) - new_blocks.append(self.make_block(values=nv, fastpath=True)) + + b = self.putmask_a_column(mask, orig_other, inplace=inplace) + new_blocks.append(b) return new_blocks @@ -905,6 +931,79 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, return [self.make_block(new_values, fastpath=True)] + def putmask_a_column(self, mask, other, inplace=False): + """ + a helper routine that will putmask on a single column + return a block with a potentially new dtype + + Parameters + ---------- + mask : boolean mask, same shape as self + other : scalar or shape compat with self + inplace : boolean, default False + operate in-place + + Returns + ------- + Block + + """ + + try: + _, _, new, _ = self._try_coerce_args(self.values, other) + + if not inplace: + self = self.copy() + np.putmask(self.values, mask, new) + return self + + except (ValueError, TypeError): + pass + + self = self.coerce_to_target_dtype(other) + return self.putmask_a_column(mask=mask, other=other, + inplace=False) + + def coerce_to_target_dtype(self, other): + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if is_dtype_equal(self.dtype, dtype): + return self + + if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): + # we don't upcast to bool + return self.astype(object) + + elif self.is_datelike: + + # we don't upcast i8 + if is_integer_dtype(dtype): + return self.astype(object) + + # don't upcast 
timezone with different timezone or no timezone + if self.is_datetime: + mytz = getattr(self.dtype, 'tz', None) + othertz = getattr(dtype, 'tz', None) + + if str(mytz) != str(othertz): + return self.astype(object) + + try: + return self.astype(dtype) + except (ValueError, TypeError): + pass + + return self.astype(object) + def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', fill_value=None, coerce=False, downcast=None, mgr=None, @@ -1135,8 +1234,17 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): transf = (lambda x: x.T) if is_transposed else (lambda x: x) # coerce/transpose the args if needed - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) + try: + values, values_mask, other, other_mask = self._try_coerce_args( + transf(values), other) + except (ValueError, TypeError): + + # coercion has failed to the current type + # upcast to object + block = self.to_object_block(mgr) + return block.eval(func=func, other=other, + raise_on_error=raise_on_error, + try_cast=try_cast, mgr=None) # get the result, may need to transpose the other def get_result(other): @@ -1165,19 +1273,6 @@ def get_result(other): return self._try_coerce_result(result) - # error handler if we have an issue operating with the function - def handle_error(): - - if raise_on_error: - # The 'detail' variable is defined in outer scope. 
- raise TypeError('Could not operate %s with block values %s' % - (repr(other), str(detail))) # noqa - else: - # return the values - result = np.empty(values.shape, dtype='O') - result.fill(np.nan) - return result - # get the result try: with np.errstate(all='ignore'): @@ -1187,8 +1282,19 @@ def handle_error(): # GH4576, so raise instead of allowing to pass through except ValueError as detail: raise + + # convert these to TypeErrors + except NotImplementedError as detail: + raise TypeError(detail) + except Exception as detail: - result = handle_error() + + if raise_on_error: + raise + + # return the values + result = np.empty(values.shape, dtype='O') + result.fill(np.nan) # technically a broadcast error in numpy can 'work' by returning a # boolean False @@ -1233,7 +1339,6 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ - values = self.values if transpose: values = values.T @@ -1254,28 +1359,37 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = maybe_convert_string_to_object(other) - other = maybe_convert_scalar(other) + # all True + if cond.ravel().all(): + return self.make_block(self.values) + + try: + values, _, other, _ = self._try_coerce_args(values, other) + except (ValueError, TypeError) as detail: + + # try to coerce to the other dtype + block = self.coerce_to_target_dtype(other) + return block.where(other, cond, align=align, + raise_on_error=raise_on_error, + try_cast=try_cast, axis=axis, + transpose=transpose, mgr=mgr) # our where function def func(cond, values, other): - if cond.ravel().all(): - return values - values, values_mask, other, other_mask = self._try_coerce_args( - values, other) try: - return self._try_coerce_result(expressions.where( - cond, values, other, raise_on_error=True)) + result = expressions.where( + cond, values, other, raise_on_error=True) + return 
self._try_coerce_result(result) except Exception as detail: + if raise_on_error: - raise TypeError('Could not operate [%s] with block values ' - '[%s]' % (repr(other), str(detail))) - else: - # return the values - result = np.empty(values.shape, dtype='float64') - result.fill(np.nan) - return result + raise + + # return the values + result = np.empty(values.shape, dtype='float64') + result.fill(np.nan) + return result # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) @@ -1537,7 +1651,16 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, # use block's copy logic. # .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new_values, _, new, _ = self._try_coerce_args(new_values, new) + try: + new_values, _, new, _ = self._try_coerce_args(new_values, new) + except: + + # we cannot coerce the underlying object, so + # make an ObjectBlock + block = self.to_object_block(mgr=mgr) + return block.putmask(mask=mask, other=new, align=align, + inplace=inplace, axis=axis, + transpose=transpose, mgr=mgr) if isinstance(new, np.ndarray) and len(new) == len(mask): new = new[mask] @@ -1582,16 +1705,10 @@ def _can_hold_element(self, element): tipo = element.dtype.type return (issubclass(tipo, (np.floating, np.integer)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) - return (isinstance(element, (float, int, np.float_, np.int_)) and + return (isinstance(element, (float, int, np.floating, np.integer)) and not isinstance(element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64))) - def _try_cast(self, element): - try: - return float(element) - except: # pragma: no cover - return element - def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -1635,17 +1752,14 @@ class ComplexBlock(FloatOrComplexBlock): 
def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, - (np.floating, np.integer, np.complexfloating)) + return (issubclass( + element.dtype.type, (np.floating, + np.integer, + np.complexfloating)) and not + issubclass(element.dtype.type, np.bool_)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and - not isinstance(bool, np.bool_)) - - def _try_cast(self, element): - try: - return complex(element) - except: # pragma: no cover - return element + not isinstance(element, (bool, np.bool_))) def should_store(self, value): return issubclass(value.dtype.type, np.complexfloating) @@ -1664,12 +1778,6 @@ def _can_hold_element(self, element): not issubclass(tipo, (np.datetime64, np.timedelta64))) return is_integer(element) - def _try_cast(self, element): - try: - return int(element) - except: # pragma: no cover - return element - def should_store(self, value): return is_integer_dtype(value) and value.dtype == self.dtype @@ -1708,8 +1816,25 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') - def fillna(self, value, **kwargs): + def _can_hold_element(self, element): + """ + boolean if we can hold this element + """ + + if is_list_like(element): + + element = np.asarray(element) + return element.dtype == _TD_DTYPE + elif isnull(element): + return True + + elif isinstance(element, timedelta): + return True + + return False + + def fillna(self, value, **kwargs): # allow filling with integers to be # interpreted as seconds if not isinstance(value, np.timedelta64) and is_integer(value): @@ -1736,7 +1861,7 @@ def _try_coerce_args(self, values, other): other_mask = False if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert bool to a Timedelta") elif is_null_datelike_scalar(other): other = tslib.iNaT other_mask = True @@ -1748,14 +1873,15 @@ def _try_coerce_args(self, values, 
other): other = Timedelta(other).value elif isinstance(other, timedelta): other = Timedelta(other).value - elif isinstance(other, np.ndarray): - other_mask = isnull(other) + elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): other = other.astype('i8', copy=False).view('i8') - else: - # scalar - other = Timedelta(other) other_mask = isnull(other) - other = other.value + else: + + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to a Timedelta".format( + type(other).__name__)) return values, values_mask, other, other_mask @@ -1806,14 +1932,8 @@ class BoolBlock(NumericBlock): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, np.integer) - return isinstance(element, (int, bool)) - - def _try_cast(self, element): - try: - return bool(element) - except: # pragma: no cover - return element + return issubclass(element.dtype.type, np.bool_) + return isinstance(element, bool) def should_store(self, value): return issubclass(value.dtype.type, np.bool_) @@ -1949,9 +2069,6 @@ def _maybe_downcast(self, blocks, downcast=None): def _can_hold_element(self, element): return True - def _try_cast(self, element): - return element - def should_store(self, value): return not (issubclass(value.dtype.type, (np.integer, np.floating, np.complexfloating, @@ -2243,17 +2360,33 @@ def _astype(self, dtype, mgr=None, **kwargs): return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs) def _can_hold_element(self, element): + """ + boolean if we can hold this element, will raise on a + tz-aware datetime + """ + if is_list_like(element): - element = np.array(element) - return element.dtype == _NS_DTYPE or element.dtype == np.int64 - return (is_integer(element) or isinstance(element, datetime) or - isnull(element)) - def _try_cast(self, element): - try: - return int(element) - except: - return element + # we cannot hold tz-aware + # higher level to handle + if 
getattr(element, 'tz', None) is not None: + return False + + element = np.asarray(element) + return element.dtype == _NS_DTYPE + + elif isnull(element): + return True + + elif isinstance(element, datetime): + + # we cannot hold tz-aware + if getattr(element, 'tzinfo', None) is not None: + return False + + return True + + return False def _try_coerce_args(self, values, other): """ @@ -2277,7 +2410,7 @@ def _try_coerce_args(self, values, other): other_mask = False if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert a bool to a Datetime") elif is_null_datelike_scalar(other): other = tslib.iNaT other_mask = True @@ -2288,30 +2421,26 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') - elif hasattr(other, 'dtype') and is_integer_dtype(other): + elif hasattr(other, 'dtype') and is_datetime64_dtype(other): + if is_datetime64tz_dtype(other): + raise TypeError("cannot coerce a Timestamp with a tz on a " + "naive Block") other = other.view('i8') - else: - try: - other = np.asarray(other) - other_mask = isnull(other) + other_mask = isnull(other) - other = other.astype('i8', copy=False).view('i8') - except ValueError: + else: - # coercion issues - # let higher levels handle - raise TypeError + # coercion issues + # let higher levels handle + raise TypeError("cannot convert a {} to a Datetime".format( + type(other).__name__)) return values, values_mask, other, other_mask def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - try: - result = result.astype('M8[ns]') - except ValueError: - pass + result = _coerce_array_to_datetime(result) elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) return result @@ -2386,6 +2515,37 @@ def copy(self, deep=True, mgr=None): values = values.copy(deep=True) return self.make_block_same_class(values) + def 
_can_hold_element(self, element): + """ + boolean if we can hold this element, will raise on a + tz-aware datetime + """ + + if is_list_like(element): + + dtype = getattr(element, 'dtype', None) + tz = getattr(dtype, 'tz', None) + + # we can only hold an identical tz-aware + if str(self.values.tz) != str(tz): + return False + + element = np.asarray(element) + return element.dtype == _NS_DTYPE + + elif isnull(element): + return True + + elif isinstance(element, datetime): + + # we can only hold an identical tz-aware + if str(self.values.tz) != str(getattr(element, 'tzinfo', None)): + return False + + return True + + return False + def external_values(self): """ we internally represent the data as a DatetimeIndex, but for external compat with ndarray, export as a ndarray of Timestamps @@ -2447,7 +2607,7 @@ def _try_coerce_args(self, values, other): other_mask = isnull(other) if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert a bool to a tz-aware Datetime") elif (is_null_datelike_scalar(other) or (is_scalar(other) and isnull(other))): other = tslib.iNaT @@ -2466,21 +2626,39 @@ def _try_coerce_args(self, values, other): raise ValueError("incompatible or non tz-aware value") other_mask = isnull(other) other = other.value + elif hasattr(other, 'dtype'): + tz = getattr(other, 'tz', None) + if tz is None or str(tz) != str(self.values.tz): + raise ValueError("incompatible or non tz-aware value") + other_mask = isnull(other) + other = other.view('i8') + else: + + if is_null_datelike_scalar(other): + other_mask = True + else: + # higher level to coerce + raise TypeError( + "cannot convert a {} to a tz-aware Datetime".format( + type(other).__name__)) return values, values_mask, other, other_mask def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - result = result.astype('M8[ns]') + result = _coerce_array_to_datetime(result) elif isinstance(result, 
(np.integer, np.float, np.datetime64)): result = lib.Timestamp(result, tz=self.values.tz) if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial if result.ndim > 1: result = result.reshape(np.prod(result.shape)) - result = self.values._shallow_copy(result) + + try: + result = self.values._shallow_copy(result) + except (TypeError, ValueError): + pass return result @@ -2563,6 +2741,11 @@ def sp_index(self): def kind(self): return self.values.kind + def _can_hold_element(self, element): + """ we should actually check that our dtype is compat + with the inferred type """ + return True + def _astype(self, dtype, copy=False, raise_on_error=True, values=None, klass=None, mgr=None, **kwargs): if values is None: @@ -3055,7 +3238,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, elif f == 'putmask': align_copy = False if kwargs.get('align', True): - align_keys = ['new', 'mask'] + align_keys = ['other', 'mask'] else: align_keys = ['mask'] elif f == 'eval': @@ -3246,16 +3429,6 @@ def comp(s): return isnull(values) return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) - def _cast_scalar(block, scalar): - dtype, val = infer_dtype_from_scalar(scalar, pandas_dtype=True) - if not is_dtype_equal(block.dtype, dtype): - dtype = find_common_type([block.dtype, dtype]) - block = block.astype(dtype) - # use original value - val = scalar - - return block, val - masks = [comp(s) for i, s in enumerate(src_list)] result_blocks = [] @@ -3278,8 +3451,8 @@ def _cast_scalar(block, scalar): # particular block m = masks[i][b.mgr_locs.indexer] if m.any(): - b, val = _cast_scalar(b, d) - new_rb.extend(b.putmask(m, val, inplace=True)) + b = b.putmask(m, d, mgr=mgr) + new_rb.extend(b) else: new_rb.append(b) rb = new_rb @@ -4510,6 +4683,23 @@ def _interleaved_dtype(blocks): return dtype +def _coerce_array_to_datetime(result): + """ preserves the underlying array """ + + if result.dtype.kind in ['i', 'f']: + result = result.astype('M8[ns]') + elif 
result.dtype.kind in ['O']: + try: + # PITA + # we could have mixed naive & tz-aware + from pandas import to_datetime + result = to_datetime(result.ravel(), box=False) + except (TypeError, ValueError): + pass + + return result + + def _consolidate(blocks): """ Merge blocks having same dtype, exclude non-consolidating blocks @@ -4756,61 +4946,6 @@ def _transform_index(index, func, level=None): return Index(items, name=index.name) -def _putmask_smart(v, m, n): - """ - Return a new block, try to preserve dtype if possible. - - Parameters - ---------- - v : `values`, updated in-place (array like) - m : `mask`, applies to both sides (array like) - n : `new values` either scalar or an array like aligned with `values` - """ - # n should be the length of the mask or a scalar here - if not is_list_like(n): - n = np.array([n] * len(m)) - elif isinstance(n, np.ndarray) and n.ndim == 0: # numpy scalar - n = np.repeat(np.array(n, ndmin=1), len(m)) - - # see if we are only masking values that if putted - # will work in the current dtype - try: - nn = n[m] - - # make sure that we have a nullable type - # if we have nulls - if not _is_na_compat(v, nn[0]): - raise ValueError - - nn_at = nn.astype(v.dtype) - - # avoid invalid dtype comparisons - if not is_numeric_v_string_like(nn, nn_at): - comp = (nn == nn_at) - if is_list_like(comp) and comp.all(): - nv = v.copy() - nv[m] = nn_at - return nv - except (ValueError, IndexError, TypeError): - pass - - # change the dtype - dtype, _ = maybe_promote(n.dtype) - - if is_extension_type(v.dtype) and is_object_dtype(dtype): - nv = v.get_values(dtype) - else: - nv = v.astype(dtype) - - try: - nv[m] = n[m] - except ValueError: - idx, = np.where(np.squeeze(m)) - for mask_index, new_val in zip(idx, n[m]): - nv[mask_index] = new_val - return nv - - def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): """ Concatenate block managers into one. 
diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d1f5b4587059c..3b8be05bfc4a7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,6 +8,7 @@ import warnings from pandas.core.dtypes.cast import ( infer_dtype_from_scalar, + cast_scalar_to_array, maybe_cast_item) from pandas.core.dtypes.common import ( is_integer, is_list_like, @@ -178,11 +179,9 @@ def _init_data(self, data, copy, dtype, **kwargs): copy = False dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): - if dtype is None: - dtype, data = infer_dtype_from_scalar(data) - values = np.empty([len(x) for x in passed_axes], dtype=dtype) - values.fill(data) - mgr = self._init_matrix(values, passed_axes, dtype=dtype, + values = cast_scalar_to_array([len(x) for x in passed_axes], + data, dtype=dtype) + mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, copy=False) copy = False else: # pragma: no cover @@ -582,9 +581,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - dtype, value = infer_dtype_from_scalar(value) - mat = np.empty(shape[1:], dtype=dtype) - mat.fill(value) + mat = cast_scalar_to_array(shape[1:], value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) diff --git a/pandas/core/series.py b/pandas/core/series.py index 129f291e5f843..7f10899091c73 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1696,7 +1696,7 @@ def update(self, other): other = other.reindex_like(self) mask = notnull(other) - self._data = self._data.putmask(mask=mask, new=other, inplace=True) + self._data = self._data.putmask(mask=mask, other=other, inplace=True) self._maybe_update_cacher() # ---------------------------------------------------------------------- diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 767e99d98cf29..e91a4d0317905 100644 --- a/pandas/tests/dtypes/test_cast.py +++ 
b/pandas/tests/dtypes/test_cast.py @@ -9,8 +9,9 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT - +from pandas import Period, Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT +from pandas.api.types import is_dtype_equal +import pandas as pd from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_convert_objects, @@ -18,7 +19,8 @@ infer_dtype_from_array, maybe_convert_string_to_object, maybe_convert_scalar, - find_common_type) + find_common_type, + cast_scalar_to_array) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -131,29 +133,85 @@ def test_infer_dtype_from_scalar(self): dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' - for data in [date(2000, 1, 1), - Timestamp(1, tz='US/Eastern'), 'foo']: + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) + assert dtype == 'datetime64[ns, {0}]'.format(tz) + assert val == dt.value + + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == np.object_ + assert val == dt + + for freq in ['M', 'D']: + p = Period('2011-01-01', freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) + assert dtype == 'period[{0}]'.format(freq) + assert val == p.ordinal + + dtype, val = infer_dtype_from_scalar(p) + dtype == np.object_ + assert val == p + + # misc + for data in [date(2000, 1, 1), 'foo']: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_ @pytest.mark.parametrize( - "arr, expected", - [('foo', np.object_), - (b'foo', np.object_), - (1, np.int_), - (1.5, np.float_), - ([1], np.int_), - (np.array([1]), np.int_), - ([np.nan, 1, ''], np.object_), - (np.array([[1.0, 2.0]]), np.float_), - (Timestamp('20160101'), np.object_), - (np.datetime64('2016-01-01'), np.dtype(' 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 
'ne'} + for left, right in ops.items(): left_f = getattr(operator, left) right_f = getattr(operator, right) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 25cc810299678..d4f0ec547f9ef 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -98,12 +98,25 @@ def test_setitem_series_int64(self): self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) # int + complex -> complex - exp = pd.Series([1, 1 + 1j, 3, 4]) + exp = pd.Series([1, 1 + 1j, 3, 4], dtype=np.complex128) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # int + bool -> int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, True, exp, np.int64) + # int + bool -> object + exp = pd.Series([1, True, 3, 4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) + + def test_setitem_series_int8(self): + # integer dtype coercion (no change) + obj = pd.Series([1, 2, 3, 4], dtype=np.int8) + assert obj.dtype == np.int8 + + exp = pd.Series([1, 1, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, np.int32(1), exp, np.int8) + + # BUG: it must be Series([1, 1, 3, 4], dtype=np.int16) + exp = pd.Series([1, 0, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, np.int16(2**9), exp, + np.int8) def test_setitem_series_float64(self): obj = pd.Series([1.1, 2.2, 3.3, 4.4]) @@ -118,13 +131,13 @@ def test_setitem_series_float64(self): self._assert_setitem_series_conversion(obj, 1.1, exp, np.float64) # float + complex -> complex - exp = pd.Series([1.1, 1 + 1j, 3.3, 4.4]) + exp = pd.Series([1.1, 1 + 1j, 3.3, 4.4], dtype=np.complex128) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # float + bool -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = pd.Series([1.1, True, 3.3, 4.4]) + 
self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_complex128(self): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) @@ -132,7 +145,7 @@ def test_setitem_series_complex128(self): # complex + int -> complex exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + self._assert_setitem_series_conversion(obj, 1, exp, np.complex128) # complex + float -> complex exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j]) @@ -142,9 +155,9 @@ def test_setitem_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_bool(self): obj = pd.Series([True, False, True, False]) @@ -198,14 +211,18 @@ def test_setitem_series_datetime64(self): exp, 'datetime64[ns]') # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, np.object) - # ToDo: add more tests once the above issue has been fixed + # datetime64 + object -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + 'x', + pd.Timestamp('2011-01-03'), + pd.Timestamp('2011-01-04')]) + self._assert_setitem_series_conversion(obj, 'x', exp, np.object) def test_setitem_series_datetime64tz(self): tz = 'US/Eastern' @@ -224,20 +241,59 @@ def test_setitem_series_datetime64tz(self): self._assert_setitem_series_conversion(obj, value, exp, 'datetime64[ns, US/Eastern]') - # datetime64 + int -> 
object - # ToDo: The result must be object + # datetime64tz + datetime64tz (different tz) -> object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), + pd.Timestamp('2012-01-01', tz='US/Pacific'), pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_setitem_series_conversion(obj, 1, exp, - 'datetime64[ns, US/Eastern]') + value = pd.Timestamp('2012-01-01', tz='US/Pacific') + self._assert_setitem_series_conversion(obj, value, exp, np.object) + + # datetime64tz + datetime64 -> object + exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01'), + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + value = pd.Timestamp('2012-01-01') + self._assert_setitem_series_conversion(obj, value, exp, np.object) - # ToDo: add more tests once the above issue has been fixed + # datetime64 + int -> object + exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + 1, + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + self._assert_setitem_series_conversion(obj, 1, exp, np.object) def test_setitem_series_timedelta64(self): - pass + obj = pd.Series([pd.Timedelta('1 day'), + pd.Timedelta('2 day'), + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + assert obj.dtype == 'timedelta64[ns]' + + # timedelta64 + timedelta64 -> timedelta64 + exp = pd.Series([pd.Timedelta('1 day'), + pd.Timedelta('12 day'), + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, pd.Timedelta('12 day'), + exp, 'timedelta64[ns]') + + # timedelta64 + int -> object + exp = pd.Series([pd.Timedelta('1 day'), + 1, + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, 1, exp, np.object) + + # timedelta64 + object -> object + exp = pd.Series([pd.Timedelta('1 day'), + 'x', + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, 'x', exp, np.object) + 
@pytest.mark.xfail(reason="add some tests for me") def test_setitem_series_period(self): pass @@ -610,13 +666,13 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # int + bool -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.int64) + # int + bool -> object + exp = klass([1, True, 3, 1], dtype=object) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1, 0, 3, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = klass([1, False, 3, True], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_int64(self): self._where_int64_common(pd.Series) @@ -656,13 +712,13 @@ def _where_float64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, 1.0], dtype=object) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1.1, 0.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = klass([1.1, False, 3.3, True], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_float64(self): self._where_float64_common(pd.Series) @@ -699,13 +755,13 @@ def test_where_series_complex128(self): exp = pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j]) self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 1]) + self._assert_where_conversion(obj, cond, True, exp, np.object) 
values = pd.Series([True, False, True, True]) - exp = pd.Series([1 + 1j, 0, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([1 + 1j, False, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_index_complex128(self): pass @@ -715,29 +771,29 @@ def test_where_series_bool(self): assert obj.dtype == np.bool cond = pd.Series([True, False, True, False]) - # bool + int -> int - exp = pd.Series([1, 1, 1, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) + # bool + int -> object + exp = pd.Series([True, 1, True, 1]) + self._assert_where_conversion(obj, cond, 1, exp, np.object) values = pd.Series([5, 6, 7, 8]) - exp = pd.Series([1, 6, 1, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = pd.Series([True, 6, True, 8], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + float -> float - exp = pd.Series([1.0, 1.1, 1.0, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) + # bool + float -> object + exp = pd.Series([True, 1.1, True, 1.1]) + self._assert_where_conversion(obj, cond, 1.1, exp, np.object) values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1.0, 6.6, 1.0, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = pd.Series([True, 6.6, True, 8.8], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + complex -> complex - exp = pd.Series([1, 1 + 1j, 1, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) + # bool + complex -> object + exp = pd.Series([True, 1 + 1j, True, 1 + 1j], dtype=object) + self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.object) values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1, 6 + 6j, 1, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([True, 6 + 6j, True, 8 + 8j], 
dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) # bool + bool -> bool exp = pd.Series([True, True, True, True]) @@ -776,12 +832,15 @@ def test_where_series_datetime64(self): pd.Timestamp('2012-01-04')]) self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - # ToDo: coerce to object - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assert_raises_regex(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) + # datetime64 + datetime64tz -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + pd.Timestamp('2012-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-03'), + pd.Timestamp('2012-01-01', tz='US/Eastern')]) + values = pd.Timestamp('2012-01-01', tz='US/Eastern') + self._assert_where_conversion(obj, cond, values, exp, np.object) - # ToDo: do not coerce to UTC, must be object + # TODO: do not coerce to UTC, must be object values = pd.Series([pd.Timestamp('2012-01-01', tz='US/Eastern'), pd.Timestamp('2012-01-02', tz='US/Eastern'), pd.Timestamp('2012-01-03', tz='US/Eastern'), @@ -921,9 +980,9 @@ def _fillna_float64_common(self, klass): else: NotImplementedError - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, 4.4]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_series_float64(self): self._fillna_float64_common(pd.Series) @@ -947,9 +1006,9 @@ def test_fillna_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_index_complex128(self): 
self._fillna_float64_common(pd.Index) @@ -982,15 +1041,15 @@ def test_fillna_series_datetime64(self): pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) value = pd.Timestamp('2012-01-01', tz='US/Eastern') + self._assert_fillna_conversion(obj, value, exp, np.object) # datetime64 + int => object - # ToDo: must be coerced to object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_fillna_conversion(obj, 1, exp, np.object) # datetime64 + object => object exp = pd.Series([pd.Timestamp('2011-01-01'), @@ -1014,6 +1073,7 @@ def test_fillna_series_datetime64tz(self): pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) value = pd.Timestamp('2012-01-01', tz=tz) + self._assert_fillna_conversion(obj, value, exp, 'datetime64[ns, US/Eastern]') @@ -1033,14 +1093,12 @@ def test_fillna_series_datetime64tz(self): value = pd.Timestamp('2012-01-01', tz='Asia/Tokyo') self._assert_fillna_conversion(obj, value, exp, np.object) - # datetime64tz + int => datetime64tz - # ToDo: must be object + # datetime64tz + int => object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), + 1, pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 1, exp, - 'datetime64[ns, US/Eastern]') + self._assert_fillna_conversion(obj, 1, exp, np.object) # datetime64tz + object => object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index da8a896cb6f4a..a6bca819f9898 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,5 +1,3 @@ -import pytest - import numpy as np import pandas as pd from pandas import date_range, Index, DataFrame, Series, Timestamp @@ -12,7 +10,6 @@ def test_indexing_with_datetime_tz(self): # 8260 
# support datetime64 with tz - idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') dr = date_range('20130110', periods=3) @@ -56,10 +53,11 @@ def test_indexing_with_datetime_tz(self): 'US/Pacific') # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v + df2 = df.copy() + assert df2.time.dtype == 'datetime64[ns, UTC]' - pytest.raises(ValueError, f) + df2.loc[df2.new_col == 'new', 'time'] = v + assert df2.time.dtype == 'object' v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9fa677eb624ae..3f9b4146f1616 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -354,8 +354,15 @@ def test_multi_assign(self): tm.assert_frame_equal(df2, expected) # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], + 'PF': [0, 0, 0, 0, 1, 1], + 'col1': [0, 1, 4, 6, 8, 10], + 'col2': [12, 7, 16, np.nan, 20, 22]}) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 18c6c9a6dd021..d8abf242186bd 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1021,11 +1021,11 @@ def test_clip_with_datetimes(self): # naive and tz-aware datetimes t = Timestamp('2015-12-01 09:30:30') - s = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - '2015-12-01 09:31:00')]) + s = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:31:00')]) result = s.clip(upper=t) - expected = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - 
'2015-12-01 09:30:30')]) + expected = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:30:30')]) assert_series_equal(result, expected) t = Timestamp('2015-12-01 09:30:30', tz='US/Eastern') diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6ded4d593a571..0397dda6d021f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -771,13 +771,14 @@ def test_setitem_dtypes(self): s[0] = np.nan assert_series_equal(s, expected) + # bool & float -> object s = Series([False]) s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan])) + assert_series_equal(s, Series([np.nan], dtype=object)) s = Series([False, True]) s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan, 1.0])) + assert_series_equal(s, Series([np.nan, True])) def test_set_value(self): idx = self.ts.index[10] @@ -1360,14 +1361,16 @@ def test_where_dups(self): expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) - def test_where_datetime(self): + def test_where_datetime_coerce(self): + s = Series(date_range('20130102', periods=2)) - expected = Series([10, 10], dtype='datetime64[ns]') + expected = Series([10, 10], dtype='object') mask = np.array([False, False]) rs = s.where(mask, [10, 10]) assert_series_equal(rs, expected) + # convert to object as we are passing non-datetime64 rs = s.where(mask, 10) assert_series_equal(rs, expected) @@ -1378,7 +1381,7 @@ def test_where_datetime(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='datetime64[ns]') + expected = Series([10, None], dtype=object) assert_series_equal(rs, expected) # GH 15701 @@ -1389,9 +1392,9 @@ def test_where_datetime(self): expected = Series([pd.NaT, s[1]]) assert_series_equal(rs, expected) - def test_where_timedelta(self): + def test_where_timedelta_coerce(self): s = Series([1, 2], dtype='timedelta64[ns]') - expected = Series([10, 
10], dtype='timedelta64[ns]') + expected = Series([10, 10], dtype=object) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) @@ -1407,9 +1410,38 @@ def test_where_timedelta(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='timedelta64[ns]') + expected = Series([10, None], dtype=object) assert_series_equal(rs, expected) + def test_where_consistency(self): + + # 16402 + # where should be consistent across various functions + s = Series([Timestamp('20130101'), pd.NaT]) + + # this is currently wrong :<, should be object + result = s.fillna(Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('2012-12-31 19:00:00'), + Timestamp('2013-01-01 00:00:00')] + ).dt.tz_localize('US/Eastern') + assert_series_equal(result, expected) + + result = s.fillna('foo') + expected = Series([Timestamp('20130101'), 'foo']) + assert_series_equal(result, expected) + + s2 = s.copy() + s2[1] = 'bar' + expected = Series([Timestamp('20130101'), 'bar']) + assert_series_equal(s2, expected) + + # see 16406 for construction bug + result = s.where([True, False], Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')], + dtype=object) + assert_series_equal(result, expected) + def test_mask(self): # compare with tested results in test_where s = Series(np.random.randn(5)) @@ -1589,7 +1621,7 @@ def test_setitem_na(self): expected = Series([np.nan, 1, np.nan, 0]) s = Series([True, True, False, False]) s[::2] = np.nan - assert_series_equal(s, expected) + assert_series_equal(s, Series([np.nan, True, np.nan, False])) expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8e73c17684a16..2409d0a77d54b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -52,14 +52,14 @@ class 
TestSeriesMissingData(TestData): def test_timedelta_fillna(self): # GH 3371 - s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( - '20130102'), Timestamp('20130103 9:01:01')]) + s = Series([Timestamp('20130101'), Timestamp('20130101'), + Timestamp('20130102'), Timestamp('20130103 9:01:01')]) td = s.diff() # reg fillna result = td.fillna(0) - expected = Series([timedelta(0), timedelta(0), timedelta(1), timedelta( - days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(0), timedelta(0), timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) # interprested as seconds @@ -69,8 +69,9 @@ def test_timedelta_fillna(self): assert_series_equal(result, expected) result = td.fillna(timedelta(days=1, seconds=1)) - expected = Series([timedelta(days=1, seconds=1), timedelta( - 0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(days=1, seconds=1), timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) result = td.fillna(np.timedelta64(int(1e9))) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 35d13a62ca083..6903b93e176de 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -101,6 +101,7 @@ def test_replace_gh5319(self): ser = pd.Series(pd.date_range('20130101', periods=5)) expected = ser.copy() expected.loc[2] = pd.Timestamp('20120101') + result = ser.replace({pd.Timestamp('20130103'): pd.Timestamp('20120101')}) tm.assert_series_equal(result, expected) @@ -133,8 +134,8 @@ def check_replace(to_rep, val, expected): tm.assert_series_equal(expected, r) tm.assert_series_equal(expected, sc) - # MUST upcast to float - e = pd.Series([0., 1., 2., 3., 4.]) + # will NOT upcast to float + e = s tr, v = [3], [3.0] check_replace(tr, v, e)