Skip to content

Commit 0290308

Browse files
committed
rebase & cleanup, fixup some edge cases
closes #16402 xref to #12747
1 parent fdd19a5 commit 0290308

20 files changed

+744
-610
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+13-3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,19 @@ Backwards incompatible API changes
5050
- Accessing a non-existent attribute on a closed :class:`HDFStore` will now
5151
raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
5252

53+
.. _whatsnew_0210.dtype_conversions:
54+
55+
Dtype Conversions
56+
^^^^^^^^^^^^^^^^^
57+
58+
Example about setitem / where with bools.
59+
60+
61+
62+
- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`)
63+
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
64+
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
65+
5366

5467
.. _whatsnew_0210.api:
5568

@@ -88,9 +101,6 @@ Bug Fixes
88101
Conversion
89102
^^^^^^^^^^
90103

91-
- Bug in assignment against datetime-like data with ``int`` may incorrectly converted to datetime-like (:issue:`14145`)
92-
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
93-
94104

95105

96106
Indexing

Diff for: pandas/_libs/index.pyx

+20-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ cimport tslib
1919
from hashtable cimport *
2020
from pandas._libs import tslib, algos, hashtable as _hash
2121
from pandas._libs.tslib import Timestamp, Timedelta
22+
from datetime import datetime, timedelta
2223

2324
from datetime cimport (get_datetime64_value, _pydatetime_to_dts,
2425
pandas_datetimestruct)
@@ -507,24 +508,37 @@ cdef class TimedeltaEngine(DatetimeEngine):
507508
return 'm8[ns]'
508509

509510
cpdef convert_scalar(ndarray arr, object value):
511+
# we don't turn integers
512+
# into datetimes/timedeltas
513+
514+
# we don't turn bools into int/float/complex
515+
510516
if arr.descr.type_num == NPY_DATETIME:
511517
if isinstance(value, np.ndarray):
512518
pass
513-
elif isinstance(value, Timestamp):
514-
return value.value
519+
elif isinstance(value, datetime):
520+
return Timestamp(value).value
515521
elif value is None or value != value:
516522
return iNaT
517-
else:
523+
elif util.is_string_object(value):
518524
return Timestamp(value).value
525+
raise ValueError("cannot set a Timestamp with a non-timestamp")
526+
519527
elif arr.descr.type_num == NPY_TIMEDELTA:
520528
if isinstance(value, np.ndarray):
521529
pass
522-
elif isinstance(value, Timedelta):
523-
return value.value
530+
elif isinstance(value, timedelta):
531+
return Timedelta(value).value
524532
elif value is None or value != value:
525533
return iNaT
526-
else:
534+
elif util.is_string_object(value):
527535
return Timedelta(value).value
536+
raise ValueError("cannot set a Timedelta with a non-timedelta")
537+
538+
if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not
539+
issubclass(arr.dtype.type, np.bool_)):
540+
if util.is_bool_object(value):
541+
raise ValueError('Cannot assign bool to float/integer series')
528542

529543
if issubclass(arr.dtype.type, (np.integer, np.bool_)):
530544
if util.is_float_object(value) and value != value:

Diff for: pandas/core/dtypes/cast.py

+48-7
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import warnings
77

88
from pandas._libs import tslib, lib
9-
from pandas._libs.tslib import iNaT
9+
from pandas._libs.tslib import iNaT, Timestamp
1010
from pandas.compat import string_types, text_type, PY3
1111
from .common import (_ensure_object, is_bool, is_integer, is_float,
1212
is_complex, is_datetimetz, is_categorical_dtype,
@@ -333,6 +333,23 @@ def maybe_promote(dtype, fill_value=np.nan):
333333
return dtype, fill_value
334334

335335

336+
def infer_dtype_from(val, pandas_dtype=False):
337+
"""
338+
interpret the dtype from a scalar or array. This is a convenience
339+
routine to infer dtype from a scalar or an array
340+
341+
Parameters
342+
----------
343+
pandas_dtype : bool, default False
344+
whether to infer dtype including pandas extension types.
345+
If False, a scalar/array belonging to pandas extension types is inferred as
346+
object
347+
"""
348+
if is_scalar(val):
349+
return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype)
350+
return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
351+
352+
336353
def infer_dtype_from_scalar(val, pandas_dtype=False):
337354
"""
338355
interpret the dtype from a scalar
@@ -408,24 +425,32 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
408425
return dtype, val
409426

410427

411-
def infer_dtype_from_array(arr):
428+
def infer_dtype_from_array(arr, pandas_dtype=False):
412429
"""
413430
infer the dtype from a scalar or array
414431
415432
Parameters
416433
----------
417434
arr : scalar or array
435+
pandas_dtype : bool, default False
436+
whether to infer dtype including pandas extension types.
437+
If False, an array belonging to pandas extension types
438+
is inferred as object
418439
419440
Returns
420441
-------
421-
tuple (numpy-compat dtype, array)
442+
tuple (numpy-compat/pandas-compat dtype, array)
422443
423444
Notes
424445
-----
425-
These infer to numpy dtypes exactly
426-
with the exception that mixed / object dtypes
446+
447+
if pandas_dtype=False. these infer to numpy dtypes
448+
exactly with the exception that mixed / object dtypes
427449
are not coerced by stringifying or conversion
428450
451+
if pandas_dtype=True, datetime64tz-aware/categorical
452+
types will retain their character.
453+
429454
Examples
430455
--------
431456
>>> np.asarray([1, '1'])
@@ -442,6 +467,10 @@ def infer_dtype_from_array(arr):
442467
if not is_list_like(arr):
443468
arr = [arr]
444469

470+
if pandas_dtype and (is_categorical_dtype(arr) or
471+
is_datetime64tz_dtype(arr)):
472+
return arr.dtype, arr
473+
445474
# don't force numpy coerce with nan's
446475
inferred = lib.infer_dtype(arr)
447476
if inferred in ['string', 'bytes', 'unicode',
@@ -1028,13 +1057,25 @@ def find_common_type(types):
10281057
return np.find_common_type(types, [])
10291058

10301059

1031-
def _cast_scalar_to_array(shape, value, dtype=None):
1060+
def cast_scalar_to_array(shape, value, dtype=None):
10321061
"""
10331062
create np.ndarray of specified shape and dtype, filled with values
1063+
1064+
Parameters
1065+
----------
1066+
shape : tuple
1067+
value : scalar value
1068+
dtype : np.dtype, optional
1069+
dtype to coerce
1070+
1071+
Returns
1072+
-------
1073+
ndarray of shape, filled with value, of specified / inferred dtype
1074+
10341075
"""
10351076

10361077
if dtype is None:
1037-
dtype, fill_value = _infer_dtype_from_scalar(value)
1078+
dtype, fill_value = infer_dtype_from_scalar(value)
10381079
else:
10391080
fill_value = value
10401081

Diff for: pandas/core/dtypes/common.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
ExtensionDtype)
1212
from .generic import (ABCCategorical, ABCPeriodIndex,
1313
ABCDatetimeIndex, ABCSeries,
14-
ABCSparseArray, ABCSparseSeries)
14+
ABCSparseArray, ABCSparseSeries,
15+
ABCIndexClass)
1516
from .inference import is_string_like
1617
from .inference import * # noqa
1718

@@ -1535,11 +1536,22 @@ def is_bool_dtype(arr_or_dtype):
15351536

15361537
if arr_or_dtype is None:
15371538
return False
1539+
15381540
try:
15391541
tipo = _get_dtype_type(arr_or_dtype)
15401542
except ValueError:
15411543
# this isn't even a dtype
15421544
return False
1545+
1546+
if isinstance(arr_or_dtype, ABCIndexClass):
1547+
1548+
# TODO(jreback)
1549+
# we don't have a boolean Index class
1550+
# so it's object, we need to infer to
1551+
# guess this
1552+
return (arr_or_dtype.is_object and
1553+
arr_or_dtype.inferred_type == 'boolean')
1554+
15431555
return issubclass(tipo, np.bool_)
15441556

15451557

Diff for: pandas/core/frame.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@
2525
import numpy.ma as ma
2626

2727
from pandas.core.dtypes.cast import (
28-
maybe_upcast, infer_dtype_from_scalar,
28+
maybe_upcast,
2929
maybe_cast_to_datetime,
3030
maybe_infer_to_datetimelike,
3131
maybe_convert_platform,
3232
maybe_downcast_to_dtype,
3333
invalidate_string_dtypes,
3434
coerce_to_dtypes,
3535
maybe_upcast_putmask,
36+
cast_scalar_to_array,
3637
find_common_type)
3738
from pandas.core.dtypes.common import (
3839
is_categorical_dtype,
@@ -356,8 +357,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
356357
raise_with_traceback(exc)
357358

358359
if arr.ndim == 0 and index is not None and columns is not None:
359-
values = _cast_scalar_to_array((len(index), len(columns)),
360-
data, dtype=dtype)
360+
values = cast_scalar_to_array((len(index), len(columns)),
361+
data, dtype=dtype)
361362
mgr = self._init_ndarray(values, index, columns,
362363
dtype=values.dtype, copy=False)
363364
else:
@@ -2649,8 +2650,8 @@ def reindexer(value):
26492650

26502651
else:
26512652
# upcast the scalar
2652-
value = _cast_scalar_to_array(len(self.index), value)
2653-
value = _possibly_cast_to_datetime(value, value.dtype)
2653+
value = cast_scalar_to_array(len(self.index), value)
2654+
value = maybe_cast_to_datetime(value, value.dtype)
26542655

26552656
# return internal types directly
26562657
if is_extension_type(value):

Diff for: pandas/core/generic.py

+1-44
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from pandas.core.dtypes.common import (
1414
_ensure_int64,
1515
_ensure_object,
16-
needs_i8_conversion,
1716
is_scalar,
1817
is_number,
1918
is_integer, is_bool,
@@ -5301,48 +5300,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
53015300
raise NotImplemented("cannot align with a higher dimensional "
53025301
"NDFrame")
53035302

5304-
elif is_list_like(other):
5305-
5306-
if self.ndim == 1:
5307-
5308-
# try to set the same dtype as ourselves
5309-
try:
5310-
new_other = np.array(other, dtype=self.dtype)
5311-
except ValueError:
5312-
new_other = np.array(other)
5313-
except TypeError:
5314-
new_other = other
5315-
5316-
# we can end up comparing integers and m8[ns]
5317-
# which is a numpy no no
5318-
is_i8 = needs_i8_conversion(self.dtype)
5319-
if is_i8:
5320-
matches = False
5321-
else:
5322-
matches = (new_other == np.array(other))
5323-
5324-
if matches is False or not matches.all():
5325-
5326-
# coerce other to a common dtype if we can
5327-
if needs_i8_conversion(self.dtype):
5328-
try:
5329-
other = np.array(other, dtype=self.dtype)
5330-
except:
5331-
other = np.array(other)
5332-
else:
5333-
other = np.asarray(other)
5334-
other = np.asarray(other,
5335-
dtype=np.common_type(other,
5336-
new_other))
5337-
5338-
# we need to use the new dtype
5339-
try_quick = False
5340-
else:
5341-
other = new_other
5342-
else:
5343-
5344-
other = np.array(other)
5345-
53465303
if isinstance(other, np.ndarray):
53475304

53485305
if other.shape != self.shape:
@@ -5407,7 +5364,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
54075364
# reconstruct the block manager
54085365

54095366
self._check_inplace_setting(other)
5410-
new_data = self._data.putmask(mask=cond, new=other, align=align,
5367+
new_data = self._data.putmask(mask=cond, other=other, align=align,
54115368
inplace=True, axis=block_axis,
54125369
transpose=self._AXIS_REVERSED)
54135370
self._update_inplace(new_data)

Diff for: pandas/core/indexes/base.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
_ensure_platform_int,
2323
is_integer,
2424
is_float,
25+
is_bool,
2526
is_dtype_equal,
2627
is_object_dtype,
2728
is_categorical_dtype,
@@ -608,11 +609,21 @@ def repeat(self, repeats, *args, **kwargs):
608609

609610
@Appender(_index_shared_docs['where'])
610611
def where(self, cond, other=None):
612+
611613
if other is None:
612614
other = self._na_value
613-
values = np.where(cond, self.values, other)
614615

615616
dtype = self.dtype
617+
values = self.values
618+
619+
if is_bool(other) or is_bool_dtype(other):
620+
621+
# bools force casting
622+
values = values.astype(object)
623+
dtype = None
624+
625+
values = np.where(cond, values, other)
626+
616627
if self._is_numeric_dtype and np.any(isnull(values)):
617628
# We can't coerce to the numeric dtype of "self" (unless
618629
# it's float) if there are NaN values in our output.
@@ -1040,6 +1051,7 @@ def _convert_can_do_setop(self, other):
10401051

10411052
def _convert_for_op(self, value):
10421053
""" Convert value to be insertable to ndarray """
1054+
10431055
return value
10441056

10451057
def _assert_can_do_op(self, value):
@@ -3615,6 +3627,7 @@ def fillna(self, value=None, downcast=None):
36153627
# no need to care metadata other than name
36163628
# because it can't have freq if
36173629
return Index(result, name=self.name)
3630+
36183631
return self._shallow_copy()
36193632

36203633
_index_shared_docs['dropna'] = """

0 commit comments

Comments
 (0)