Skip to content

Commit 562f43a

Browse files
committed
CLN/BUG: fix ndarray assignment may cause unexpected cast
1 parent b895968 commit 562f43a

File tree

11 files changed

+324
-143
lines changed

11 files changed

+324
-143
lines changed

Diff for: doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Bug Fixes
5656
- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
5757
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
5858
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
59+
<<<<<<< f26b049786624ed983f2718687c23e3f1adbb670
5960
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
6061
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
6162
is not scalar and ``values`` is not specified (:issue:`14380`)

Diff for: doc/source/whatsnew/v0.19.2.txt

+10
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,20 @@ Bug Fixes
7373
- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
7474
- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
7575
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
76+
<<<<<<< HEAD
7677
- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`).
7778
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
7879
- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`)
7980
- Bug in ``pd.merge_asof()`` could not handle timezone-aware DatetimeIndex when a tolerance was specified (:issue:`14844`)
81+
=======
82+
83+
84+
- Bug in assignment against datetime-like data with ``int`` may be incorrectly converted to datetime-like (:issue:`14145`)
85+
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may incorrectly keep ``int64`` dtype (:issue:`14001`)
86+
87+
88+
89+
>>>>>>> CLN/BUG: fix ndarray assignment may cause unexpected cast
8090
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
8191
- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`)
8292
- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)

Diff for: pandas/core/frame.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
import numpy as np
2424
import numpy.ma as ma
2525

26-
from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar,
26+
from pandas.types.cast import (_maybe_upcast,
27+
_cast_scalar_to_array,
2728
_possibly_cast_to_datetime,
2829
_possibly_infer_to_datetimelike,
2930
_possibly_convert_platform,
@@ -333,15 +334,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
333334
raise_with_traceback(exc)
334335

335336
if arr.ndim == 0 and index is not None and columns is not None:
336-
if isinstance(data, compat.string_types) and dtype is None:
337-
dtype = np.object_
338-
if dtype is None:
339-
dtype, data = _infer_dtype_from_scalar(data)
340-
341-
values = np.empty((len(index), len(columns)), dtype=dtype)
342-
values.fill(data)
343-
mgr = self._init_ndarray(values, index, columns, dtype=dtype,
344-
copy=False)
337+
values = _cast_scalar_to_array((len(index), len(columns)),
338+
data, dtype=dtype)
339+
mgr = self._init_ndarray(values, index, columns,
340+
dtype=values.dtype, copy=False)
345341
else:
346342
raise PandasError('DataFrame constructor not properly called!')
347343

@@ -455,7 +451,7 @@ def _get_axes(N, K, index=index, columns=columns):
455451
values = _prep_ndarray(values, copy=copy)
456452

457453
if dtype is not None:
458-
if values.dtype != dtype:
454+
if not is_dtype_equal(values.dtype, dtype):
459455
try:
460456
values = values.astype(dtype)
461457
except Exception as orig:
@@ -2641,9 +2637,8 @@ def reindexer(value):
26412637

26422638
else:
26432639
# upcast the scalar
2644-
dtype, value = _infer_dtype_from_scalar(value)
2645-
value = np.repeat(value, len(self.index)).astype(dtype)
2646-
value = _possibly_cast_to_datetime(value, dtype)
2640+
value = _cast_scalar_to_array(len(self.index), value)
2641+
value = _possibly_cast_to_datetime(value, value.dtype)
26472642

26482643
# return internal types directly
26492644
if is_extension_type(value):

Diff for: pandas/core/internals.py

+94-82
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
is_null_datelike_scalar)
4242
import pandas.types.concat as _concat
4343

44-
from pandas.types.generic import ABCSeries
44+
from pandas.types.generic import ABCSeries, ABCDatetimeIndex
4545
from pandas.core.common import is_null_slice
4646
import pandas.core.algorithms as algos
4747

@@ -379,7 +379,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
379379

380380
# fillna, but if we cannot coerce, then try again as an ObjectBlock
381381
try:
382-
values, _, value, _ = self._try_coerce_args(self.values, value)
382+
values, _, _, _ = self._try_coerce_args(self.values, value)
383+
# value may be converted to internal, thus drop
383384
blocks = self.putmask(mask, value, inplace=inplace)
384385
blocks = [b.make_block(values=self._try_coerce_result(b.values))
385386
for b in blocks]
@@ -673,8 +674,43 @@ def setitem(self, indexer, value, mgr=None):
673674
if self.is_numeric:
674675
value = np.nan
675676

676-
# coerce args
677-
values, _, value, _ = self._try_coerce_args(self.values, value)
677+
# coerce if block dtype can store value
678+
values = self.values
679+
try:
680+
values, _, value, _ = self._try_coerce_args(values, value)
681+
# can keep its own dtype
682+
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
683+
value.dtype):
684+
dtype = self.dtype
685+
else:
686+
dtype = 'infer'
687+
688+
except (TypeError, ValueError):
689+
# current dtype cannot store value, coerce to common dtype
690+
find_dtype = False
691+
692+
if hasattr(value, 'dtype'):
693+
dtype = value.dtype
694+
find_dtype = True
695+
696+
elif is_scalar(value):
697+
if isnull(value):
698+
# NaN promotion is handled in latter path
699+
dtype = False
700+
else:
701+
dtype, _ = _infer_dtype_from_scalar(value,
702+
pandas_dtype=True)
703+
find_dtype = True
704+
else:
705+
dtype = 'infer'
706+
707+
if find_dtype:
708+
dtype = _find_common_type([values.dtype, dtype])
709+
if not is_dtype_equal(self.dtype, dtype):
710+
b = self.astype(dtype)
711+
return b.setitem(indexer, value, mgr=mgr)
712+
713+
# value must be storeable at this moment
678714
arr_value = np.array(value)
679715

680716
# cast the values to a type that can hold nan (if necessary)
@@ -704,87 +740,52 @@ def setitem(self, indexer, value, mgr=None):
704740
raise ValueError("cannot set using a slice indexer with a "
705741
"different length than the value")
706742

707-
try:
708-
709-
def _is_scalar_indexer(indexer):
710-
# return True if we are all scalar indexers
711-
712-
if arr_value.ndim == 1:
713-
if not isinstance(indexer, tuple):
714-
indexer = tuple([indexer])
715-
return all([is_scalar(idx) for idx in indexer])
716-
return False
717-
718-
def _is_empty_indexer(indexer):
719-
# return a boolean if we have an empty indexer
743+
def _is_scalar_indexer(indexer):
744+
# return True if we are all scalar indexers
720745

721-
if arr_value.ndim == 1:
722-
if not isinstance(indexer, tuple):
723-
indexer = tuple([indexer])
724-
return any(isinstance(idx, np.ndarray) and len(idx) == 0
725-
for idx in indexer)
726-
return False
727-
728-
# empty indexers
729-
# 8669 (empty)
730-
if _is_empty_indexer(indexer):
731-
pass
732-
733-
# setting a single element for each dim and with a rhs that could
734-
# be say a list
735-
# GH 6043
736-
elif _is_scalar_indexer(indexer):
737-
values[indexer] = value
738-
739-
# if we are an exact match (ex-broadcasting),
740-
# then use the resultant dtype
741-
elif (len(arr_value.shape) and
742-
arr_value.shape[0] == values.shape[0] and
743-
np.prod(arr_value.shape) == np.prod(values.shape)):
744-
values[indexer] = value
745-
values = values.astype(arr_value.dtype)
746-
747-
# set
748-
else:
749-
values[indexer] = value
750-
751-
# coerce and try to infer the dtypes of the result
752-
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
753-
value.dtype):
754-
dtype = value.dtype
755-
elif is_scalar(value):
756-
dtype, _ = _infer_dtype_from_scalar(value)
757-
else:
758-
dtype = 'infer'
759-
values = self._try_coerce_and_cast_result(values, dtype)
760-
block = self.make_block(transf(values), fastpath=True)
761-
762-
# may have to soft convert_objects here
763-
if block.is_object and not self.is_object:
764-
block = block.convert(numeric=False)
765-
766-
return block
767-
except ValueError:
768-
raise
769-
except TypeError:
746+
if arr_value.ndim == 1:
747+
if not isinstance(indexer, tuple):
748+
indexer = tuple([indexer])
749+
return all([is_scalar(idx) for idx in indexer])
750+
return False
770751

771-
# cast to the passed dtype if possible
772-
# otherwise raise the original error
773-
try:
774-
# e.g. we are uint32 and our value is uint64
775-
# this is for compat with older numpies
776-
block = self.make_block(transf(values.astype(value.dtype)))
777-
return block.setitem(indexer=indexer, value=value, mgr=mgr)
752+
def _is_empty_indexer(indexer):
753+
# return a boolean if we have an empty indexer
778754

779-
except:
780-
pass
781-
782-
raise
755+
if arr_value.ndim == 1:
756+
if not isinstance(indexer, tuple):
757+
indexer = tuple([indexer])
758+
return any(isinstance(idx, np.ndarray) and len(idx) == 0
759+
for idx in indexer)
760+
return False
783761

784-
except Exception:
762+
# empty indexers
763+
# 8669 (empty)
764+
if _is_empty_indexer(indexer):
785765
pass
786766

787-
return [self]
767+
# setting a single element for each dim and with a rhs that could
768+
# be say a list
769+
# GH 6043
770+
elif _is_scalar_indexer(indexer):
771+
values[indexer] = value
772+
773+
# if we are an exact match (ex-broadcasting),
774+
# then use the resultant dtype
775+
elif (len(arr_value.shape) and
776+
arr_value.shape[0] == values.shape[0] and
777+
np.prod(arr_value.shape) == np.prod(values.shape)):
778+
values[indexer] = value
779+
values = values.astype(arr_value.dtype)
780+
781+
# set
782+
else:
783+
values[indexer] = value
784+
785+
# coerce and try to infer the dtypes of the result
786+
values = self._try_coerce_and_cast_result(values, dtype)
787+
block = self.make_block(transf(values), fastpath=True)
788+
return block
788789

789790
def putmask(self, mask, new, align=True, inplace=False, axis=0,
790791
transpose=False, mgr=None):
@@ -1255,6 +1256,7 @@ def func(cond, values, other):
12551256

12561257
values, values_mask, other, other_mask = self._try_coerce_args(
12571258
values, other)
1259+
12581260
try:
12591261
return self._try_coerce_result(expressions.where(
12601262
cond, values, other, raise_on_error=True))
@@ -1534,6 +1536,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
15341536
new = new[mask]
15351537

15361538
mask = _safe_reshape(mask, new_values.shape)
1539+
15371540
new_values[mask] = new
15381541
new_values = self._try_coerce_result(new_values)
15391542
return [self.make_block(values=new_values)]
@@ -1703,7 +1706,7 @@ def fillna(self, value, **kwargs):
17031706

17041707
# allow filling with integers to be
17051708
# interpreted as seconds
1706-
if not isinstance(value, np.timedelta64) and is_integer(value):
1709+
if not isinstance(value, np.timedelta64):
17071710
value = Timedelta(value, unit='s')
17081711
return super(TimeDeltaBlock, self).fillna(value, **kwargs)
17091712

@@ -1937,6 +1940,15 @@ def _maybe_downcast(self, blocks, downcast=None):
19371940
def _can_hold_element(self, element):
19381941
return True
19391942

1943+
def _try_coerce_args(self, values, other):
1944+
""" provide coercion to our input arguments """
1945+
1946+
if isinstance(other, ABCDatetimeIndex):
1947+
# to store DatetimeTZBlock as object
1948+
other = other.asobject.values
1949+
1950+
return values, False, other, False
1951+
19401952
def _try_cast(self, element):
19411953
return element
19421954

@@ -2276,8 +2288,6 @@ def _try_coerce_args(self, values, other):
22762288
"naive Block")
22772289
other_mask = isnull(other)
22782290
other = other.asm8.view('i8')
2279-
elif hasattr(other, 'dtype') and is_integer_dtype(other):
2280-
other = other.view('i8')
22812291
else:
22822292
try:
22832293
other = np.asarray(other)
@@ -2453,6 +2463,8 @@ def _try_coerce_args(self, values, other):
24532463
raise ValueError("incompatible or non tz-aware value")
24542464
other_mask = isnull(other)
24552465
other = other.value
2466+
else:
2467+
raise TypeError
24562468

24572469
return values, values_mask, other, other_mask
24582470

Diff for: pandas/core/panel.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import numpy as np
1010

1111
from pandas.types.cast import (_infer_dtype_from_scalar,
12+
_cast_scalar_to_array,
1213
_possibly_cast_item)
1314
from pandas.types.common import (is_integer, is_list_like,
1415
is_string_like, is_scalar)
@@ -166,11 +167,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
166167
copy = False
167168
dtype = None
168169
elif is_scalar(data) and all(x is not None for x in passed_axes):
169-
if dtype is None:
170-
dtype, data = _infer_dtype_from_scalar(data)
171-
values = np.empty([len(x) for x in passed_axes], dtype=dtype)
172-
values.fill(data)
173-
mgr = self._init_matrix(values, passed_axes, dtype=dtype,
170+
values = _cast_scalar_to_array([len(x) for x in passed_axes],
171+
data, dtype=dtype)
172+
mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
174173
copy=False)
175174
copy = False
176175
else: # pragma: no cover
@@ -570,9 +569,7 @@ def __setitem__(self, key, value):
570569
shape[1:], tuple(map(int, value.shape))))
571570
mat = np.asarray(value)
572571
elif is_scalar(value):
573-
dtype, value = _infer_dtype_from_scalar(value)
574-
mat = np.empty(shape[1:], dtype=dtype)
575-
mat.fill(value)
572+
mat = _cast_scalar_to_array(shape[1:], value)
576573
else:
577574
raise TypeError('Cannot set item of type: %s' % str(type(value)))
578575

0 commit comments

Comments
 (0)