Skip to content

Commit 4141317

Browse files
committed
Use argument dtype to inform coercion
Master:

```python
>>> import dask.dataframe as dd
>>> s = dd.core.Scalar({('s', 0): 10}, 's', 'i8')
>>> pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
...                     'b': [7, 6, 5, 4, 3, 2, 1]})
>>> (pdf + s).dtypes
a    object
b    object
dtype: object
```

Head:

```python
>>> (pdf + s).dtypes
a    int64
b    int64
dtype: object
```

This is more consistent with 0.20.3, while still keeping most of the changes in pandas-dev#16821.

Closes pandas-dev#17767
1 parent 2ff1241 commit 4141317

File tree

2 files changed

+88
-19
lines changed

2 files changed

+88
-19
lines changed

Diff for: pandas/core/internals.py

+27-19
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,8 @@ def convert(self, copy=True, **kwargs):
629629
def _can_hold_element(self, element):
630630
""" require the same dtype as ourselves """
631631
dtype = self.values.dtype.type
632-
if is_list_like(element):
633-
element = np.asarray(element)
634-
tipo = element.dtype.type
632+
tipo = _maybe_get_element_dtype_type(element)
633+
if tipo:
635634
return issubclass(tipo, dtype)
636635
return isinstance(element, dtype)
637636

@@ -1806,9 +1805,8 @@ class FloatBlock(FloatOrComplexBlock):
18061805
_downcast_dtype = 'int64'
18071806

18081807
def _can_hold_element(self, element):
1809-
if is_list_like(element):
1810-
element = np.asarray(element)
1811-
tipo = element.dtype.type
1808+
tipo = _maybe_get_element_dtype_type(element)
1809+
if tipo:
18121810
return (issubclass(tipo, (np.floating, np.integer)) and
18131811
not issubclass(tipo, (np.datetime64, np.timedelta64)))
18141812
return (isinstance(element, (float, int, np.floating, np.int_)) and
@@ -1856,9 +1854,9 @@ class ComplexBlock(FloatOrComplexBlock):
18561854
is_complex = True
18571855

18581856
def _can_hold_element(self, element):
1859-
if is_list_like(element):
1860-
element = np.array(element)
1861-
return issubclass(element.dtype.type,
1857+
tipo = _maybe_get_element_dtype_type(element)
1858+
if tipo:
1859+
return issubclass(tipo,
18621860
(np.floating, np.integer, np.complexfloating))
18631861
return (isinstance(element,
18641862
(float, int, complex, np.float_, np.int_)) and
@@ -1874,9 +1872,8 @@ class IntBlock(NumericBlock):
18741872
_can_hold_na = False
18751873

18761874
def _can_hold_element(self, element):
1877-
if is_list_like(element):
1878-
element = np.array(element)
1879-
tipo = element.dtype.type
1875+
tipo = _maybe_get_element_dtype_type(element)
1876+
if tipo:
18801877
return (issubclass(tipo, np.integer) and
18811878
not issubclass(tipo, (np.datetime64, np.timedelta64)) and
18821879
self.dtype.itemsize >= element.dtype.itemsize)
@@ -1917,9 +1914,8 @@ def _box_func(self):
19171914
return lambda x: tslib.Timedelta(x, unit='ns')
19181915

19191916
def _can_hold_element(self, element):
1920-
if is_list_like(element):
1921-
element = np.array(element)
1922-
tipo = element.dtype.type
1917+
tipo = _maybe_get_element_dtype_type(element)
1918+
if tipo:
19231919
return issubclass(tipo, np.timedelta64)
19241920
return isinstance(element, (timedelta, np.timedelta64))
19251921

@@ -2018,9 +2014,9 @@ class BoolBlock(NumericBlock):
20182014
_can_hold_na = False
20192015

20202016
def _can_hold_element(self, element):
2021-
if is_list_like(element):
2022-
element = np.asarray(element)
2023-
return issubclass(element.dtype.type, np.bool_)
2017+
tipo = _maybe_get_element_dtype_type(element)
2018+
if tipo:
2019+
return issubclass(tipo, np.bool_)
20242020
return isinstance(element, (bool, np.bool_))
20252021

20262022
def should_store(self, value):
@@ -2450,7 +2446,9 @@ def _astype(self, dtype, mgr=None, **kwargs):
24502446
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
24512447

24522448
def _can_hold_element(self, element):
2453-
if is_list_like(element):
2449+
tipo = _maybe_get_element_dtype_type(element)
2450+
if tipo:
2451+
# TODO: this still uses asarray, instead of dtype.type
24542452
element = np.array(element)
24552453
return element.dtype == _NS_DTYPE or element.dtype == np.int64
24562454
return (is_integer(element) or isinstance(element, datetime) or
@@ -5525,3 +5523,13 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
55255523
if not allow_fill:
55265524
indexer = maybe_convert_indices(indexer, length)
55275525
return 'fancy', indexer, len(indexer)
5526+
5527+
5528+
def _maybe_get_element_dtype_type(element):
5529+
tipo = None
5530+
if hasattr(element, 'dtype'):
5531+
tipo = element.dtype.type
5532+
elif is_list_like(element):
5533+
element = np.asarray(element)
5534+
tipo = element.dtype.type
5535+
return tipo

Diff for: pandas/tests/internals/test_internals.py

+61
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0102
33

44
from datetime import datetime, date
5+
import operator
56
import sys
67
import pytest
78
import numpy as np
@@ -1213,3 +1214,63 @@ def assert_add_equals(val, inc, result):
12131214

12141215
with pytest.raises(ValueError):
12151216
BlockPlacement(slice(2, None, -1)).add(-1)
1217+
1218+
1219+
class DummyElement(object):
    """
    Minimal stand-in for an array-like scalar that declares a ``dtype``.

    Parameters
    ----------
    value : scalar
        The wrapped value.
    dtype : str, type or numpy.dtype
        Anything accepted by ``np.dtype``; stored normalized as ``self.dtype``.
    """

    def __init__(self, value, dtype):
        self.value = value
        self.dtype = np.dtype(dtype)

    def __array__(self, dtype=None, copy=None):
        # NumPy may pass ``dtype`` and ``copy``; ``np.array`` always builds a
        # fresh array here, so ``copy`` needs no special handling.
        target = self.dtype if dtype is None else dtype
        return np.array(self.value, dtype=target)

    def __str__(self):
        return "DummyElement({}, {})".format(self.value, self.dtype)

    def __repr__(self):
        return str(self)

    def astype(self, dtype, copy=False):
        """Set ``self.dtype`` in place and return ``self`` (``copy`` unused)."""
        # Normalize exactly as __init__ does so ``self.dtype.type`` keeps
        # working when a string such as 'f8' is passed.
        self.dtype = np.dtype(dtype)
        return self

    def view(self, dtype):
        """Return a new element whose value is reinterpreted as ``dtype``."""
        # Plain Python scalars have no ``.view``; go through a 0-d array and
        # unwrap back to a scalar with ``[()]``.
        viewed = np.asarray(self.value, dtype=self.dtype).view(dtype)[()]
        return type(self)(viewed, dtype)

    def any(self, axis=None):
        """Return the truthiness of the wrapped value (``axis`` unused)."""
        return bool(self.value)
1242+
1243+
1244+
class TestCanHoldElement(object):
    """Binary ops with an operand that only declares a ``dtype``."""

    @pytest.mark.parametrize('value, dtype', [
        (1, 'i8'),
        (1.0, 'f8'),
        (1j, 'complex128'),
        (True, 'bool'),
        # TODO: the timedelta64 case was left disabled; the reason is not
        # recorded here -- confirm before re-enabling.
        # (np.timedelta64(20, 'ns'), '<m8[ns]'),
        (np.datetime64(20, 'ns'), '<M8[ns]'),
    ])
    @pytest.mark.parametrize('op', [
        operator.add,
        operator.sub,
        operator.mul,
        operator.truediv,
        operator.mod,
        operator.pow,
    ], ids=lambda x: x.__name__)
    def test_binop_other(self, op, value, dtype):
        """
        The resulting dtypes of ``op(frame, DummyElement)`` must equal those
        of ``op(frame, value)`` for the same underlying scalar.
        """
        # (operator, dtype) pairs that are skipped as invalid combinations.
        skip = {(operator.add, 'bool'),
                (operator.sub, 'bool'),
                (operator.mul, 'bool'),
                (operator.truediv, 'bool'),
                (operator.mod, 'i8'),
                (operator.mod, 'complex128'),
                (operator.mod, '<M8[ns]'),
                (operator.pow, 'bool')}
        if (op, dtype) in skip:
            pytest.skip("Invalid combination {},{}".format(op, dtype))

        e = DummyElement(value, dtype)
        frame = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype)

        # ``.dtypes`` of a DataFrame is a Series, hence assert_series_equal.
        result = op(frame, e).dtypes
        expected = op(frame, value).dtypes
        assert_series_equal(result, expected)

0 commit comments

Comments
 (0)