pandas-dev · jreback · Apr 16, 2018 · Feb 23, 2018 · Feb 26, 2018 · Feb 27, 2018
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -65,7 +65,7 @@
 import pandas.core.algorithms as algos
 
 from pandas.core.index import Index, MultiIndex, _ensure_index
-from pandas.core.indexing import maybe_convert_indices, length_of_indexer
+from pandas.core.indexing import maybe_convert_indices
 from pandas.core.arrays import Categorical
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.timedeltas import TimedeltaIndex
@@ -79,7 +79,8 @@
 from pandas._libs.tslibs import conversion
 
 from pandas.util._decorators import cache_readonly
-from pandas.util._validators import validate_bool_kwarg
+from pandas.util._validators import (
+    validate_bool_kwarg, validate_setitem_lengths)
 from pandas import compat
 from pandas.compat import range, map, zip, u
 
@@ -815,11 +816,24 @@ def _replace_single(self, *args, **kwargs):
         return self if kwargs['inplace'] else self.copy()
 
     def setitem(self, indexer, value, mgr=None):
-        """ set the value inplace; return a new block (of a possibly different
-        dtype)
+        """Set the value inplace, returning a a maybe different typed block.
 
-        indexer is a direct slice/positional indexer; value must be a
-        compatible shape
+        Parameters
+        ----------
+        indexer : tuple, list-like, array-like, slice
+            The subset of self.values to set
+        value : object
+            The value being set
+        mgr : BlockPlacement
+
+        Returns
+        -------
+        Block
+
+        Notes
+        -----
+        'indexer' is a direct slice/positional indexer. 'value' must
+        be a compatible shape.
         """
         # coerce None values, if appropriate
         if value is None:
@@ -874,22 +888,7 @@ def setitem(self, indexer, value, mgr=None):
         values = transf(values)
 
         # length checking
-        # boolean with truth values == len of the value is ok too
-        if isinstance(indexer, (np.ndarray, list)):
-            if is_list_like(value) and len(indexer) != len(value):
-                if not (isinstance(indexer, np.ndarray) and
-                        indexer.dtype == np.bool_ and
-                        len(indexer[indexer]) == len(value)):
-                    raise ValueError("cannot set using a list-like indexer "
-                                     "with a different length than the value")
-
-        # slice
-        elif isinstance(indexer, slice):
-
-            if is_list_like(value) and len(values):
-                if len(value) != length_of_indexer(indexer, values):
-                    raise ValueError("cannot set using a slice indexer with a "
-                                     "different length than the value")
+        validate_setitem_lengths(indexer, value, values)
 
         def _is_scalar_indexer(indexer):
             # return True if we are all scalar indexers
@@ -1898,6 +1897,37 @@ def is_view(self):
         """Extension arrays are never treated as views."""
         return False
 
+    def setitem(self, indexer, value, mgr=None):
+        """Set the value inplace, returning a same-typed block.
+
+        This differs from Block.setitem by not allowing setitem to change
+        the dtype of the Block.
+
+        Parameters
+        ----------
+        indexer : tuple, list-like, array-like, slice
+            The subset of self.values to set
+        value : object
+            The value being set
+        mgr : BlockPlacement
+
+        Returns
+        -------
+        Block
+
+        Notes
+        -----
+        'indexer' is a direct slice/positional indexer. 'value' must
+        be a compatible shape.
+        """
+        if isinstance(indexer, tuple):
+            # we are always 1-D
+            indexer = indexer[0]
+
+        validate_setitem_lengths(indexer, value, self.values)
+        self.values[indexer] = value
+        return self
+
     def get_values(self, dtype=None):
         # ExtensionArrays must be iterable, so this works.
         values = np.asarray(self.values)
@@ -3489,7 +3519,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
         # with a .values attribute.
         aligned_args = dict((k, kwargs[k])
                             for k in align_keys
-                            if hasattr(kwargs[k], 'values'))
+                            if hasattr(kwargs[k], 'values') and
+                            not isinstance(kwargs[k], ABCExtensionArray))
 
         for b in self.blocks:
             if filter is not None:
@@ -5190,7 +5221,7 @@ def _safe_reshape(arr, new_shape):
     If possible, reshape `arr` to have shape `new_shape`,
     with a couple of exceptions (see gh-13012):
 
-    1) If `arr` is a Categorical or Index, `arr` will be
+    1) If `arr` is a ExtensionArray or Index, `arr` will be
        returned as is.
     2) If `arr` is a Series, the `_values` attribute will
        be reshaped and returned.

diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
@@ -48,3 +48,4 @@ class TestMyDtype(BaseDtypeTests):
 from .methods import BaseMethodsTests  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa
+from .setitem import BaseSetitemTests  # noqa
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -0,0 +1,116 @@
+import operator
+
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas.util.testing as tm
+from .base import BaseExtensionTests
+
+
+class BaseSetitemTests(BaseExtensionTests):
+    def test_setitem_scalar_series(self, data):
+        arr = pd.Series(data)
+        arr[0] = data[1]
+        assert arr[0] == data[1]
+
+    def test_setitem_sequence(self, data):
+        arr = pd.Series(data)
+        original = data.copy()
+
+        arr[[0, 1]] = [data[1], data[0]]
+        assert arr[0] == original[1]
+        assert arr[1] == original[0]
+
+    @pytest.mark.parametrize('as_array', [True, False])
+    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
+        ser = pd.Series(data)
+        value = [data[0]]
+        if as_array:
+            value = type(data)(value)
+
+        xpr = 'cannot set using a {} indexer with a different length'
+        with tm.assert_raises_regex(ValueError, xpr.format('list-like')):
+            ser[[0, 1]] = value
+
+        with tm.assert_raises_regex(ValueError, xpr.format('slice')):
+            ser[slice(3)] = value
+
+    def test_setitem_empty_indxer(self, data):
+        ser = pd.Series(data)
+        original = ser.copy()
+        ser[[]] = []
+        self.assert_series_equal(ser, original)
+
+    def test_setitem_sequence_broadcasts(self, data):
+        arr = pd.Series(data)
+
+        arr[[0, 1]] = data[2]
+        assert arr[0] == data[2]
+        assert arr[1] == data[2]
+
+    @pytest.mark.parametrize('setter', ['loc', 'iloc'])
+    def test_set_scalar(self, data, setter):
+        arr = pd.Series(data)
+        setter = getattr(arr, setter)
+        operator.setitem(setter, 0, data[1])
+        assert arr[0] == data[1]
+
+    def test_set_loc_scalar_mixed(self, data):
+        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
+        df.loc[0, 'B'] = data[1]
+        assert df.loc[0, 'B'] == data[1]
+
+    def test_set_loc_scalar_single(self, data):
+        df = pd.DataFrame({"B": data})
+        df.loc[10, 'B'] = data[1]
+        assert df.loc[10, 'B'] == data[1]
+
+    def test_set_loc_scalar_multiple_homogoneous(self, data):
+        df = pd.DataFrame({"A": data, "B": data})
+        df.loc[10, 'B'] = data[1]
+        assert df.loc[10, 'B'] == data[1]
+
+    def test_set_iloc_scalar_mixed(self, data):
+        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
+        df.iloc[0, 1] = data[1]
+        assert df.loc[0, 'B'] == data[1]
+
+    def test_set_iloc_scalar_single(self, data):
+        df = pd.DataFrame({"B": data})
+        df.iloc[10, 0] = data[1]
+        assert df.loc[10, 'B'] == data[1]
+
+    def test_set_iloc_scalar_multiple_homogoneous(self, data):
+        df = pd.DataFrame({"A": data, "B": data})
+        df.iloc[10, 1] = data[1]
+        assert df.loc[10, 'B'] == data[1]
+
+    @pytest.mark.parametrize('as_callable', [True, False])
+    def test_set_mask_aligned(self, data, as_callable):
+        ser = pd.Series(data)
+        mask = np.zeros(len(data), dtype=bool)
+        mask[:2] = True
+
+        if as_callable:
+            mask2 = lambda x: mask
+        else:
+            mask2 = mask
+
+        ser[mask2] = data[5:7]
+        assert ser[0] == data[5]
+        assert ser[1] == data[6]
+
+    def test_set_mask_broadcast(self, data):
+        ser = pd.Series(data)
+        mask = np.zeros(len(data), dtype=bool)
+        mask[:2] = True
+
+        ser[mask] = data[10]
+        assert ser[0] == data[10]
+        assert ser[1] == data[10]
+
+    def test_setitem_expand_columns(self, data):
+        df = pd.DataFrame({"A": data})
+        df['B'] = 1
+        assert len(df.columns) == 2
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -68,6 +68,10 @@ def test_getitem_scalar(self):
         pass
 
 
+class TestSetitem(base.BaseSetitemTests):
+    pass
+
+
 class TestMissing(base.BaseMissingTests):
     pass
 

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -116,6 +116,10 @@ class TestCasting(base.BaseCastingTests):
     pass
 
 
+class TestDecimal(base.BaseSetitemTests):
+    pass
+
+
 def test_series_constructor_coerce_data_to_extension_dtype_raises():
     xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the "
            "extension array directly.")

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -71,3 +71,6 @@ def test_value_counts(self, all_data, dropna):
 
 class TestCasting(base.BaseCastingTests):
     pass
+
+# We intentionally don't run base.BaseSetitemTests because pandas'
+# internals has trouble setting sequences of values into scalar positions.
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
@@ -4,7 +4,9 @@
 """
 import warnings
 
-from pandas.core.dtypes.common import is_bool
+import numpy as np
+
+from pandas.core.dtypes.common import is_bool, is_list_like
 
 
 def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
@@ -356,3 +358,48 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
         raise ValueError("Cannot specify both 'value' and 'method'.")
 
     return value, method
+
+
+def validate_setitem_lengths(indexer, value, values):
+    """Validate that value and indexer are the same length.
+
+    An special-case is allowed for when the indexer is a boolean array
+    and the number of true values equals the length of ``value``. In
+    this case, no exception is raised.
+
+    Parameters
+    ----------
+    indexer : sequence
+        The key for the setitem
+    value : array-like
+        The value for the setitem
+    values : array-like
+        The values being set into
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        When the indexer is an ndarray or list and the lengths don't
+        match.
+    """
+    from pandas.core.indexing import length_of_indexer
+
+    # boolean with truth values == len of the value is ok too
+    if isinstance(indexer, (np.ndarray, list)):
+        if is_list_like(value) and len(indexer) != len(value):
+            if not (isinstance(indexer, np.ndarray) and
+                    indexer.dtype == np.bool_ and
+                    len(indexer[indexer]) == len(value)):
+                raise ValueError("cannot set using a list-like indexer "
+                                 "with a different length than the value")
+    # slice
+    elif isinstance(indexer, slice):
+
+        if is_list_like(value) and len(values):
+            if len(value) != length_of_indexer(indexer, values):
+                raise ValueError("cannot set using a slice indexer with a "
+                                 "different length than the value")
-Original file line number
+Diff line change
@@ Expand Up / @@ -68,6 +68,10 @@ def test_getitem_scalar(self): @@
             pass
+    class TestSetitem(base.BaseSetitemTests):
+        pass
     class TestMissing(base.BaseMissingTests):
         pass
@@ Expand Down @@