From 5b5ead4eab3a9d14a65d24efc21e860607ffdbd3 Mon Sep 17 00:00:00 2001 From: Nathalie Rud Date: Sat, 12 Nov 2016 15:50:56 +0000 Subject: [PATCH 1/4] BUG: fix iloc with pd.Series on pd.Categorical Previously, iloc on a pd.Series of categorical dtype returned a Categorical object for list-like indexer input, whereas a Series object is expected. Fixes https://github.com/pandas-dev/pandas/issues/14580 --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c4ae3dcca8367..ff65a781dfcb1 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1616,6 +1616,7 @@ def _getitem_axis(self, key, axis=0): # force an actual list key = list(key) + return self.obj.take(key, axis=axis, convert=False) else: key = self._convert_scalar_indexer(key, axis) From 9a69ca5088f382750a55afa7d422e125a43518c9 Mon Sep 17 00:00:00 2001 From: Nathalie Rud Date: Sat, 12 Nov 2016 16:11:54 +0000 Subject: [PATCH 2/4] CLN: refactor _getitem_axis() of _iLocIndexer class _get_list_axis() is factored out of _getitem_axis() to handle list-like type of input. 
--- pandas/core/indexing.py | 50 ++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ff65a781dfcb1..7d8616df48c1b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1596,6 +1596,27 @@ def _get_slice_axis(self, slice_obj, axis=0): else: return self.obj.take(slice_obj, axis=axis, convert=False) + def _get_list_axis(self, key_list, axis=0): + """ + Return Series values by list or array of integers + + Parameters + ---------- + key_list : list-like positional indexer + axis : int (can only be zero) + + Returns + ------- + Series object + """ + + # validate list bounds + self._is_valid_list_like(key_list, axis) + + # force an actual list + key_list = list(key_list) + return self.obj.take(key_list, axis=axis, convert=False) + def _getitem_axis(self, key, axis=0): if isinstance(key, slice): @@ -1606,27 +1627,20 @@ def _getitem_axis(self, key, axis=0): self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) - # a single integer or a list of integers - else: - - if is_list_like_indexer(key): - - # validate list bounds - self._is_valid_list_like(key, axis) - - # force an actual list - key = list(key) - return self.obj.take(key, axis=axis, convert=False) + # a list of integers + elif is_list_like_indexer(key): + return self._get_list_axis(key, axis=axis) - else: - key = self._convert_scalar_indexer(key, axis) + # a single integer + else: + key = self._convert_scalar_indexer(key, axis) - if not is_integer(key): - raise TypeError("Cannot index by location index with a " - "non-integer key") + if not is_integer(key): + raise TypeError("Cannot index by location index with a " + "non-integer key") - # validate the location - self._is_valid_integer(key, axis) + # validate the location + self._is_valid_integer(key, axis) return self._get_loc(key, axis=axis) From bb9b601d19d9975fb375b0efd3b2760e82ac9601 Mon Sep 17 00:00:00 2001 From: 
Nathalie Rud Date: Sat, 12 Nov 2016 16:22:23 +0000 Subject: [PATCH 3/4] TST: add test to cover bug GH #14580 Add test to verify type of returned value of iloc() Series of Categorical data. --- pandas/tests/test_categorical.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 5d2c317cc0f81..fe61f16cf7eb1 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -54,6 +54,27 @@ def test_getitem_listlike(self): expected = c[np.array([100000]).astype(np.int64)].codes self.assert_numpy_array_equal(result, expected) + def test_getitem_category_type(self): + # GH 14580 + # test iloc() on Series with Categorical data + + s = pd.Series([1, 2, 3]).astype('category') + + # get slice + result = s.iloc[0:2] + expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = s.iloc[[0, 1]] + expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # get boolean array + result = s.iloc[[True, False, False]] + expected = pd.Series([1]).astype('category', categories=[1, 2, 3]) + tm.assert_series_equal(result, expected) + def test_setitem(self): # int/positional From f6b1790e06f9f17fb0046927ee768d6c3c319159 Mon Sep 17 00:00:00 2001 From: Nathalie Rud Date: Sun, 18 Dec 2016 00:44:31 +0000 Subject: [PATCH 4/4] DOC: add whatsnew entry for bug GH #14580 fix --- doc/source/whatsnew/v0.19.2.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 4cd58f0148ae8..0249e831bad42 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -94,3 +94,5 @@ Bug Fixes - Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. 
(:issue:`14821`) - Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`) + +- Bug in ``Series.iloc`` where a ``Categorical`` object was returned for list-like indexer input instead of a ``Series`` (:issue:`14580`)