Skip to content

BUG: iloc misbehavior with pd.Series: sometimes returns pd.Categorical, fixes #14580 #14642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,5 @@ Bug Fixes
- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`)

- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)

- Bug in ``Series.iloc`` where a ``Categorical`` object was returned for list-like indexer input, when a ``Series`` was expected (:issue:`14580`)
49 changes: 32 additions & 17 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1596,6 +1596,27 @@ def _get_slice_axis(self, slice_obj, axis=0):
else:
return self.obj.take(slice_obj, axis=axis, convert=False)

def _get_list_axis(self, key_list, axis=0):
"""
Return Series values by list or array of integers

Parameters
----------
key_list : list-like positional indexer
axis : int (can only be zero)

Returns
-------
Series object
"""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment on what this is doing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you give a hint on phrasing please?
Would "return Series values by list or array of integers" be clear enough?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So ideally add a Parameters/Returns section (I know we don't have docs for the other internal functions, but we have to start somewhere :))

Yes, but be clear that this is a positional indexer.

# validate list bounds
self._is_valid_list_like(key_list, axis)

# force an actual list
key_list = list(key_list)
return self.obj.take(key_list, axis=axis, convert=False)

def _getitem_axis(self, key, axis=0):

if isinstance(key, slice):
Expand All @@ -1606,26 +1627,20 @@ def _getitem_axis(self, key, axis=0):
self._has_valid_type(key, axis)
return self._getbool_axis(key, axis=axis)

# a single integer or a list of integers
else:

if is_list_like_indexer(key):

# validate list bounds
self._is_valid_list_like(key, axis)

# force an actual list
key = list(key)
# a list of integers
elif is_list_like_indexer(key):
return self._get_list_axis(key, axis=axis)

else:
key = self._convert_scalar_indexer(key, axis)
# a single integer
else:
key = self._convert_scalar_indexer(key, axis)

if not is_integer(key):
raise TypeError("Cannot index by location index with a "
"non-integer key")
if not is_integer(key):
raise TypeError("Cannot index by location index with a "
"non-integer key")

# validate the location
self._is_valid_integer(key, axis)
# validate the location
self._is_valid_integer(key, axis)

return self._get_loc(key, axis=axis)

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ def test_getitem_listlike(self):
expected = c[np.array([100000]).astype(np.int64)].codes
self.assert_numpy_array_equal(result, expected)

def test_getitem_category_type(self):
    """Check that ``iloc`` on a categorical Series returns a Series.

    Regression test for GH 14580: the slice, integer-list and boolean-list
    forms of ``iloc`` are each compared against an expected categorical
    Series that carries the full category set ``[1, 2, 3]``.
    """
    categories = [1, 2, 3]
    cat_series = pd.Series([1, 2, 3]).astype('category')

    def as_categorical(values):
        # expected selection, keeping every category of the source Series
        return pd.Series(values).astype('category', categories=categories)

    # positional slice
    tm.assert_series_equal(cat_series.iloc[0:2], as_categorical([1, 2]))

    # list of integer positions
    tm.assert_series_equal(cat_series.iloc[[0, 1]], as_categorical([1, 2]))

    # list of booleans
    tm.assert_series_equal(cat_series.iloc[[True, False, False]],
                           as_categorical([1]))

def test_setitem(self):

# int/positional
Expand Down