Skip to content

Commit e9a8e73

Browse files
committed
BUG: Respect dups in reindexing CategoricalIndex
When the indexer is identical to the elements, we should still return duplicates when the indexer contains duplicates. Closes gh-17323 (pandas-dev/pandas).
1 parent 473a7f3 commit e9a8e73

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -358,6 +358,7 @@ Indexing
358358
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
359359
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
360360
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
361+
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)
361362

362363
I/O
363364
^^^

Diff for: pandas/core/indexes/category.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
487487
method = missing.clean_reindex_fill_method(method)
488488
target = ibase._ensure_index(target)
489489

490-
if self.equals(target):
490+
if self.is_unique and self.equals(target):
491491
return np.arange(len(self), dtype='intp')
492492

493493
if method == 'pad' or method == 'backfill':

Diff for: pandas/tests/indexes/test_category.py

+12-13
Original file line number | Diff line number | Diff line change
@@ -365,29 +365,28 @@ def test_astype(self):
365365
tm.assert_index_equal(result, expected)
366366

367367
def test_reindex_base(self):
368-
369-
# determined by cat ordering
370-
idx = self.create_index()
371-
expected = np.arange(len(idx), dtype=np.intp)
372-
373-
actual = idx.get_indexer(idx)
374-
tm.assert_numpy_array_equal(expected, actual)
375-
376-
with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
377-
idx.get_indexer(idx, method='invalid')
378-
379-
def test_reindexing(self):
368+
np.random.seed(123456789)
380369

381370
ci = self.create_index()
382371
oidx = Index(np.array(ci))
383372

384-
for n in [1, 2, 5, len(ci)]:
373+
for n in [1, 2, 5]:
385374
finder = oidx[np.random.randint(0, len(ci), size=n)]
386375
expected = oidx.get_indexer_non_unique(finder)[0]
387376

388377
actual = ci.get_indexer(finder)
389378
tm.assert_numpy_array_equal(expected, actual)
390379

380+
# see gh-17323
381+
for finder in [list("aabbca"), list("aababca")]:
382+
expected = oidx.get_indexer_non_unique(finder)[0]
383+
384+
actual = ci.get_indexer(finder)
385+
tm.assert_numpy_array_equal(expected, actual)
386+
387+
with tm.assert_raises_regex(ValueError, "Invalid fill method"):
388+
ci.get_indexer(ci, method="invalid")
389+
391390
def test_reindex_dtype(self):
392391
c = CategoricalIndex(['a', 'b', 'c', 'a'])
393392
res, indexer = c.reindex(['a', 'c'])

0 commit comments

Comments
 (0)