Skip to content

Commit b878886

Browse files
committed
BUG: Load data from a CategoricalIndex for dtype comparison, closes pandas-dev#16627
1 parent 664348c commit b878886

File tree

3 files changed

+28
-2
lines changed

3 files changed

+28
-2
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ Indexing
102102

103103
- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
104104
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
105+
- Fixed a bug that prevented joining on a ``MultiIndex`` with a categorical level (:issue:`16627`).
105106

106107

107108
I/O

pandas/core/reshape/merge.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pandas.compat as compat
1212

1313
from pandas import (Categorical, Series, DataFrame,
14-
Index, MultiIndex, Timedelta)
14+
Index, MultiIndex, Timedelta, CategoricalIndex)
1515
from pandas.core.frame import _merge_doc
1616
from pandas.core.dtypes.common import (
1717
is_datetime64tz_dtype,
@@ -1441,9 +1441,13 @@ def _factorize_keys(lk, rk, sort=True):
14411441
rk = rk.values
14421442

14431443
# if we exactly match in categories, allow us to use codes
1444+
if isinstance(lk, CategoricalIndex):
1445+
ldata = lk._data
1446+
else:
1447+
ldata = lk
14441448
if (is_categorical_dtype(lk) and
14451449
is_categorical_dtype(rk) and
1446-
lk.is_dtype_equal(rk)):
1450+
ldata.is_dtype_equal(rk)):
14471451
return lk.codes, rk.codes, len(lk.categories)
14481452

14491453
if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):

pandas/tests/test_join.py

+21
Original file line numberDiff line numberDiff line change
@@ -192,3 +192,24 @@ def test_inner_join_indexer2():
192192

193193
exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
194194
assert_almost_equal(ridx, exp_ridx)
195+
196+
197+
def test_merge_join_categorical_multiindex():
    """Joining on a MultiIndex with a categorical level matches ``merge``.

    Regression test for GH 16627.  The left frame carries a categorical
    column and an integer column; the right operand is a Series indexed by
    a two-level MultiIndex whose first level is categorical with the same
    categories.  ``DataFrame.join(..., on=[...])`` must give the same frame
    as the equivalent left ``merge`` on the reset index.
    """
    import pandas as pd

    categories = ['a', 'b', 'c']

    left = pd.DataFrame({
        'Cat1': pd.Categorical(['a', 'b', 'a', 'c', 'a', 'b'], categories),
        'Int1': [0, 1, 0, 1, 0, 0],
    })

    right_frame = pd.DataFrame({
        'Cat': pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], categories),
        'Int': [0, 0, 0, 1, 1, 1],
        'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
    })
    # Series of 'Factor' values keyed by the (Cat, Int) MultiIndex.
    right = right_frame.set_index(['Cat', 'Int'])['Factor']

    # Reference result: column-based merge, then drop the right-hand key
    # columns so only the left columns plus 'Factor' remain.
    expected = pd.merge(left, right.reset_index(),
                        left_on=['Cat1', 'Int1'],
                        right_on=['Cat', 'Int'], how='left')
    expected = expected.drop(['Cat', 'Int'], axis=1)

    result = left.join(right, on=['Cat1', 'Int1'])

    # Compare as DataFrames (index, columns and dtypes) rather than with
    # the generic approximate-equality helper.
    pd.testing.assert_frame_equal(expected, result)

0 commit comments

Comments
 (0)