Skip to content

Commit 9c9dd53

Browse files
jdrudolph authored and Krzysztof Chomski committed
BUG: merging with a boolean/int categorical column (pandas-dev#17841)
* BUG: merging with a boolean/int categorical column pandas-dev#17187
1 parent 8fd5472 commit 9c9dd53

File tree

3 files changed

+26
-1
lines changed

3 files changed

+26
-1
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1017,6 +1017,7 @@ Categorical
10171017
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
10181018
- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
10191019
- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
1020+
- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
10201021

10211022
.. _whatsnew_0210.pypy:
10221023

Diff for: pandas/core/internals.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -5596,7 +5596,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
55965596
# preserve these for validation in _concat_compat
55975597
return self.block.values
55985598

5599-
if self.block.is_bool:
5599+
if self.block.is_bool and not self.block.is_categorical:
56005600
# External code requested filling/upcasting, bool values must
56015601
# be upcasted to object to avoid being upcasted to numeric.
56025602
values = self.block.astype(np.object_).values

Diff for: pandas/tests/reshape/test_merge.py

+24
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,30 @@ def test_dtype_on_categorical_dates(self):
15461546
result_inner = pd.merge(df, df2, how='inner', on=['date'])
15471547
assert_frame_equal(result_inner, expected_inner)
15481548

1549+
@pytest.mark.parametrize('category_column,categories,expected_categories',
1550+
[([False, True, True, False], [True, False],
1551+
[True, False]),
1552+
([2, 1, 1, 2], [1, 2], [1, 2]),
1553+
(['False', 'True', 'True', 'False'],
1554+
['True', 'False'], ['True', 'False'])])
1555+
def test_merging_with_bool_or_int_cateorical_column(self, category_column,
1556+
categories,
1557+
expected_categories):
1558+
# GH 17187
1559+
# merging with a boolean/int categorical column
1560+
df1 = pd.DataFrame({'id': [1, 2, 3, 4],
1561+
'cat': category_column})
1562+
df1['cat'] = df1['cat'].astype('category',
1563+
categories=categories, ordered=True)
1564+
df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
1565+
result = df1.merge(df2)
1566+
expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
1567+
'num': [1, 9]})
1568+
expected['cat'] = expected['cat'].astype('category',
1569+
categories=categories,
1570+
ordered=True)
1571+
assert_frame_equal(expected, result)
1572+
15491573

15501574
@pytest.fixture
15511575
def left_df():

0 commit comments

Comments
 (0)