Skip to content

BUG: Have object dtype for empty Categorical.categories #17249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
@@ -379,6 +379,9 @@ Numeric
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
- Bug in the categorical constructor with empty values and categories causing
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
``Index`` with object dtype (:issue:`17248`)


Other
5 changes: 4 additions & 1 deletion pandas/core/categorical.py
Original file line number Diff line number Diff line change
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
# On list with NaNs, int values will be converted to float. Use
# "object" dtype to prevent this. In the end objects will be
# cast to int/... in the category assignment step.
dtype = 'object' if isna(values).any() else None
if len(values) == 0 or isna(values).any():
dtype = 'object'
else:
dtype = None
values = _sanitize_array(values, None, dtype=dtype)

if categories is None:
9 changes: 4 additions & 5 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
@@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
arrays = [[]] * N
names = list('ABC')[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
names=names)
tm.assert_index_equal(result, expected)

@@ -829,7 +829,7 @@ def test_from_product_empty(self):

# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Float64Index([], name='A')
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)

# 2 levels
@@ -838,7 +838,7 @@ def test_from_product_empty(self):
names = ['A', 'B']
for first, second in zip(l1, l2):
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[np.array(first), np.array(second)],
expected = MultiIndex(levels=[first, second],
labels=[[], []], names=names)
tm.assert_index_equal(result, expected)

@@ -847,8 +847,7 @@ def test_from_product_empty(self):
for N in range(4):
lvl2 = lrange(N)
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[np.array(A)
for A in [[], lvl2, []]],
expected = MultiIndex(levels=[[], lvl2, []],
labels=[[], [], []], names=names)
tm.assert_index_equal(result, expected)

2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
@@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

s1 = pd.Series([], dtype='category')
s2 = pd.Series([])
s2 = pd.Series([], dtype='object')

# different dtype => not-category
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
12 changes: 3 additions & 9 deletions pandas/tests/reshape/test_union_categoricals.py
Original file line number Diff line number Diff line change
@@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
exp = Categorical([])
tm.assert_categorical_equal(res, exp)

res = union_categoricals([pd.Categorical([]),
pd.Categorical([1.0])])
exp = Categorical([1.0])
res = union_categoricals([Categorical([]),
Categorical(['1'])])
exp = Categorical(['1'])
tm.assert_categorical_equal(res, exp)

# to make dtype equal
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
res = union_categoricals([nanc,
pd.Categorical([])])
tm.assert_categorical_equal(res, nanc)

def test_union_categorical_same_category(self):
# check fastpath
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
10 changes: 10 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
@@ -112,6 +112,16 @@ def test_setitem_listlike(self):
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
expected = Index([])
tm.assert_index_equal(c.categories, expected)

c = Categorical([], categories=[1, 2, 3])
expected = pd.Int64Index([1, 2, 3])
tm.assert_index_equal(c.categories, expected)

def test_constructor_unsortable(self):

# it works!