Skip to content

Commit e23fa82

Browse files
committed
API: disallow duplicate level names
closes #18872
1 parent 775099c commit e23fa82

File tree

10 files changed

+49
-85
lines changed

10 files changed

+49
-85
lines changed

Diff for: doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ Other API Changes
179179
- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`)
180180
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
181181
- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
182+
- Level names of a ``MultiIndex`` (when not ``None``) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`)
182183
- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
183184
- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`)
184185
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)

Diff for: pandas/core/indexes/multi.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -579,23 +579,22 @@ def _set_names(self, names, level=None, validate=True):
579579

580580
if level is None:
581581
level = range(self.nlevels)
582+
used = set()
582583
else:
583584
level = [self._get_level_number(l) for l in level]
585+
used = set(self.levels[l].name
586+
for l in set(range(self.nlevels)) - set(level))
584587

585588
# set the name
586589
for l, name in zip(level, names):
590+
if name is not None and name in used:
591+
raise ValueError("Duplicated level name: {}.".format(name))
587592
self.levels[l].rename(name, inplace=True)
593+
used.add(name)
588594

589595
names = property(fset=_set_names, fget=_get_names,
590596
doc="Names of levels in MultiIndex")
591597

592-
def _reference_duplicate_name(self, name):
593-
"""
594-
Returns True if the name referred to in self.names is duplicated.
595-
"""
596-
# count the times name equals an element in self.names.
597-
return sum(name == n for n in self.names) > 1
598-
599598
def _format_native_types(self, na_rep='nan', **kwargs):
600599
new_levels = []
601600
new_labels = []

Diff for: pandas/core/reshape/reshape.py

-11
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,6 @@ def __init__(self, values, index, level=-1, value_columns=None,
9191

9292
self.index = index
9393

94-
if isinstance(self.index, MultiIndex):
95-
if index._reference_duplicate_name(level):
96-
msg = ("Ambiguous reference to {level}. The index "
97-
"names are not unique.".format(level=level))
98-
raise ValueError(msg)
99-
10094
self.level = self.index._get_level_number(level)
10195

10296
# when index includes `nan`, need to lift levels/strides by 1
@@ -502,11 +496,6 @@ def factorize(index):
502496
return categories, codes
503497

504498
N, K = frame.shape
505-
if isinstance(frame.columns, MultiIndex):
506-
if frame.columns._reference_duplicate_name(level):
507-
msg = ("Ambiguous reference to {level}. The column "
508-
"names are not unique.".format(level=level))
509-
raise ValueError(msg)
510499

511500
# Will also convert negative level numbers and check if out of bounds.
512501
level_num = frame.columns._get_level_number(level)

Diff for: pandas/tests/frame/test_alter_axes.py

+19-17
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,20 @@ def test_set_index2(self):
130130
result = df.set_index(df.C)
131131
assert result.index.name == 'C'
132132

133+
@pytest.mark.parametrize('level', ['a', pd.Series(range(3), name='a')])
134+
def test_set_index_duplicate_names(self, level):
135+
# GH18872
136+
df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b'])
137+
138+
# Pass an existing level name:
139+
df.index.name = 'a'
140+
pytest.raises(ValueError, df.set_index, level, append=True)
141+
pytest.raises(ValueError, df.set_index, [level], append=True)
142+
143+
# Pass twice the same level name:
144+
df.index.name = 'c'
145+
pytest.raises(ValueError, df.set_index, [level, level])
146+
133147
def test_set_index_nonuniq(self):
134148
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
135149
'B': ['one', 'two', 'three', 'one', 'two'],
@@ -591,19 +605,6 @@ def test_reorder_levels(self):
591605
index=e_idx)
592606
assert_frame_equal(result, expected)
593607

594-
result = df.reorder_levels([0, 0, 0])
595-
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
596-
labels=[[0, 0, 0, 0, 0, 0],
597-
[0, 0, 0, 0, 0, 0],
598-
[0, 0, 0, 0, 0, 0]],
599-
names=['L0', 'L0', 'L0'])
600-
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
601-
index=e_idx)
602-
assert_frame_equal(result, expected)
603-
604-
result = df.reorder_levels(['L0', 'L0', 'L0'])
605-
assert_frame_equal(result, expected)
606-
607608
def test_reset_index(self):
608609
stacked = self.frame.stack()[::2]
609610
stacked = DataFrame({'foo': stacked, 'bar': stacked})
@@ -831,7 +832,7 @@ def test_set_index_names(self):
831832

832833
mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B'])
833834
mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values,
834-
names=['A', 'B', 'A', 'B'])
835+
names=['A', 'B', 'C', 'D'])
835836

836837
df = df.set_index(['A', 'B'])
837838

@@ -843,13 +844,14 @@ def test_set_index_names(self):
843844
# Check actual equality
844845
tm.assert_index_equal(df.set_index(df.index).index, mi)
845846

847+
idx2 = df.index.rename(['C', 'D'])
848+
846849
# Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
847850
# than a pair of tuples
848-
assert isinstance(df.set_index(
849-
[df.index, df.index]).index, MultiIndex)
851+
assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex)
850852

851853
# Check equality
852-
tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2)
854+
tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
853855

854856
def test_rename_objects(self):
855857
renamed = self.mixed_frame.rename(columns=str.upper)

Diff for: pandas/tests/frame/test_reshape.py

-10
Original file line numberDiff line numberDiff line change
@@ -560,16 +560,6 @@ def test_unstack_dtypes(self):
560560
assert left.shape == (3, 2)
561561
tm.assert_frame_equal(left, right)
562562

563-
def test_unstack_non_unique_index_names(self):
564-
idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
565-
names=['c1', 'c1'])
566-
df = DataFrame([1, 2], index=idx)
567-
with pytest.raises(ValueError):
568-
df.unstack('c1')
569-
570-
with pytest.raises(ValueError):
571-
df.T.stack('c1')
572-
573563
def test_unstack_nan_index(self): # GH7466
574564
cast = lambda val: '{0:1}'.format('' if val != val else val)
575565
nan = np.nan

Diff for: pandas/tests/groupby/test_categorical.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -388,15 +388,19 @@ def test_groupby_multi_categorical_as_index(self):
388388
columns=['cat', 'A', 'B'])
389389
tm.assert_frame_equal(result, expected)
390390

391-
# another not in-axis grouper (conflicting names in index)
392-
s = Series(['a', 'b', 'b'], name='cat')
391+
# another not in-axis grouper
392+
s = Series(['a', 'b', 'b'], name='cat2')
393393
result = df.groupby(['cat', s], as_index=False).sum()
394394
expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
395395
'A': [10.0, nan, nan, 22.0, nan, nan],
396396
'B': [101.0, nan, nan, 205.0, nan, nan]},
397397
columns=['cat', 'A', 'B'])
398398
tm.assert_frame_equal(result, expected)
399399

400+
# GH18872: conflicting names in desired index
401+
pytest.raises(ValueError, lambda: df.groupby(['cat',
402+
s.rename('cat')]).sum())
403+
400404
# is original index dropped?
401405
expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
402406
'A': [10, 11, 10, 11, 10, 11],

Diff for: pandas/tests/indexes/test_multi.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -536,15 +536,6 @@ def test_names(self):
536536
level_names = [level.name for level in index.levels]
537537
assert ind_names == level_names
538538

539-
def test_reference_duplicate_name(self):
540-
idx = MultiIndex.from_tuples(
541-
[('a', 'b'), ('c', 'd')], names=['x', 'x'])
542-
assert idx._reference_duplicate_name('x')
543-
544-
idx = MultiIndex.from_tuples(
545-
[('a', 'b'), ('c', 'd')], names=['x', 'y'])
546-
assert not idx._reference_duplicate_name('x')
547-
548539
def test_astype(self):
549540
expected = self.index.copy()
550541
actual = self.index.astype('O')
@@ -609,6 +600,21 @@ def test_constructor_mismatched_label_levels(self):
609600
with tm.assert_raises_regex(ValueError, label_error):
610601
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
611602

603+
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
604+
[1, 'a', 1]])
605+
def test_duplicate_level_names(self, names):
606+
# GH18872
607+
pytest.raises(ValueError, pd.MultiIndex.from_product,
608+
[[0, 1]] * 3, names=names)
609+
610+
# With .rename()
611+
mi = pd.MultiIndex.from_product([[0, 1]] * 3)
612+
pytest.raises(ValueError, mi.rename, names)
613+
614+
# With .rename(., level=)
615+
mi.rename(names[0], level=1, inplace=True)
616+
pytest.raises(ValueError, mi.rename, names[:2], level=[0, 2])
617+
612618
def assert_multiindex_copied(self, copy, original):
613619
# Levels should be (at least, shallow copied)
614620
tm.assert_copy(copy.levels, original.levels)
@@ -667,11 +673,6 @@ def test_changing_names(self):
667673
shallow_copy.names = [name + "c" for name in shallow_copy.names]
668674
self.check_level_names(self.index, new_names)
669675

670-
def test_duplicate_names(self):
671-
self.index.names = ['foo', 'foo']
672-
tm.assert_raises_regex(KeyError, 'Level foo not found',
673-
self.index._get_level_number, 'foo')
674-
675676
def test_get_level_number_integer(self):
676677
self.index.names = [1, 0]
677678
assert self.index._get_level_number(1) == 0

Diff for: pandas/tests/io/test_pytables.py

-6
Original file line numberDiff line numberDiff line change
@@ -1910,12 +1910,6 @@ def make_index(names=None):
19101910
'a', 'b'], index=make_index(['date', 'a', 't']))
19111911
pytest.raises(ValueError, store.append, 'df', df)
19121912

1913-
# dup within level
1914-
_maybe_remove(store, 'df')
1915-
df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
1916-
index=make_index(['date', 'date', 'date']))
1917-
pytest.raises(ValueError, store.append, 'df', df)
1918-
19191913
# fully names
19201914
_maybe_remove(store, 'df')
19211915
df = DataFrame(np.zeros((12, 2)), columns=[

Diff for: pandas/tests/reshape/test_pivot.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -1578,11 +1578,6 @@ def test_crosstab_with_numpy_size(self):
15781578
tm.assert_frame_equal(result, expected)
15791579

15801580
def test_crosstab_dup_index_names(self):
1581-
# GH 13279
1581+
# GH 13279, GH 18872
15821582
s = pd.Series(range(3), name='foo')
1583-
result = pd.crosstab(s, s)
1584-
expected_index = pd.Index(range(3), name='foo')
1585-
expected = pd.DataFrame(np.eye(3, dtype=np.int64),
1586-
index=expected_index,
1587-
columns=expected_index)
1588-
tm.assert_frame_equal(result, expected)
1583+
pytest.raises(ValueError, pd.crosstab, s, s)

Diff for: pandas/tests/series/test_alter_axes.py

-11
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,6 @@ def test_reorder_levels(self):
214214
expected = Series(np.arange(6), index=e_idx)
215215
assert_series_equal(result, expected)
216216

217-
result = s.reorder_levels([0, 0, 0])
218-
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
219-
labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
220-
[0, 0, 0, 0, 0, 0]],
221-
names=['L0', 'L0', 'L0'])
222-
expected = Series(np.arange(6), index=e_idx)
223-
assert_series_equal(result, expected)
224-
225-
result = s.reorder_levels(['L0', 'L0', 'L0'])
226-
assert_series_equal(result, expected)
227-
228217
def test_rename_axis_inplace(self):
229218
# GH 15704
230219
series = self.ts.copy()

0 commit comments

Comments
 (0)