Skip to content

Commit 7b795b2

Browse files
authored
Clean up DataFrame.setitem behavior for duplicate columns (#39403)
1 parent 89ceec6 commit 7b795b2

File tree

3 files changed

+49
-4
lines changed

3 files changed

+49
-4
lines changed

Diff for: pandas/core/frame.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@
129129
from pandas.core.arrays.sparse import SparseFrameAccessor
130130
from pandas.core.construction import extract_array, sanitize_masked_array
131131
from pandas.core.generic import NDFrame, _shared_docs
132+
from pandas.core.indexers import check_key_length
132133
from pandas.core.indexes import base as ibase
133134
from pandas.core.indexes.api import (
134135
DatetimeIndex,
@@ -3229,9 +3230,8 @@ def _setitem_array(self, key, value):
32293230
self._check_setitem_copy()
32303231
self.iloc[indexer] = value
32313232
else:
3232-
if isinstance(value, DataFrame) and self.columns.is_unique:
3233-
if len(value.columns) != len(key):
3234-
raise ValueError("Columns must be same length as key")
3233+
if isinstance(value, DataFrame):
3234+
check_key_length(self.columns, key, value)
32353235
for k1, k2 in zip(key, value.columns):
32363236
self[k1] = value[k2]
32373237
else:

Diff for: pandas/core/indexers.py

+33
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
"""
22
Low-dependency indexing utilities.
33
"""
4+
from __future__ import annotations
5+
6+
from typing import TYPE_CHECKING
47
import warnings
58

69
import numpy as np
@@ -17,6 +20,10 @@
1720
)
1821
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
1922

23+
if TYPE_CHECKING:
24+
from pandas.core.frame import DataFrame
25+
from pandas.core.indexes.base import Index
26+
2027
# -----------------------------------------------------------
2128
# Indexer Identification
2229

@@ -376,6 +383,32 @@ def unpack_1tuple(tup):
376383
return tup
377384

378385

386+
def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Check that a list-like key and the DataFrame being assigned agree in width.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame being indexed.
    key : list-like
        The keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and the length of ``key`` differs from the
        number of columns in ``value``, or if ``columns`` is not unique and
        the number of positions ``key`` resolves to in ``columns`` differs
        from the number of columns in ``value``.
    """
    if columns.is_unique:
        n_targets = len(key)
    else:
        # With duplicate labels one key can match several columns, so count
        # the positions returned by the indexer rather than the keys.
        # Missing keys in columns are represented as -1
        n_targets = len(columns.get_indexer_non_unique(key)[0])

    if n_targets != len(value.columns):
        raise ValueError("Columns must be same length as key")
411+
379412
# -----------------------------------------------------------
380413
# Public indexer validation
381414

Diff for: pandas/tests/frame/indexing/test_setitem.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -378,11 +378,23 @@ def test_setitem_df_wrong_column_number(self, cols):
378378
def test_setitem_listlike_indexer_duplicate_columns(self):
379379
# GH#38604
380380
df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
381-
rhs = DataFrame([[10, 11, 12]], columns=["d", "e", "c"])
381+
rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
382382
df[["a", "b"]] = rhs
383383
expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
384384
tm.assert_frame_equal(df, expected)
385385

386+
df[["c", "b"]] = rhs
387+
expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
388+
tm.assert_frame_equal(df, expected)
389+
390+
def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
    # GH#39403
    # The frame has a duplicated "b" label while the right-hand side only
    # supplies two columns; the assignment is expected to be rejected.
    frame = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
    two_col_rhs = DataFrame([[10, 11]], columns=["a", "b"])
    with pytest.raises(ValueError, match="Columns must be same length as key"):
        frame[["a", "b"]] = two_col_rhs
397+
386398

387399
class TestDataFrameSetItemWithExpansion:
388400
def test_setitem_listlike_views(self):

0 commit comments

Comments
 (0)