Skip to content

Clean up DataFrame.setitem behavior for duplicate columns #39403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import extract_array, sanitize_masked_array
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.indexers import check_key_length
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
DatetimeIndex,
Expand Down Expand Up @@ -3223,9 +3224,8 @@ def _setitem_array(self, key, value):
self._check_setitem_copy()
self.iloc[indexer] = value
else:
if isinstance(value, DataFrame) and self.columns.is_unique:
if len(value.columns) != len(key):
raise ValueError("Columns must be same length as key")
if isinstance(value, DataFrame):
check_key_length(self.columns, key, value)
for k1, k2 in zip(key, value.columns):
self[k1] = value[k2]
else:
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""
Low-dependency indexing utilities.
"""
from __future__ import annotations

from typing import TYPE_CHECKING
import warnings

import numpy as np
Expand All @@ -17,6 +20,10 @@
)
from pandas.core.dtypes.generic import ABCIndex, ABCSeries

if TYPE_CHECKING:
from pandas.core.frame import DataFrame
from pandas.core.indexes.base import Index

# -----------------------------------------------------------
# Indexer Identification

Expand Down Expand Up @@ -376,6 +383,32 @@ def unpack_1tuple(tup):
return tup


def check_key_length(columns: Index, key, value: DataFrame):
    """
    Validate that ``key`` and ``value`` agree on how many columns are being set.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        The keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and the length of ``key`` differs from the
        number of columns in ``value``, or if ``columns`` is not unique and
        the number of positions ``key`` resolves to in ``columns`` differs
        from the number of columns in ``value``.
    """
    mismatch_msg = "Columns must be same length as key"

    if columns.is_unique:
        # With unique labels every key addresses at most one column, so the
        # key length itself is what has to match.
        if len(value.columns) != len(key):
            raise ValueError(mismatch_msg)
        return

    # With duplicated labels a single key may address several columns, so
    # compare against the positional indexer instead of the raw key.
    # Missing keys in columns are represented as -1
    positions = columns.get_indexer_non_unique(key)[0]
    if len(positions) != len(value.columns):
        raise ValueError(mismatch_msg)


# -----------------------------------------------------------
# Public indexer validation

Expand Down
14 changes: 13 additions & 1 deletion pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,23 @@ def test_setitem_df_wrong_column_number(self, cols):
def test_setitem_listlike_indexer_duplicate_columns(self):
    """Assign a DataFrame through a list key when the target has duplicate labels."""
    # GH#38604
    df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
    # NOTE(review): this binding is rebound by the very next statement before
    # it is read; it looks like the pre-change side of the diff -- confirm.
    rhs = DataFrame([[10, 11, 12]], columns=["d", "e", "c"])
    rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
    # Key ["a", "b"] against columns ["a", "b", "b"]: the expected frame shows
    # all three existing columns replaced by the values of rhs.
    df[["a", "b"]] = rhs
    expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
    tm.assert_frame_equal(df, expected)

    # "c" is not among df's columns at this point; the expected frame has a
    # new trailing "c" column holding 10 while "a", "b", "b" are unchanged.
    df[["c", "b"]] = rhs
    expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
    tm.assert_frame_equal(df, expected)

def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
    """Setting a two-column frame via ["a", "b"] on columns a, b, b must raise."""
    # GH#39403
    target = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
    too_narrow = DataFrame([[10, 11]], columns=["a", "b"])
    # The key is given here; the right-hand side above supplies two columns.
    key = ["a", "b"]
    with pytest.raises(ValueError, match="Columns must be same length as key"):
        target[key] = too_narrow


class TestDataFrameSetItemWithExpansion:
def test_setitem_listlike_views(self):
Expand Down