From 0d49d8aa2a8b34757669ad78d006685a2606e1cb Mon Sep 17 00:00:00 2001 From: Jan Rudolph Date: Mon, 20 Jul 2015 14:57:17 +0200 Subject: [PATCH 1/3] allow duplicate column names if they are not merged upon --- pandas/tools/merge.py | 11 +++-------- pandas/tools/tests/test_merge.py | 10 +++++++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index c7c578232cd0f..430828a3db31b 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -402,19 +402,14 @@ def _validate_specification(self): if self.left_on is None: raise MergeError('Must pass left_on or left_index=True') else: - if not self.left.columns.is_unique: - raise MergeError("Left data columns not unique: %s" - % repr(self.left.columns)) - - if not self.right.columns.is_unique: - raise MergeError("Right data columns not unique: %s" - % repr(self.right.columns)) - # use the common columns common_cols = self.left.columns.intersection( self.right.columns) if len(common_cols) == 0: raise MergeError('No common columns to perform merge on') + if not common_cols.is_unique: + raise MergeError("Data columns not unique: %s" + % repr(common_cols)) self.left_on = self.right_on = common_cols elif self.on is not None: if self.left_on is not None or self.right_on is not None: diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index d357182a60b1f..cd3581273b74d 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -843,7 +843,6 @@ def test_join_append_timedeltas(self): assert_frame_equal(result, expected) def test_overlapping_columns_error_message(self): - # #2649 df = DataFrame({'key': [1, 2, 3], 'v1': [4, 5, 6], 'v2': [7, 8, 9]}) @@ -853,7 +852,16 @@ def test_overlapping_columns_error_message(self): df.columns = ['key', 'foo', 'foo'] df2.columns = ['key', 'bar', 'bar'] + expected = DataFrame({'key': [1, 2, 3], + 'v1': [4, 5, 6], + 'v2': [7, 8, 9], + 'v3': [4, 5, 6], + 'v4': [7, 8, 9]}) + expected.columns = ['key', 'foo', 'foo', 'bar', 'bar'] + assert_frame_equal(merge(df, df2), expected) + # #2649 + df2.columns = ['key1', 'foo', 'foo'] self.assertRaises(ValueError, merge, df, df2) def _check_merge(x, y): From 05ba2df40d07e800badbe1438f3e48c9e822dafd Mon Sep 17 00:00:00 2001 From: Jan Rudolph Date: Thu, 23 Jul 2015 13:49:35 +0200 Subject: [PATCH 2/3] release note --- doc/source/whatsnew/v0.17.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index b2a1e10469a0f..a6ac16aec46ff 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -80,6 +80,8 @@ Other enhancements - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). +- ``merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). + .. _whatsnew_0170.api: .. _whatsnew_0170.api_breaking: From 4e850d46580bf6cb35056d5b67cb4ef21e774117 Mon Sep 17 00:00:00 2001 From: Jan Rudolph Date: Mon, 27 Jul 2015 14:47:05 +0200 Subject: [PATCH 3/3] clarified whatsnew --- doc/source/whatsnew/v0.17.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index a6ac16aec46ff..66edd6216e83e 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -80,7 +80,7 @@ Other enhancements - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). -- ``merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). +- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). .. _whatsnew_0170.api: