diff --git a/RELEASE.rst b/RELEASE.rst index 20167e1918540..e57c6c565e2cf 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -51,12 +51,15 @@ pandas 0.12.0 columns (GH3437_) - ``.loc`` was not raising when passed an integer list (GH3449_) - Unordered time series selection was misbehaving when using label slicing (GH3448_) + - Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH3251: https://github.com/pydata/pandas/issues/3251 .. _GH3379: https://github.com/pydata/pandas/issues/3379 .. _GH3038: https://github.com/pydata/pandas/issues/3038 .. _GH3437: https://github.com/pydata/pandas/issues/3437 +.. _GH3455: https://github.com/pydata/pandas/issues/3455 +.. _GH3457: https://github.com/pydata/pandas/issues/3457 .. _GH3448: https://github.com/pydata/pandas/issues/3448 .. _GH3449: https://github.com/pydata/pandas/issues/3449 diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 70fe378ad3c07..7562d20363027 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -563,26 +563,34 @@ def _convert_to_indexer(self, obj, axis=0): check = labels.levels[0].get_indexer(objarr) else: level = None - # XXX + + # unique index if labels.is_unique: indexer = check = labels.get_indexer(objarr) + + # non-unique (dups) else: - mask = np.zeros(len(labels), dtype=bool) + indexer = [] + check = np.arange(len(labels)) lvalues = labels.values for x in objarr: # ugh to_or = lib.map_infer(lvalues, x.__eq__) if not to_or.any(): raise KeyError('%s not in index' % str(x)) - mask |= to_or - indexer = check = mask.nonzero()[0] + # add the indices (as we want to take) + indexer.extend(check[to_or]) + + indexer = Index(indexer) + mask = check == -1 if mask.any(): raise KeyError('%s not in index' % objarr[mask]) - + return indexer + else: return labels.get_loc(obj) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 6106509d530f4..6bba9f6d32efc 100644 --- 
a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4621,7 +4621,6 @@ def test_to_csv_from_csv(self): xp.columns = map(int,xp.columns) assert_frame_equal(xp,rs) - @slow def test_to_csv_moar(self): from pandas.util.testing import makeCustomDataframe as mkdf @@ -4935,6 +4934,21 @@ def test_to_csv_dups_cols(self): with ensure_clean() as filename: self.assertRaises(Exception, df.to_csv, filename) + # GH3457 + from pandas.util.testing import makeCustomDataframe as mkdf + + N=10 + df= mkdf(N, 3) + df.columns = ['a','a','b'] + + with ensure_clean() as filename: + df.to_csv(filename) + + # read_csv will rename the dups columns + result = read_csv(filename,index_col=0) + result = result.rename(columns={ 'a.1' : 'a' }) + assert_frame_equal(result,df) + def test_to_csv_chunking(self): aa=DataFrame({'A':range(100000)}) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 002282a21162d..86cd0ef524b35 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -761,6 +761,16 @@ def test_setitem_iloc(self): expected = DataFrame(np.array([0,101,102,3,104,105,6,7,8]).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]) assert_frame_equal(df,expected) + def test_dups_fancy_indexing(self): + + # GH 3455 + from pandas.util.testing import makeCustomDataframe as mkdf + df= mkdf(10, 3) + df.columns = ['a','a','b'] + cols = ['b','a'] + result = df[['b','a']].columns + expected = Index(['b','a','a']) + self.assert_(result.equals(expected)) if __name__ == '__main__': import nose