diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a1a0857fe6365..d4be2f60a9e7a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -373,6 +373,22 @@ is the case with :attr:`Period.end_time`, for example p.end_time +.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: + +Raise ValueError in ``DataFrame.to_dict(orient='index')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.to_dict` now raises ``ValueError`` when used with +``orient='index'`` and a non-unique index, instead of losing data (:issue:`22801`). + +.. ipython:: python + :okexcept: + + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + df + + df.to_dict(orient='index') + .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 986fe347898f5..db05c4372583a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1225,6 +1225,10 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): + if not self.index.is_unique: + raise ValueError( + "DataFrame index must be unique for orient='index'." + ) return into_c((t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples()) else: diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index a0e23d256c25b..61fe9d12c173c 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -71,6 +71,12 @@ def test_to_dict_timestamp(self): tm.assert_dict_equal(test_data_mixed.to_dict(orient='split'), expected_split_mixed) + def test_to_dict_index_not_unique_with_index_orient(self): + # GH22801 + # Data loss when indexes are not unique. Raise ValueError. 
+ df = DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + pytest.raises(ValueError, df.to_dict, orient='index') + def test_to_dict_invalid_orient(self): df = DataFrame({'A': [0, 1]}) pytest.raises(ValueError, df.to_dict, orient='xinvalid')