
Drop duplicate indices #59133


Closed · wants to merge 4 commits
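
This PR adds an ``index`` keyword to ``DataFrame.drop_duplicates`` so duplicates can be dropped by index value rather than by column values. A minimal usage sketch, based on the docstring examples added below (``index=True`` is the behavior proposed by this PR, not part of released pandas):

import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 1])

# Default keep="first": the first row for each duplicated label survives
df.drop_duplicates(index=True)               # rows labeled 0 and the first 1

# keep="last": the last occurrence of each label survives instead
df.drop_duplicates(index=True, keep="last")  # rows labeled 0 and the last 1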
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -41,6 +41,7 @@ Other enhancements
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
- :meth:`DataFrame.drop_duplicates` now supports a new ``index`` parameter to drop rows with duplicate index values (:issue:`58648`)
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
47 changes: 38 additions & 9 deletions pandas/core/frame.py
@@ -46,7 +46,6 @@
lib,
properties,
)
from pandas._libs.hashtable import duplicated
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
@@ -175,7 +174,6 @@
treat_as_nested,
)
from pandas.core.methods import selectn
from pandas.core.reshape.melt import melt
from pandas.core.series import Series
from pandas.core.shared_docs import _shared_docs
from pandas.core.sorting import (
@@ -1386,7 +1384,9 @@ def style(self) -> Styler:

return Styler(self)

_shared_docs["items"] = r"""
_shared_docs[
"items"
] = r"""
Iterate over (column name, Series) pairs.

Iterates over the DataFrame columns, returning a tuple with
@@ -6553,6 +6553,7 @@ def drop_duplicates(
keep: DropKeep = "first",
inplace: bool = False,
ignore_index: bool = False,
index: bool = False,
) -> DataFrame | None:
"""
Return DataFrame with duplicate rows removed.
@@ -6577,6 +6578,9 @@
ignore_index : bool, default ``False``
If ``True``, the resulting axis will be labeled 0, 1, …, n - 1.

index : bool, default ``False``
If ``True``, drop duplicates based on the index instead of the columns.

Returns
-------
DataFrame or None
@@ -6633,14 +6637,33 @@
1 Yum Yum cup 4.0
2 Indomie cup 3.5
4 Indomie pack 5.0

To remove duplicates based on the index, use ``index=True``.

>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 1])
>>> df.drop_duplicates(index=True)
A
0 1
1 2

To remove duplicates based on the index and keep the last occurrence of each label, combine ``keep='last'`` with ``index=True``.

>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 1])
>>> df.drop_duplicates(index=True, keep="last")
A
0 1
1 3
"""
if self.empty:
return self.copy(deep=False)

inplace = validate_bool_kwarg(inplace, "inplace")
ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")

result = self[-self.duplicated(subset, keep=keep)]
if index:
subset = self.index.names

result = self[-self.duplicated(subset=subset, keep=keep)]
if ignore_index:
result.index = default_index(len(result))

@@ -9076,7 +9099,9 @@ def groupby(
dropna=dropna,
)

_shared_docs["pivot"] = """
_shared_docs[
"pivot"
] = """
Return reshaped DataFrame organized by given index / column values.

Reshape data (produce a "pivot" table) based on column values. Uses
@@ -9220,7 +9245,9 @@ def pivot(

return pivot(self, index=index, columns=columns, values=values)

_shared_docs["pivot_table"] = """
_shared_docs[
"pivot_table"
] = """
Create a spreadsheet-style pivot table as a DataFrame.

The levels in the pivot table will be stored in MultiIndex objects
@@ -10497,9 +10524,11 @@ def _append(

index = Index(
[other.name],
name=self.index.names
if isinstance(self.index, MultiIndex)
else self.index.name,
name=(
self.index.names
if isinstance(self.index, MultiIndex)
else self.index.name
),
)
row_df = other.to_frame().T
# infer_objects is needed for
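For comparison, the result the new parameter targets is already expressible in released pandas via the long-standing ``Index.duplicated`` idiom; a short sketch using only existing public API:

import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 1])

# Index.duplicated returns a boolean array marking repeated index labels;
# keep="first" leaves the first occurrence unmarked, mirroring the
# drop_duplicates default.
result = df[~df.index.duplicated(keep="first")]  # same rows as drop_duplicates(index=True)
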
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_drop_duplicates.py
@@ -441,6 +441,26 @@ def test_drop_duplicates_null_in_object_column(nulls_fixture):
tm.assert_frame_equal(result, df)


def test_drop_duplicates_index():
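# GH#58648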
# Example 1: integer index with a duplicated label; first occurrence is kept
df = DataFrame({"A": [1, 2, 3]}, index=[0, 1, 1])
result = df.drop_duplicates(index=True)
expected = DataFrame({"A": [1, 2]}, index=[0, 1])
tm.assert_frame_equal(result, expected)

# Example 2: unique string index; the frame is unchanged
df2 = DataFrame({"A": [1, 2, 3]}, index=["a", "b", "c"])
result2 = df2.drop_duplicates(index=True)
expected2 = DataFrame({"A": [1, 2, 3]}, index=["a", "b", "c"])
tm.assert_frame_equal(result2, expected2)

# Example 3: duplicated string label is dropped; remaining labels are preserved (index is not reset)
df3 = DataFrame({"A": [1, 2, 3]}, index=["a", "b", "a"])
result3 = df3.drop_duplicates(index=True)
expected3 = DataFrame({"A": [1, 2]}, index=["a", "b"])
tm.assert_frame_equal(result3, expected3)


def test_drop_duplicates_series_vs_dataframe(keep):
# GH#14192
df = DataFrame(
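The new test can be run on its own with pytest's ``-k`` name filter (standard pytest usage, independent of this PR):

pytest pandas/tests/frame/methods/test_drop_duplicates.py -k drop_duplicates_index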