Skip to content

Commit 92c2910

Browse files
stijnvanhoey authored and TomAugspurger committed
DOC: update the pandas.Index.duplicated docstring (#20117)
1 parent e6c2647 commit 92c2910

File tree

5 files changed

+126
-22
lines changed

5 files changed

+126
-22
lines changed

Diff for: pandas/core/base.py

-18
Original file line numberDiff line numberDiff line change
@@ -1197,24 +1197,6 @@ def drop_duplicates(self, keep='first', inplace=False):
11971197
else:
11981198
return result
11991199

1200-
_shared_docs['duplicated'] = (
1201-
"""Return boolean %(duplicated)s denoting duplicate values
1202-
1203-
Parameters
1204-
----------
1205-
keep : {'first', 'last', False}, default 'first'
1206-
- ``first`` : Mark duplicates as ``True`` except for the first
1207-
occurrence.
1208-
- ``last`` : Mark duplicates as ``True`` except for the last
1209-
occurrence.
1210-
- False : Mark all duplicates as ``True``.
1211-
1212-
Returns
1213-
-------
1214-
duplicated : %(duplicated)s
1215-
""")
1216-
1217-
@Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs)
12181200
def duplicated(self, keep='first'):
12191201
from pandas.core.algorithms import duplicated
12201202
if isinstance(self, ABCIndexClass):

Diff for: pandas/core/indexes/base.py

+53-1
Original file line numberDiff line numberDiff line change
@@ -4325,8 +4325,60 @@ def drop_duplicates(self, keep='first'):
43254325
"""
43264326
return super(Index, self).drop_duplicates(keep=keep)
43274327

4328-
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
43294328
def duplicated(self, keep='first'):
4329+
"""
4330+
Indicate duplicate index values.
4331+
4332+
Duplicated values are indicated as ``True`` values in the resulting
4333+
array. Either all duplicates, all except the first, or all except the
4334+
last occurrence of duplicates can be indicated.
4335+
4336+
Parameters
4337+
----------
4338+
keep : {'first', 'last', False}, default 'first'
4339+
Which occurrence, if any, in each set of duplicates to leave unmarked.
4340+
4341+
- 'first' : Mark duplicates as ``True`` except for the first
4342+
occurrence.
4343+
- 'last' : Mark duplicates as ``True`` except for the last
4344+
occurrence.
4345+
- ``False`` : Mark all duplicates as ``True``.
4346+
4347+
Examples
4348+
--------
4349+
By default, for each set of duplicated values, the first occurrence is
4350+
set to False and all others to True:
4351+
4352+
>>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
4353+
>>> idx.duplicated()
4354+
array([False, False, True, False, True])
4355+
4356+
which is equivalent to
4357+
4358+
>>> idx.duplicated(keep='first')
4359+
array([False, False, True, False, True])
4360+
4361+
By using 'last', the last occurrence of each set of duplicated values
4362+
is set to False and all others to True:
4363+
4364+
>>> idx.duplicated(keep='last')
4365+
array([ True, False, True, False, False])
4366+
4367+
By setting keep to ``False``, all duplicates are True:
4368+
4369+
>>> idx.duplicated(keep=False)
4370+
array([ True, False, True, False, True])
4371+
4372+
Returns
4373+
-------
4374+
numpy.ndarray
4375+
4376+
See Also
4377+
--------
4378+
pandas.Series.duplicated : Equivalent method on pandas.Series
4379+
pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame
4380+
pandas.Index.drop_duplicates : Remove duplicate values from Index
4381+
"""
43304382
return super(Index, self).duplicated(keep=keep)
43314383

43324384
_index_shared_docs['fillna'] = """

Diff for: pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def unique(self, level=None):
399399
return self._shallow_copy(result, categories=result.categories,
400400
ordered=result.ordered)
401401

402-
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
402+
@Appender(Index.duplicated.__doc__)
403403
def duplicated(self, keep='first'):
404404
from pandas._libs.hashtable import duplicated_int64
405405
codes = self.codes.astype('i8')

Diff for: pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,7 @@ def f(k, stringify):
916916
for k, stringify in zip(key, self._have_mixed_levels)])
917917
return hash_tuple(key)
918918

919-
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
919+
@Appender(Index.duplicated.__doc__)
920920
def duplicated(self, keep='first'):
921921
from pandas.core.sorting import get_group_index
922922
from pandas._libs.hashtable import duplicated_int64

Diff for: pandas/core/series.py

+71-1
Original file line numberDiff line numberDiff line change
@@ -1458,8 +1458,78 @@ def drop_duplicates(self, keep='first', inplace=False):
14581458
"""
14591459
return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
14601460

1461-
@Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs)
14621461
def duplicated(self, keep='first'):
1462+
"""
1463+
Indicate duplicate Series values.
1464+
1465+
Duplicated values are indicated as ``True`` values in the resulting
1466+
Series. Either all duplicates, all except the first or all except the
1467+
last occurrence of duplicates can be indicated.
1468+
1469+
Parameters
1470+
----------
1471+
keep : {'first', 'last', False}, default 'first'
1472+
- 'first' : Mark duplicates as ``True`` except for the first
1473+
occurrence.
1474+
- 'last' : Mark duplicates as ``True`` except for the last
1475+
occurrence.
1476+
- ``False`` : Mark all duplicates as ``True``.
1477+
1478+
Examples
1479+
--------
1480+
By default, for each set of duplicated values, the first occurrence is
1481+
set to False and all others to True:
1482+
1483+
>>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
1484+
>>> animals.duplicated()
1485+
0 False
1486+
1 False
1487+
2 True
1488+
3 False
1489+
4 True
1490+
dtype: bool
1491+
1492+
which is equivalent to
1493+
1494+
>>> animals.duplicated(keep='first')
1495+
0 False
1496+
1 False
1497+
2 True
1498+
3 False
1499+
4 True
1500+
dtype: bool
1501+
1502+
By using 'last', the last occurrence of each set of duplicated values
1503+
is set to False and all others to True:
1504+
1505+
>>> animals.duplicated(keep='last')
1506+
0 True
1507+
1 False
1508+
2 True
1509+
3 False
1510+
4 False
1511+
dtype: bool
1512+
1513+
By setting keep to ``False``, all duplicates are True:
1514+
1515+
>>> animals.duplicated(keep=False)
1516+
0 True
1517+
1 False
1518+
2 True
1519+
3 False
1520+
4 True
1521+
dtype: bool
1522+
1523+
Returns
1524+
-------
1525+
pandas.core.series.Series
1526+
1527+
See Also
1528+
--------
1529+
pandas.Index.duplicated : Equivalent method on pandas.Index
1530+
pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame
1531+
pandas.Series.drop_duplicates : Remove duplicate values from Series
1532+
"""
14631533
return super(Series, self).duplicated(keep=keep)
14641534

14651535
def idxmin(self, axis=None, skipna=True, *args, **kwargs):

0 commit comments

Comments
 (0)