Skip to content

Commit 112b1ad

Browse files
committed
Fix issue-61209: Updated unique() behavior
1 parent b69a2ae commit 112b1ad

File tree

3 files changed

+73
-60
lines changed

3 files changed

+73
-60
lines changed

Diff for: pandas/core/base.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
)
1717

1818
import numpy as np
19+
from typing import Any
20+
from pandas._typing import ArrayLike
1921

2022
from pandas._libs import lib
2123
from pandas._typing import (
@@ -1096,13 +1098,28 @@ def value_counts(
10961098
dropna=dropna,
10971099
)
10981100

1099-
def unique(self, dropna: bool = True) -> ArrayLike:
    """
    Return unique values in the object.

    Parameters
    ----------
    dropna : bool, default True
        If True, exclude NA/null values from the result. If False, NA
        values found in the data are kept in the result.

    Returns
    -------
    ndarray or ExtensionArray
        ``numpy.ndarray`` when the underlying values are a NumPy array,
        otherwise whatever the backing array's own ``unique()`` returns.
    """
    # NOTE(review): the previous signature was ``unique(self)`` and never
    # filtered NA values; defaulting ``dropna`` to True changes what existing
    # ``obj.unique()`` calls return -- confirm this is the intended behavior.
    values = self._values
    if isinstance(values, np.ndarray):
        result = algorithms.unique1d(values)
    else:
        # Not a NumPy array: delegate to the array's own ``unique``.
        result = values.unique()

    if dropna:
        na_mask = isna(result)
        # Only re-index when there is something to drop, so NA-free data
        # is returned as computed instead of being copied a second time.
        if na_mask.any():
            result = result[~na_mask]
    return result
11071124

11081125
@final

Diff for: pandas/core/series.py

+16-58
Original file line numberDiff line numberDiff line change
@@ -2084,72 +2084,30 @@ def mode(self, dropna: bool = True) -> Series:
20842084
dtype=self.dtype,
20852085
).__finalize__(self, method="mode")
20862086

2087-
def unique(self, dropna: bool = True) -> ArrayLike:
    """
    Return unique values of Series object.

    Uniques are returned in order of appearance. Hash table-based unique,
    therefore does NOT sort.

    Parameters
    ----------
    dropna : bool, default True
        If True, exclude NA/null values from the result. If False, NA
        values found in the Series are kept in the result.

    Returns
    -------
    ndarray or ExtensionArray
        The unique values returned as a NumPy array. See Notes.

    See Also
    --------
    Series.drop_duplicates : Return Series with duplicate values removed.
    unique : Top-level unique method for any 1-d array-like object.
    Index.unique : Return Index with unique values from an Index object.

    Notes
    -----
    Returns the unique values as a NumPy array. In case of an
    extension-array backed Series, a new
    :class:`~api.extensions.ExtensionArray` of that type with just
    the unique values is returned. This includes

        * Categorical
        * Period
        * Datetime with Timezone
        * Datetime without Timezone
        * Timedelta
        * Interval
        * Sparse
        * IntegerNA

    See Examples section.

    Examples
    --------
    >>> pd.Series([2, 1, 3, 3], name="A").unique()
    array([2, 1, 3])

    >>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique()
    <DatetimeArray>
    ['2016-01-01 00:00:00']
    Length: 1, dtype: datetime64[s]

    >>> pd.Series(
    ...     [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
    ... ).unique()
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[s, US/Eastern]

    A Categorical will return categories in the order of
    appearance and with the same dtype.

    >>> pd.Series(pd.Categorical(list("baabc"))).unique()
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> pd.Series(
    ...     pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ... ).unique()
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    Missing values are dropped by default; pass ``dropna=False`` to keep
    them. A Series mixing integers with ``pd.NA`` has object dtype, so the
    result is an object array.

    >>> s = pd.Series([1, 2, 2, pd.NA])
    >>> s.unique()
    array([1, 2], dtype=object)

    >>> s.unique(dropna=False)
    array([1, 2, <NA>], dtype=object)
    """
    return super().unique(dropna=dropna)
21532111

21542112
@overload
21552113
def drop_duplicates(

Diff for: pandas/tests/series/test_arithmetic.py

+38
Original file line numberDiff line numberDiff line change
@@ -958,3 +958,41 @@ def test_rmod_consistent_large_series():
958958
expected = Series([1] * 10001)
959959

960960
tm.assert_series_equal(result, expected)
961+
962+
from pandas._testing import assert_numpy_array_equal, assert_extension_array_equal
963+
964+
# Test Case 1: Basic numeric unique with NA (dropna=False)
965+
def test_unique_numeric_dropna_false():
    # Integers mixed with pd.NA and no explicit dtype: with dropna=False the
    # NA must be kept once, at the position where it first appears.
    ser = pd.Series([1, 2, 2, pd.NA, 3, pd.NA])
    result = ser.unique(dropna=False)
    expected = np.array([1, 2, pd.NA, 3], dtype=object)
    # Use the module's ``tm`` alias, like the other tests in this file.
    tm.assert_numpy_array_equal(result, expected)
970+
971+
# Test Case 2: Empty Series
972+
def test_unique_empty_series():
    # An empty Series must give back an empty array of the same dtype, even
    # though the default NA filtering runs on the (empty) result.
    ser = pd.Series([], dtype="float64")
    result = ser.unique()
    expected = np.array([], dtype="float64")
    # Use the module's ``tm`` alias, like the other tests in this file.
    tm.assert_numpy_array_equal(result, expected)
977+
978+
# Test Case 3: Categorical data
979+
def test_unique_categorical():
    # Categorical-backed Series: dropna=False keeps the missing entry and the
    # result is compared as an extension array.
    ser = pd.Series(pd.Categorical(["a", "b", "a", pd.NA]))
    result = ser.unique(dropna=False)
    expected = pd.Categorical(["a", "b", pd.NA])
    # Use the module's ``tm`` alias, like the other tests in this file.
    tm.assert_extension_array_equal(result, expected)
984+
985+
986+
# Test Case 4: NA values
987+
def test_unique_with_nas_simple():
    # Nullable Int64 Series: covers both settings of ``dropna`` on an
    # extension-array backed Series.
    ser = pd.Series([1, 2, 2, pd.NA, 3, pd.NA], dtype="Int64")

    # Default (dropna=True): NA is removed, result stays an ExtensionArray.
    result = ser.unique()
    expected = pd.array([1, 2, 3], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    # dropna=False: NA is kept once, in order of first appearance.
    result_with_na = ser.unique(dropna=False)
    expected_with_na = pd.array([1, 2, pd.NA, 3], dtype="Int64")
    tm.assert_extension_array_equal(result_with_na, expected_with_na)

0 commit comments

Comments
 (0)