diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7111d54d65815..1da61d4b83e16 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -562,6 +562,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). +- Bug in :meth:`Series.nunique` with ``dropna=True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/base.py b/pandas/core/base.py index c91e4db004f2a..8db1d8073fb7d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -33,7 +33,7 @@ is_scalar, ) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import isna, remove_na_arraylike from pandas.core import algorithms from pandas.core.accessor import DirNamesMixin @@ -1032,11 +1032,8 @@ def nunique(self, dropna: bool = True) -> int: >>> s.nunique() 4 """ - uniqs = self.unique() - n = len(uniqs) - if dropna and isna(uniqs).any(): - n -= 1 - return n + obj = remove_na_arraylike(self) if dropna else self + return len(obj.unique()) @property def is_unique(self) -> bool: diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index e5592cef59592..1a554c85e018b 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -121,3 +121,11 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): else: expected = np.array(["\ud83d"], dtype=object) 
tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique_dropna(dropna): + # GH37566 + s = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT]) + res = s.nunique(dropna) + assert res == (1 if dropna else 5)