From 4f5f7eb31d827cb4bb711371fd6819bf007b27b2 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 00:43:08 +0800 Subject: [PATCH 01/11] BUG: nunique not ignoring both None and np.nan --- pandas/core/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index c91e4db004f2a..4132630578c7e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1032,10 +1032,11 @@ def nunique(self, dropna: bool = True) -> int: >>> s.nunique() 4 """ - uniqs = self.unique() + if dropna: + uniqs = self.dropna().unique() + else: + uniqs = self.unique() n = len(uniqs) - if dropna and isna(uniqs).any(): - n -= 1 return n @property From 25cd909000010ed80398cb40d9e9730db5e5e7ba Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 00:44:41 +0800 Subject: [PATCH 02/11] Update test_unique.py --- pandas/tests/base/test_unique.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index e5592cef59592..98734c31e113a 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -121,3 +121,11 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): else: expected = np.array(["\ud83d"], dtype=object) tm.assert_numpy_array_equal(result, expected) + +def test_nunique_dropna(): + # test for #37566 + s = pd.Series(['yes','yes', pd.NA, np.nan, None, pd.NaT]) + res = s.nunique(dropna = True) + expected = 1 + assert res == expected + From 7f91c9009cc29a6b497a458b5487d1c0819287ed Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 00:51:37 +0800 Subject: [PATCH 03/11] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7111d54d65815..1ade6575fed4a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -562,6 +562,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). +- Bug in :meth:`nunique` with ``dropna = True`` returns wrong result when different kinds of NA-like values exist (:issue:`37566`) .. --------------------------------------------------------------------------- From 8c9e2d5db6e428b4b3ba6c01273536c6236bfb35 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 00:57:02 +0800 Subject: [PATCH 04/11] Update test_unique.py --- pandas/tests/base/test_unique.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 98734c31e113a..93ccecde168d4 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -122,10 +122,10 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): expected = np.array(["\ud83d"], dtype=object) tm.assert_numpy_array_equal(result, expected) + def test_nunique_dropna(): # test for #37566 - s = pd.Series(['yes','yes', pd.NA, np.nan, None, pd.NaT]) - res = s.nunique(dropna = True) + s = pd.Series(['yes', 'yes', pd.NA, np.nan, None, pd.NaT]) + res = s.nunique(dropna=True) expected = 1 assert res == expected - From 1de2b48f38858119a8db32f49a9731b7289c229a Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 08:45:34 +0800 Subject: [PATCH 05/11] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1ade6575fed4a..3f3a80d71f9a0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -562,7 +562,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Bug in :meth:`nunique` with ``dropna = True`` returns wrong result when different kinds of NA-like values exist (:issue:`37566`) +- Bug in :meth:`Series.nunique` with ``dropna = True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) .. --------------------------------------------------------------------------- From 89dc4d70bbefa6bd592793a3781b85f6aa9654de Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 08:47:06 +0800 Subject: [PATCH 06/11] Update base.py --- pandas/core/base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4132630578c7e..e22b9ab54013f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1032,12 +1032,8 @@ def nunique(self, dropna: bool = True) -> int: >>> s.nunique() 4 """ - if dropna: - uniqs = self.dropna().unique() - else: - uniqs = self.unique() - n = len(uniqs) - return n + obj = self.dropna() if dropna else self + return len(obj.unique()) @property def is_unique(self) -> bool: From 30e0f10cad2690ebb1bf1f38fc2f13a844e74e42 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 09:05:02 +0800 Subject: [PATCH 07/11] Update test_unique.py --- pandas/tests/base/test_unique.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 93ccecde168d4..9a68b338f8b06 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -123,9 +123,9 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): tm.assert_numpy_array_equal(result, expected) -def test_nunique_dropna(): - # test for #37566 +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique_dropna(dropna): + # GH37566 s = pd.Series(['yes', 'yes', pd.NA, np.nan, None, pd.NaT]) res = s.nunique(dropna=True) - expected = 1 - assert res == expected + assert res == 1 if dropna else 5 From aff0b016315295d6c137de4589bda96aa16eda36 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 09:13:31 +0800 Subject: [PATCH 08/11] Update test_unique.py --- pandas/tests/base/test_unique.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 9a68b338f8b06..26d615a287fda 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -126,6 +126,6 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): @pytest.mark.parametrize("dropna", [True, False]) def test_nunique_dropna(dropna): # GH37566 - s = pd.Series(['yes', 'yes', pd.NA, np.nan, None, pd.NaT]) + s = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT]) res = s.nunique(dropna=True) assert res == 1 if dropna else 5 From 52d47e3bb18bd387b0fc5f87fb90750722106a98 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 11:38:04 +0800 Subject: [PATCH 09/11] Update base.py --- pandas/core/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index e22b9ab54013f..8db1d8073fb7d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -33,7 +33,7 @@ is_scalar, ) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import isna, remove_na_arraylike from pandas.core import algorithms from pandas.core.accessor import DirNamesMixin @@ -1032,7 +1032,7 @@ def nunique(self, dropna: bool = True) -> int: >>> s.nunique() 4 """ - obj = self.dropna() if dropna else self + obj = remove_na_arraylike(self) if dropna else self return len(obj.unique()) @property From aaf14e2f964dfd79e7d4b45e5d2f0f81de0d4569 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 4 Nov 2020 15:51:07 +0800 Subject: [PATCH 10/11] Update test_unique.py --- pandas/tests/base/test_unique.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 26d615a287fda..1a554c85e018b 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -127,5 +127,5 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): def test_nunique_dropna(dropna): # GH37566 s = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT]) - res = s.nunique(dropna=True) + res = s.nunique(dropna) assert res == 1 if dropna else 5 From d26e523a30bc3f29dfe25459a28579d9ea4a0191 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 10 Nov 2020 09:19:53 +0800 Subject: [PATCH 11/11] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3f3a80d71f9a0..1da61d4b83e16 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -562,7 +562,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Bug in :meth:`Series.nunique` with ``dropna = True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) +- Bug in :meth:`Series.nunique` with ``dropna=True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) .. ---------------------------------------------------------------------------