Skip to content

Commit 247ecef

Browse files
Backport PR #38330: REGR: Groupby first/last/nth treats None as an observation (#38333)
Co-authored-by: Richard Shadrach <[email protected]>
1 parent 039aaba commit 247ecef

File tree

3 files changed: +25 -8 lines changed

Diff for: doc/source/whatsnew/v1.1.5.rst

+1
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ Fixed regressions
  27   27   - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`).
  28   28   - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`)
  29   29   - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`)
       30 + - Fixed regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`)
  30   31
  31   32   .. ---------------------------------------------------------------------------
  32   33
Diff for: pandas/_libs/groupby.pyx

+4 -8
Original file line number | Diff line number | Diff line change
@@ -926,9 +926,7 @@ def group_last(rank_t[:, :] out,
 926  926               for j in range(K):
 927  927                   val = values[i, j]
 928  928
 929      -                 # None should not be treated like other NA-like
 930      -                 # so that it won't be converted to nan
 931      -                 if not checknull(val) or val is None:
      929 +                 if not checknull(val):
 932  930                       # NB: use _treat_as_na here once
 933  931                       # conditional-nogil is available.
 934  932                       nobs[lab, j] += 1
@@ -937,7 +935,7 @@ def group_last(rank_t[:, :] out,
 937  935           for i in range(ncounts):
 938  936               for j in range(K):
 939  937                   if nobs[i, j] == 0:
 940      -                     out[i, j] = NAN
      938 +                     out[i, j] = None
 941  939                   else:
 942  940                       out[i, j] = resx[i, j]
 943  941       else:
@@ -1021,9 +1019,7 @@ def group_nth(rank_t[:, :] out,
1021 1019               for j in range(K):
1022 1020                   val = values[i, j]
1023 1021
1024      -                 # None should not be treated like other NA-like
1025      -                 # so that it won't be converted to nan
1026      -                 if not checknull(val) or val is None:
     1022 +                 if not checknull(val):
1027 1023                       # NB: use _treat_as_na here once
1028 1024                       # conditional-nogil is available.
1029 1025                       nobs[lab, j] += 1
@@ -1033,7 +1029,7 @@ def group_nth(rank_t[:, :] out,
1033 1029           for i in range(ncounts):
1034 1030               for j in range(K):
1035 1031                   if nobs[i, j] == 0:
1036      -                     out[i, j] = NAN
     1032 +                     out[i, j] = None
1037 1033                   else:
1038 1034                       out[i, j] = resx[i, j]
1039 1035
Diff for: pandas/tests/groupby/test_nth.py

+20
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,26 @@ def test_first_last_with_None(method):
 105  105       tm.assert_frame_equal(result, df)
 106  106
 107  107
      108 + @pytest.mark.parametrize("method", ["first", "last"])
      109 + @pytest.mark.parametrize(
      110 +     "df, expected",
      111 +     [
      112 +         (
      113 +             DataFrame({"id": "a", "value": [None, "foo", np.nan]}),
      114 +             DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")),
      115 +         ),
      116 +         (
      117 +             DataFrame({"id": "a", "value": [np.nan]}, dtype=object),
      118 +             DataFrame({"value": [None]}, index=Index(["a"], name="id")),
      119 +         ),
      120 +     ],
      121 + )
      122 + def test_first_last_with_None_expanded(method, df, expected):
      123 +     # GH 32800, 38286
      124 +     result = getattr(df.groupby("id"), method)()
      125 +     tm.assert_frame_equal(result, expected)
      126 +
      127 +
 108  128   def test_first_last_nth_dtypes(df_mixed_floats):
 109  129
 110  130       df = df_mixed_floats.copy()

0 commit comments

Comments
 (0)