Skip to content

Commit 7e510bc

Browse files
Khor Chean Wei, pmhatre1
Khor Chean Wei
authored and committed
ENH: add numeric_only to Dataframe.cum* methods (pandas-dev#58172)
* add numeric only to df
* docs
* docs
* docs
* docs
* docs
* add test
* add test
* add test
* add test
* resolve conversation
* resolve conversation
* enhance oc
* enhance oc
* enhance oc
* enhance oc
1 parent 0dcec7f commit 7e510bc

File tree

5 files changed

+126
-17
lines changed

5 files changed

+126
-17
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Other enhancements
3636
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
3737
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
3838
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
39-
-
39+
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4040

4141
.. ---------------------------------------------------------------------------
4242
.. _whatsnew_300.notable_bug_fixes:

pandas/core/frame.py

+40-8
Original file line numberDiff line numberDiff line change
@@ -12043,20 +12043,52 @@ def kurt(
1204312043
product = prod
1204412044

1204512045
@doc(make_doc("cummin", ndim=2))
12046-
def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
12047-
return NDFrame.cummin(self, axis, skipna, *args, **kwargs)
12046+
def cummin(
12047+
self,
12048+
axis: Axis = 0,
12049+
skipna: bool = True,
12050+
numeric_only: bool = False,
12051+
*args,
12052+
**kwargs,
12053+
) -> Self:
12054+
data = self._get_numeric_data() if numeric_only else self
12055+
return NDFrame.cummin(data, axis, skipna, *args, **kwargs)
1204812056

1204912057
@doc(make_doc("cummax", ndim=2))
12050-
def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
12051-
return NDFrame.cummax(self, axis, skipna, *args, **kwargs)
12058+
def cummax(
12059+
self,
12060+
axis: Axis = 0,
12061+
skipna: bool = True,
12062+
numeric_only: bool = False,
12063+
*args,
12064+
**kwargs,
12065+
) -> Self:
12066+
data = self._get_numeric_data() if numeric_only else self
12067+
return NDFrame.cummax(data, axis, skipna, *args, **kwargs)
1205212068

1205312069
@doc(make_doc("cumsum", ndim=2))
12054-
def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
12055-
return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)
12070+
def cumsum(
12071+
self,
12072+
axis: Axis = 0,
12073+
skipna: bool = True,
12074+
numeric_only: bool = False,
12075+
*args,
12076+
**kwargs,
12077+
) -> Self:
12078+
data = self._get_numeric_data() if numeric_only else self
12079+
return NDFrame.cumsum(data, axis, skipna, *args, **kwargs)
1205612080

1205712081
@doc(make_doc("cumprod", 2))
12058-
def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
12059-
return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)
12082+
def cumprod(
12083+
self,
12084+
axis: Axis = 0,
12085+
skipna: bool = True,
12086+
numeric_only: bool = False,
12087+
*args,
12088+
**kwargs,
12089+
) -> Self:
12090+
data = self._get_numeric_data() if numeric_only else self
12091+
return NDFrame.cumprod(data, axis, skipna, *args, **kwargs)
1206012092

1206112093
def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
1206212094
"""

pandas/core/generic.py

+59-5
Original file line numberDiff line numberDiff line change
@@ -11925,7 +11925,45 @@ def last_valid_index(self) -> Hashable:
1192511925
DataFrame.any : Return True if one (or more) elements are True.
1192611926
"""
1192711927

11928-
_cnum_doc = """
11928+
_cnum_pd_doc = """
11929+
Return cumulative {desc} over a DataFrame or Series axis.
11930+
11931+
Returns a DataFrame or Series of the same size containing the cumulative
11932+
{desc}.
11933+
11934+
Parameters
11935+
----------
11936+
axis : {{0 or 'index', 1 or 'columns'}}, default 0
11937+
The index or the name of the axis. 0 is equivalent to None or 'index'.
11938+
For `Series` this parameter is unused and defaults to 0.
11939+
skipna : bool, default True
11940+
Exclude NA/null values. If an entire row/column is NA, the result
11941+
will be NA.
11942+
numeric_only : bool, default False
11943+
Include only float, int, boolean columns.
11944+
*args, **kwargs
11945+
Additional keywords have no effect but might be accepted for
11946+
compatibility with NumPy.
11947+
11948+
Returns
11949+
-------
11950+
{name1} or {name2}
11951+
Return cumulative {desc} of {name1} or {name2}.
11952+
11953+
See Also
11954+
--------
11955+
core.window.expanding.Expanding.{accum_func_name} : Similar functionality
11956+
but ignores ``NaN`` values.
11957+
{name2}.{accum_func_name} : Return the {desc} over
11958+
{name2} axis.
11959+
{name2}.cummax : Return cumulative maximum over {name2} axis.
11960+
{name2}.cummin : Return cumulative minimum over {name2} axis.
11961+
{name2}.cumsum : Return cumulative sum over {name2} axis.
11962+
{name2}.cumprod : Return cumulative product over {name2} axis.
11963+
11964+
{examples}"""
11965+
11966+
_cnum_series_doc = """
1192911967
Return cumulative {desc} over a DataFrame or Series axis.
1193011968
1193111969
Returns a DataFrame or Series of the same size containing the cumulative
@@ -12716,28 +12754,44 @@ def make_doc(name: str, ndim: int) -> str:
1271612754
kwargs = {"min_count": ""}
1271712755

1271812756
elif name == "cumsum":
12719-
base_doc = _cnum_doc
12757+
if ndim == 1:
12758+
base_doc = _cnum_series_doc
12759+
else:
12760+
base_doc = _cnum_pd_doc
12761+
1272012762
desc = "sum"
1272112763
see_also = ""
1272212764
examples = _cumsum_examples
1272312765
kwargs = {"accum_func_name": "sum"}
1272412766

1272512767
elif name == "cumprod":
12726-
base_doc = _cnum_doc
12768+
if ndim == 1:
12769+
base_doc = _cnum_series_doc
12770+
else:
12771+
base_doc = _cnum_pd_doc
12772+
1272712773
desc = "product"
1272812774
see_also = ""
1272912775
examples = _cumprod_examples
1273012776
kwargs = {"accum_func_name": "prod"}
1273112777

1273212778
elif name == "cummin":
12733-
base_doc = _cnum_doc
12779+
if ndim == 1:
12780+
base_doc = _cnum_series_doc
12781+
else:
12782+
base_doc = _cnum_pd_doc
12783+
1273412784
desc = "minimum"
1273512785
see_also = ""
1273612786
examples = _cummin_examples
1273712787
kwargs = {"accum_func_name": "min"}
1273812788

1273912789
elif name == "cummax":
12740-
base_doc = _cnum_doc
12790+
if ndim == 1:
12791+
base_doc = _cnum_series_doc
12792+
else:
12793+
base_doc = _cnum_pd_doc
12794+
1274112795
desc = "maximum"
1274212796
see_also = ""
1274312797
examples = _cummax_examples

pandas/tests/frame/test_cumulative.py

+24
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77
"""
88

99
import numpy as np
10+
import pytest
1011

1112
from pandas import (
1213
DataFrame,
1314
Series,
15+
Timestamp,
1416
)
1517
import pandas._testing as tm
1618

@@ -81,3 +83,25 @@ def test_cumsum_preserve_dtypes(self):
8183
}
8284
)
8385
tm.assert_frame_equal(result, expected)
86+
87+
@pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"])
88+
@pytest.mark.parametrize("axis", [0, 1])
89+
def test_numeric_only_flag(self, method, axis):
90+
df = DataFrame(
91+
{
92+
"int": [1, 2, 3],
93+
"bool": [True, False, False],
94+
"string": ["a", "b", "c"],
95+
"float": [1.0, 3.5, 4.0],
96+
"datetime": [
97+
Timestamp(2018, 1, 1),
98+
Timestamp(2019, 1, 1),
99+
Timestamp(2020, 1, 1),
100+
],
101+
}
102+
)
103+
df_numeric_only = df.drop(["string", "datetime"], axis=1)
104+
105+
result = getattr(df, method)(axis=axis, numeric_only=True)
106+
expected = getattr(df_numeric_only, method)(axis)
107+
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_api.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -183,10 +183,9 @@ def test_frame_consistency(groupby_func):
183183
elif groupby_func in ("bfill", "ffill"):
184184
exclude_expected = {"inplace", "axis", "limit_area"}
185185
elif groupby_func in ("cummax", "cummin"):
186-
exclude_expected = {"skipna", "args"}
187-
exclude_result = {"numeric_only"}
186+
exclude_expected = {"axis", "skipna", "args"}
188187
elif groupby_func in ("cumprod", "cumsum"):
189-
exclude_expected = {"skipna"}
188+
exclude_expected = {"axis", "skipna", "numeric_only"}
190189
elif groupby_func in ("pct_change",):
191190
exclude_expected = {"kwargs"}
192191
elif groupby_func in ("rank",):

0 commit comments

Comments
 (0)