Skip to content

Commit 4594457

Browse files
committed
API: rolling.apply will pass Series to function
closes #5071
1 parent fac2ef1 commit 4594457

File tree

4 files changed

+121
-37
lines changed

4 files changed

+121
-37
lines changed

doc/source/whatsnew/v0.23.0.txt

+27
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,33 @@ If you wish to retain the old behavior while using Python >= 3.6, you can use
479479
'Taxes': -200,
480480
'Net result': 300}).sort_index()
481481

482+
.. _whatsnew_0230.api_breaking.window_raw:
483+
484+
Rolling/Expanding.apply() will by default send a ``Series`` to the function
485+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
486+
487+
The :func:`Series.rolling`, :func:`DataFrame.rolling`, :func:`Series.expanding`, :func:`DataFrame.expanding` methods, when used with ``.apply()``, have gained a ``raw=False`` parameter.
488+
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False`` (the default), a ``Series`` will be passed.
489+
This is a change from prior versions, when the applied function would *always* receive an ndarray. This allows one to use pandas operations generically. (:issue:`5071`)
490+
491+
.. ipython:: python
492+
493+
s = pd.Series(np.arange(5), np.arange(5) + 1)
494+
s
495+
496+
Pass a ``Series``:
497+
498+
.. ipython:: python
499+
500+
s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1])
501+
502+
Mimic the original behavior of passing an ndarray:
503+
504+
.. ipython:: python
505+
506+
s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)
507+
508+
482509
.. _whatsnew_0230.api_breaking.deprecate_panel:
483510

484511
Deprecate Panel

pandas/_libs/window.pyx

+32-14
Original file line numberDiff line numberDiff line change
@@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
14321432
return output
14331433

14341434

1435-
def roll_generic(ndarray[float64_t, cast=True] input,
1435+
def roll_generic(object obj,
14361436
int64_t win, int64_t minp, object index, object closed,
1437-
int offset, object func,
1437+
int offset, object func, bint raw,
14381438
object args, object kwargs):
14391439
cdef:
14401440
ndarray[double_t] output, counts, bufarr
1441+
ndarray[float64_t, cast=True] arr
14411442
float64_t *buf
14421443
float64_t *oldbuf
14431444
int64_t nobs = 0, i, j, s, e, N
14441445
bint is_variable
14451446
ndarray[int64_t] start, end
14461447

1447-
if not input.flags.c_contiguous:
1448-
input = input.copy('C')
1449-
1450-
n = len(input)
1448+
n = len(obj)
14511449
if n == 0:
1452-
return input
1450+
return obj
1451+
1452+
arr = np.asarray(obj)
1453+
1454+
# ndarray input
1455+
if not raw:
1456+
if not arr.flags.c_contiguous:
1457+
arr = arr.copy('C')
14531458

1454-
counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
1459+
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
14551460
np.array([0.] * offset)]),
14561461
win, minp, index, closed)[offset:]
14571462

1458-
start, end, N, win, minp, is_variable = get_window_indexer(input, win,
1463+
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
14591464
minp, index,
14601465
closed,
14611466
floor=0)
14621467

14631468
output = np.empty(N, dtype=float)
14641469

14651470
if is_variable:
1471+
# variable window arr or series
14661472

1467-
# variable window
14681473
if offset != 0:
14691474
raise ValueError("unable to roll_generic with a non-zero offset")
14701475

@@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14731478
e = end[i]
14741479

14751480
if counts[i] >= minp:
1476-
output[i] = func(input[s:e], *args, **kwargs)
1481+
if raw:
1482+
output[i] = func(arr[s:e], *args, **kwargs)
1483+
else:
1484+
output[i] = func(obj.iloc[s:e], *args, **kwargs)
1485+
else:
1486+
output[i] = NaN
1487+
1488+
elif not raw:
1489+
# series
1490+
for i from 0 <= i < N:
1491+
if counts[i] >= minp:
1492+
sl = slice(int_max(i + offset - win + 1, 0),
1493+
int_min(i + offset + 1, N))
1494+
output[i] = func(obj.iloc[sl], *args, **kwargs)
14771495
else:
14781496
output[i] = NaN
14791497

@@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14821500
# truncated windows at the beginning, through first full-length window
14831501
for i from 0 <= i < (int_min(win, N) - offset):
14841502
if counts[i] >= minp:
1485-
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
1503+
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
14861504
else:
14871505
output[i] = NaN
14881506

14891507
# remaining full-length windows
1490-
buf = <float64_t *> input.data
1508+
buf = <float64_t *> arr.data
14911509
bufarr = np.empty(win, dtype=float)
14921510
oldbuf = <float64_t *> bufarr.data
14931511
for i from (win - offset) <= i < (N - offset):
@@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
15021520
# truncated windows at the end
15031521
for i from int_max(N - offset, 0) <= i < N:
15041522
if counts[i] >= minp:
1505-
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
1523+
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
15061524
*args,
15071525
**kwargs)
15081526
else:

pandas/core/window.py

+24-8
Original file line numberDiff line numberDiff line change
@@ -955,9 +955,20 @@ def count(self):
955955
----------
956956
func : function
957957
Must produce a single value from an ndarray input
958-
\*args and \*\*kwargs are passed to the function""")
958+
raw : bool, default False
959+
* ``False`` : passes each row or column as a Series to the
960+
function.
961+
* ``True`` : the passed function will receive ndarray objects
962+
instead.
963+
If you are just applying a NumPy reduction function this will
964+
achieve much better performance.
965+
.. versionadded:: 0.23.0
966+
967+
\*args and \*\*kwargs are passed to the function""")
968+
969+
def apply(self, func, raw=False, args=(), kwargs={}):
970+
from pandas import Series
959971

960-
def apply(self, func, args=(), kwargs={}):
961972
# TODO: _level is unused?
962973
_level = kwargs.pop('_level', None) # noqa
963974
window = self._get_window()
@@ -966,8 +977,11 @@ def apply(self, func, args=(), kwargs={}):
966977

967978
def f(arg, window, min_periods, closed):
968979
minp = _use_window(min_periods, window)
969-
return _window.roll_generic(arg, window, minp, indexi, closed,
970-
offset, func, args, kwargs)
980+
if not raw:
981+
arg = Series(arg, index=self.obj.index)
982+
return _window.roll_generic(
983+
arg, window, minp, indexi,
984+
closed, offset, func, raw, args, kwargs)
971985

972986
return self._apply(f, func, args=args, kwargs=kwargs,
973987
center=False)
@@ -1498,8 +1512,9 @@ def count(self):
14981512
@Substitution(name='rolling')
14991513
@Appender(_doc_template)
15001514
@Appender(_shared_docs['apply'])
1501-
def apply(self, func, args=(), kwargs={}):
1502-
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
1515+
def apply(self, func, raw=False, args=(), kwargs={}):
1516+
return super(Rolling, self).apply(
1517+
func, raw=raw, args=args, kwargs=kwargs)
15031518

15041519
@Substitution(name='rolling')
15051520
@Appender(_shared_docs['sum'])
@@ -1756,8 +1771,9 @@ def count(self, **kwargs):
17561771
@Substitution(name='expanding')
17571772
@Appender(_doc_template)
17581773
@Appender(_shared_docs['apply'])
1759-
def apply(self, func, args=(), kwargs={}):
1760-
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
1774+
def apply(self, func, raw=False, args=(), kwargs={}):
1775+
return super(Expanding, self).apply(
1776+
func, raw=raw, args=args, kwargs=kwargs)
17611777

17621778
@Substitution(name='expanding')
17631779
@Appender(_shared_docs['sum'])

pandas/tests/test_window.py

+38-15
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ def assert_equal(left, right):
2929
tm.assert_frame_equal(left, right)
3030

3131

32+
@pytest.fixture(params=[True, False])
33+
def raw(request):
34+
return request.param
35+
36+
3237
class Base(object):
3338

3439
_nan_locs = np.arange(20, 40)
@@ -1150,17 +1155,17 @@ def test_rolling_quantile_param(self):
11501155
with pytest.raises(TypeError):
11511156
ser.rolling(3).quantile('foo')
11521157

1153-
def test_rolling_apply(self):
1158+
def test_rolling_apply(self, raw):
11541159
# suppress warnings about empty slices, as we are deliberately testing
11551160
# with a 0-length Series
11561161
with warnings.catch_warnings():
11571162
warnings.filterwarnings("ignore",
11581163
message=".*(empty slice|0 for slice).*",
11591164
category=RuntimeWarning)
11601165

1161-
ser = Series([])
1162-
tm.assert_series_equal(ser,
1163-
ser.rolling(10).apply(lambda x: x.mean()))
1166+
expected = Series([])
1167+
result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw)
1168+
tm.assert_series_equal(result, expected)
11641169

11651170
def f(x):
11661171
return x[np.isfinite(x)].mean()
@@ -1169,24 +1174,37 @@ def f(x):
11691174

11701175
# GH 8080
11711176
s = Series([None, None, None])
1172-
result = s.rolling(2, min_periods=0).apply(lambda x: len(x))
1177+
result = s.rolling(2, min_periods=0).apply(lambda x: len(x), raw=raw)
11731178
expected = Series([1., 2., 2.])
11741179
tm.assert_series_equal(result, expected)
11751180

1176-
result = s.rolling(2, min_periods=0).apply(len)
1181+
result = s.rolling(2, min_periods=0).apply(len, raw=raw)
11771182
tm.assert_series_equal(result, expected)
11781183

1179-
def test_rolling_apply_out_of_bounds(self):
1184+
def test_rolling_apply_out_of_bounds(self, raw):
11801185
# #1850
11811186
vals = pd.Series([1, 2, 3, 4])
11821187

1183-
result = vals.rolling(10).apply(np.sum)
1188+
result = vals.rolling(10).apply(np.sum, raw=raw)
11841189
assert result.isna().all()
11851190

1186-
result = vals.rolling(10, min_periods=1).apply(np.sum)
1191+
result = vals.rolling(10, min_periods=1).apply(np.sum, raw=raw)
11871192
expected = pd.Series([1, 3, 6, 10], dtype=float)
11881193
tm.assert_almost_equal(result, expected)
11891194

1195+
def test_rolling_apply_with_pandas_objects(self):
1196+
# 5071
1197+
df = pd.DataFrame({'A': np.random.randn(5),
1198+
'B': np.random.randint(0, 10, size=5)})
1199+
1200+
def f(x):
1201+
return x.iloc[-1]
1202+
1203+
df.rolling(2).apply(f)
1204+
result = df.rolling(2).apply(f)
1205+
expected = df[1:].reindex_like(df)
1206+
tm.assert_frame_equal(result, expected)
1207+
11901208
def test_rolling_std(self):
11911209
self._check_moment_func(lambda x: np.std(x, ddof=1),
11921210
name='std')
@@ -2805,20 +2823,25 @@ def expanding_func(x, min_periods=1, center=False, axis=0):
28052823
return getattr(exp, func)()
28062824
self._check_expanding(expanding_func, static_comp, preserve_nan=False)
28072825

2808-
def test_expanding_apply(self):
2826+
def test_expanding_apply(self, raw):
28092827

28102828
def expanding_mean(x, min_periods=1):
2829+
28112830
exp = x.expanding(min_periods=min_periods)
2812-
return exp.apply(lambda x: x.mean())
2831+
result = exp.apply(lambda x: x.mean(), raw=raw)
2832+
return result
28132833

2814-
self._check_expanding(expanding_mean, np.mean)
2834+
# TODO(jreback), needed to add preserve_nan=False
2835+
# here to make this pass
2836+
self._check_expanding(expanding_mean, np.mean, preserve_nan=False)
28152837

28162838
ser = Series([])
2817-
tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean()))
2839+
tm.assert_series_equal(ser, ser.expanding().apply(
2840+
lambda x: x.mean(), raw=raw))
28182841

28192842
# GH 8080
28202843
s = Series([None, None, None])
2821-
result = s.expanding(min_periods=0).apply(lambda x: len(x))
2844+
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw)
28222845
expected = Series([1., 2., 3.])
28232846
tm.assert_series_equal(result, expected)
28242847

@@ -3057,7 +3080,7 @@ def func(x):
30573080
expected = g.apply(func)
30583081
tm.assert_series_equal(result, expected)
30593082

3060-
def test_rolling_apply(self):
3083+
def test_rolling_apply(self, raw):
30613084
g = self.frame.groupby('A')
30623085
r = g.rolling(window=4)
30633086

0 commit comments

Comments
 (0)