Skip to content

Commit 08b6b28

Browse files
committed
ENH: add fill_value to asfreq, see pandas-dev#3715
1 parent 362e78d commit 08b6b28

File tree

5 files changed

+154
-18
lines changed

5 files changed

+154
-18
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,8 @@ Other enhancements
133133
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
134134
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
135135
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
136-
137136
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
138-
137+
- ``Series.asfreq()`` and ``DataFrame.asfreq()`` now accept a ``fill_value`` option to fill the missing values introduced during upsampling (:issue:`3715`).
139138

140139
.. _whatsnew_0200.api_breaking:
141140

pandas/core/generic.py

+68-5
Original file line numberDiff line numberDiff line change
@@ -4072,12 +4072,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
40724072
sort=sort, group_keys=group_keys, squeeze=squeeze,
40734073
**kwargs)
40744074

4075-
def asfreq(self, freq, method=None, how=None, normalize=False):
4075+
def asfreq(self, freq, method=None, how=None, normalize=False,
4076+
fill_value=None):
40764077
"""
40774078
Convert TimeSeries to specified frequency.
40784079
40794080
Optionally provide filling method to pad/backfill missing values.
40804081
4082+
Returns the original data conformed to a new index with the specified
4083+
frequency. ``resample`` is more appropriate if an operation, such as
4084+
summarization, is necessary to represent the data at the new frequency.
4085+
40814086
Parameters
40824087
----------
40834088
freq : DateOffset object, or string
@@ -4092,18 +4097,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
40924097
For PeriodIndex only, see PeriodIndex.asfreq
40934098
normalize : bool, default False
40944099
Whether to reset output index to midnight
4100+
fill_value : scalar, optional
4101+
Value to use for missing values, applied during upsampling (note
4102+
this does not fill NaNs that already were present).
4103+
4104+
.. versionadded:: 0.20.0
40954105
40964106
Returns
40974107
-------
40984108
converted : type of caller
40994109
4110+
Examples
4111+
--------
4112+
4113+
Start by creating a series with 4 one-minute timestamps.
4114+
4115+
>>> index = pd.date_range('1/1/2000', periods=4, freq='T')
4116+
>>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
4117+
>>> df = pd.DataFrame({'s':series})
4118+
>>> df
4119+
s
4120+
2000-01-01 00:00:00 0.0
4121+
2000-01-01 00:01:00 NaN
4122+
2000-01-01 00:02:00 2.0
4123+
2000-01-01 00:03:00 3.0
4124+
4125+
Upsample the series into 30-second bins.
4126+
4127+
>>> df.asfreq(freq='30S')
4128+
s
4129+
2000-01-01 00:00:00 0.0
4130+
2000-01-01 00:00:30 NaN
4131+
2000-01-01 00:01:00 NaN
4132+
2000-01-01 00:01:30 NaN
4133+
2000-01-01 00:02:00 2.0
4134+
2000-01-01 00:02:30 NaN
4135+
2000-01-01 00:03:00 3.0
4136+
4137+
Upsample again, providing a ``fill_value``.
4138+
4139+
>>> df.asfreq(freq='30S', fill_value=9.0)
4140+
s
4141+
2000-01-01 00:00:00 0.0
4142+
2000-01-01 00:00:30 9.0
4143+
2000-01-01 00:01:00 NaN
4144+
2000-01-01 00:01:30 9.0
4145+
2000-01-01 00:02:00 2.0
4146+
2000-01-01 00:02:30 9.0
4147+
2000-01-01 00:03:00 3.0
4148+
4149+
Upsample again, providing a ``method``.
4150+
4151+
>>> df.asfreq(freq='30S', method='bfill')
4152+
s
4153+
2000-01-01 00:00:00 0.0
4154+
2000-01-01 00:00:30 NaN
4155+
2000-01-01 00:01:00 NaN
4156+
2000-01-01 00:01:30 2.0
4157+
2000-01-01 00:02:00 2.0
4158+
2000-01-01 00:02:30 3.0
4159+
2000-01-01 00:03:00 3.0
4160+
4161+
See Also
4162+
--------
4163+
reindex
4164+
41004165
Notes
41014166
-----
41024167
To learn more about the frequency strings, please see `this link
41034168
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
41044169
"""
41054170
from pandas.tseries.resample import asfreq
4106-
return asfreq(self, freq, method=method, how=how, normalize=normalize)
4171+
return asfreq(self, freq, method=method, how=how, normalize=normalize,
4172+
fill_value=fill_value)
41074173

41084174
def at_time(self, time, asof=False):
41094175
"""
@@ -4184,9 +4250,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
41844250
41854251
.. versionadded:: 0.19.0
41864252
4187-
Notes
4188-
-----
4189-
41904253
To learn more about the offset strings, please see `this link
41914254
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
41924255

pandas/tests/frame/test_timeseries.py

+20
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,26 @@ def test_asfreq_datetimeindex(self):
323323
ts = df['A'].asfreq('B')
324324
tm.assertIsInstance(ts.index, DatetimeIndex)
325325

326+
def test_asfreq_fillvalue(self):
327+
# test for fill value during upsampling, related to issue 3715
328+
329+
# setup
330+
rng = pd.date_range('1/1/2016', periods=10, freq='2S')
331+
ts = pd.Series(np.arange(len(rng)), index=rng)
332+
df = pd.DataFrame({'one': ts})
333+
334+
# insert pre-existing missing value
335+
df.loc['2016-01-01 00:00:08', 'one'] = None
336+
337+
actual_df = df.asfreq(freq='1S', fill_value=9.0)
338+
expected_df = df.asfreq(freq='1S').fillna(9.0)
339+
expected_df.loc['2016-01-01 00:00:08', 'one'] = None
340+
assert_frame_equal(expected_df, actual_df)
341+
342+
expected_series = ts.asfreq(freq='1S').fillna(9.0)
343+
actual_series = ts.asfreq(freq='1S', fill_value=9.0)
344+
assert_series_equal(expected_series, actual_series)
345+
326346
def test_first_last_valid(self):
327347
N = len(self.frame.index)
328348
mat = randn(N)

pandas/tseries/resample.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def transform(self, arg, *args, **kwargs):
353353
def _downsample(self, f):
354354
raise AbstractMethodError(self)
355355

356-
def _upsample(self, f, limit=None):
356+
def _upsample(self, f, limit=None, fill_value=None):
357357
raise AbstractMethodError(self)
358358

359359
def _gotitem(self, key, ndim, subset=None):
@@ -509,12 +509,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
509509
limit_direction=limit_direction,
510510
downcast=downcast, **kwargs)
511511

512-
def asfreq(self):
512+
def asfreq(self, fill_value=None):
513513
"""
514514
return the values at the new freq,
515-
essentially a reindex with (no filling)
515+
essentially a reindex
516+
517+
Parameters
518+
----------
519+
fill_value : scalar, optional
520+
Value to use for missing values, applied during upsampling (note
521+
this does not fill NaNs that already were present).
522+
523+
.. versionadded:: 0.20.0
524+
525+
See Also
526+
--------
527+
Series.asfreq
528+
DataFrame.asfreq
516529
"""
517-
return self._upsample('asfreq')
530+
return self._upsample('asfreq', fill_value=fill_value)
518531

519532
def std(self, ddof=1, *args, **kwargs):
520533
"""
@@ -713,12 +726,14 @@ def _adjust_binner_for_upsample(self, binner):
713726
binner = binner[:-1]
714727
return binner
715728

716-
def _upsample(self, method, limit=None):
729+
def _upsample(self, method, limit=None, fill_value=None):
717730
"""
718731
method : string {'backfill', 'bfill', 'pad',
719732
'ffill', 'asfreq'} method for upsampling
720733
limit : int, default None
721734
Maximum size gap to fill when reindexing
735+
fill_value : scalar, default None
736+
Value to use for missing values
722737
723738
See also
724739
--------
@@ -745,7 +760,7 @@ def _upsample(self, method, limit=None):
745760
result.index = res_index
746761
else:
747762
result = obj.reindex(res_index, method=method,
748-
limit=limit)
763+
limit=limit, fill_value=fill_value)
749764

750765
return self._wrap_result(result)
751766

@@ -865,12 +880,14 @@ def _downsample(self, how, **kwargs):
865880
'Frequency {} cannot be resampled to {}, as they are not '
866881
'sub or super periods'.format(ax.freq, self.freq))
867882

868-
def _upsample(self, method, limit=None):
883+
def _upsample(self, method, limit=None, fill_value=None):
869884
"""
870885
method : string {'backfill', 'bfill', 'pad', 'ffill'}
871886
method for upsampling
872887
limit : int, default None
873888
Maximum size gap to fill when reindexing
889+
fill_value : scalar, default None
890+
Value to use for missing values
874891
875892
See also
876893
--------
@@ -884,8 +901,8 @@ def _upsample(self, method, limit=None):
884901
" datetime-like")
885902
# we may need to actually resample as if we are timestamps
886903
if self.kind == 'timestamp':
887-
return super(PeriodIndexResampler, self)._upsample(method,
888-
limit=limit)
904+
return super(PeriodIndexResampler, self)._upsample(
905+
method, limit=limit, fill_value=fill_value)
889906

890907
ax = self.ax
891908
obj = self.obj
@@ -1346,7 +1363,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
13461363
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
13471364

13481365

1349-
def asfreq(obj, freq, method=None, how=None, normalize=False):
1366+
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
13501367
"""
13511368
Utility frequency conversion method for Series/DataFrame
13521369
"""
@@ -1366,7 +1383,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False):
13661383
return obj.copy()
13671384
dti = date_range(obj.index[0], obj.index[-1], freq=freq)
13681385
dti.name = obj.index.name
1369-
rs = obj.reindex(dti, method=method)
1386+
rs = obj.reindex(dti, method=method, fill_value=fill_value)
13701387
if normalize:
13711388
rs.index = rs.index.normalize()
13721389
return rs

pandas/tseries/tests/test_resample.py

+37
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,24 @@ def test_asfreq_upsample(self):
693693
expected = frame.reindex(new_index)
694694
assert_frame_equal(result, expected)
695695

696+
def test_asfreq_fill_value(self):
697+
# test for fill value during resampling, issue 3715
698+
699+
s = self.create_series()
700+
701+
result = s.resample('1H').asfreq()
702+
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
703+
expected = s.reindex(new_index)
704+
assert_series_equal(result, expected)
705+
706+
frame = s.to_frame('value')
707+
frame.iloc[1] = None
708+
result = frame.resample('1H').asfreq(fill_value=4.0)
709+
new_index = self.create_index(frame.index[0],
710+
frame.index[-1], freq='1H')
711+
expected = frame.reindex(new_index, fill_value=4.0)
712+
assert_frame_equal(result, expected)
713+
696714
def test_resample_interpolate(self):
697715
# # 12925
698716
df = self.create_series().to_frame('value')
@@ -2159,6 +2177,25 @@ def test_asfreq_upsample(self):
21592177
result = frame.resample('1H').asfreq()
21602178
assert_frame_equal(result, expected)
21612179

2180+
def test_asfreq_fill_value(self):
2181+
# test for fill value during resampling, issue 3715
2182+
2183+
s = self.create_series()
2184+
new_index = date_range(s.index[0].to_timestamp(how='start'),
2185+
(s.index[-1]).to_timestamp(how='start'),
2186+
freq='1H')
2187+
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
2188+
result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
2189+
assert_series_equal(result, expected)
2190+
2191+
frame = s.to_frame('value')
2192+
new_index = date_range(frame.index[0].to_timestamp(how='start'),
2193+
(frame.index[-1]).to_timestamp(how='start'),
2194+
freq='1H')
2195+
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
2196+
result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
2197+
assert_frame_equal(result, expected)
2198+
21622199
def test_selection(self):
21632200
index = self.create_series().index
21642201
# This is a bug, these should be implemented

0 commit comments

Comments
 (0)