Skip to content

BUG: pd.Series.interpolate non-numeric index column (21662) #25394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ Numeric
- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`)
- Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. (:issue:`25729`)
- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`)
-
- Raise a helpful exception when a non-numeric index is passed to :meth:`interpolate` with a method that requires a numeric index (:issue:`21662`)
-
-

Expand Down
20 changes: 16 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.common import (
ensure_int64, ensure_object, is_bool, is_bool_dtype,
is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like,
is_extension_array_dtype, is_integer, is_list_like, is_number,
is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable,
is_scalar, is_timedelta64_dtype, pandas_dtype)
is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_dict_like, is_extension_array_dtype, is_integer, is_list_like,
is_number, is_numeric_dtype, is_object_dtype, is_period_arraylike,
is_re_compilable, is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna
Expand Down Expand Up @@ -6863,6 +6863,18 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax)))
else:
index = _maybe_transposed_self._get_axis(alt_ax)
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index) or
is_datetime64_dtype(index) or
is_timedelta64_dtype(index)
)
if method not in methods and not is_numeric_or_datetime:
raise ValueError(
"Index column must be numeric or datetime type when "
"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating.".format(method=method))

if isna(index).any():
raise NotImplementedError("Interpolation with NaNs in the index "
Expand Down
68 changes: 68 additions & 0 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,22 @@ def nontemporal_method(request):
return method, kwargs


@pytest.fixture(params=['linear', 'slinear', 'zero', 'quadratic', 'cubic',
                        'barycentric', 'krogh', 'polynomial', 'spline',
                        'piecewise_polynomial', 'from_derivatives', 'pchip',
                        'akima', ])
def interp_methods_ind(request):
    """
    Yield one ``(method, kwargs)`` pair per interpolation method, for
    tests that run against different Index types.

    ``kwargs`` carries ``order=1`` for 'spline' and 'polynomial' and is
    empty for every other method.

    The methods 'time', 'index', 'nearest' and 'values' are deliberately
    left out of the parameterization.
    """
    name = request.param
    # Only these two methods are given an explicit ``order`` argument.
    needs_order = name in ('spline', 'polynomial')
    extra = {'order': 1} if needs_order else {}
    return name, extra


class TestSeriesInterpolateData():
def test_interpolate(self, datetime_series, string_series):
ts = Series(np.arange(len(datetime_series), dtype=float),
Expand Down Expand Up @@ -1397,3 +1413,55 @@ def test_nonzero_warning(self):
ser = pd.Series([1, 0, 3, 4])
with tm.assert_produces_warning(FutureWarning):
ser.nonzero()

@pytest.mark.parametrize(
    "ind",
    [
        ['a', 'b', 'c', 'd'],
        pd.period_range(start="2019-01-01", periods=4),
        pd.interval_range(start=0, end=4),
    ])
def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
    """
    Interpolate a series indexed by strings, periods or intervals.

    With the 'linear' method the missing value is filled in; with every
    other method from ``interp_methods_ind`` a ValueError carrying the
    "Index column must be numeric or datetime type" message is expected.
    """
    # gh 21662
    method, kwargs = interp_methods_ind
    if method == "pchip":
        _skip_if_no_pchip()

    series = pd.DataFrame([0, 1, np.nan, 3], index=ind)[0]

    if method != "linear":
        expected_error = (
            "Index column must be numeric or datetime type when "
            "using {method} method other than linear. "
            "Try setting a numeric or datetime index column before "
            "interpolating.".format(method=method))
        with pytest.raises(ValueError, match=expected_error):
            series.interpolate(method=method, **kwargs)
        return

    # 'linear' is the default method, so it is not passed explicitly.
    result = series.interpolate(**kwargs)
    expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
    assert_series_equal(result, expected)

def test_interpolate_timedelta_index(self, interp_methods_ind):
    """
    Interpolate a series indexed by a timedelta range.

    Only 'linear' and 'pchip' are checked for a filled-in result; the
    remaining methods supplied by ``interp_methods_ind`` are skipped.
    The methods time, index, nearest and values are not part of that
    fixture and therefore are not exercised here.
    """
    # gh 21662
    method, kwargs = interp_methods_ind
    if method == "pchip":
        _skip_if_no_pchip()

    if method not in {"linear", "pchip"}:
        pytest.skip(
            "This interpolation method is not supported for "
            "Timedelta Index yet."
        )

    ind = pd.timedelta_range(start=1, periods=4)
    series = pd.DataFrame([0, 1, np.nan, 3], index=ind)[0]

    result = series.interpolate(method=method, **kwargs)
    expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
    assert_series_equal(result, expected)