Skip to content

ENH: Provide an errors parameter to fillna #15653

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1909,3 +1909,27 @@ def pandas_dtype(dtype):
raise TypeError('dtype {0} not understood'.format(dtype))

return npdtype


def _is_fillable_value(value):
    """
    Decide whether ``value`` is acceptable as a fill value.

    A value is rejected when it is a list or dict, when it is callable,
    when its type name is ``Series`` or ``DataFrame``, or when it is none
    of: a string, a builtin number, ``None``, something accepted by the
    numeric / datetime-or-timedelta / period dtype checks, or an object
    whose type name is ``Timestamp``, ``Period`` or ``Timedelta``.

    Parameters
    ----------
    value : object
        Candidate fill value.

    Returns
    -------
    bool
        True if the value may be used as a fill value, False otherwise.
    """
    pandas_ts_types = ('Timestamp', 'Period', 'Timedelta')
    pandas_block_types = ('Series', 'DataFrame')
    type_name = type(value).__name__

    # All three checks are computed up front (no short-circuit between
    # them), in the same order as they would appear in a list passed to
    # any().
    is_container = isinstance(value, (list, dict))
    is_function = callable(value)
    looks_scalar = (
        isinstance(value, string_types) or
        isinstance(value, (int, float, complex, str, None.__class__)) or
        is_numeric_dtype(value) or
        is_datetime_or_timedelta_dtype(value) or
        is_period_dtype(value) or
        type_name in pandas_ts_types)
    # Series / DataFrame are matched by type name and always rejected,
    # even though they may satisfy one of the dtype checks above.
    is_rejected_type = (not looks_scalar) or type_name in pandas_block_types

    return not (is_container or is_function or is_rejected_type)


def validate_fill_value(value):
    """
    Raise if ``value`` cannot be used as a fill value.

    Parameters
    ----------
    value : object
        Candidate fill value, checked with ``_is_fillable_value``.

    Raises
    ------
    TypeError
        If ``_is_fillable_value(value)`` is False.
    """
    if _is_fillable_value(value):
        return

    type_name = type(value).__name__
    raise TypeError('"value" parameter must be a scalar, but '
                    'you passed a "{0}"'.format(type_name))
34 changes: 33 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
is_object_dtype,
is_integer,
_TD_DTYPE,
_NS_DTYPE)
_NS_DTYPE,
is_datetime64_any_dtype, is_float,
is_numeric_dtype, is_complex, is_period_arraylike)
from datetime import datetime, timedelta
from .inference import is_list_like


Expand Down Expand Up @@ -394,3 +397,32 @@ def na_value_for_dtype(dtype):
elif is_bool_dtype(dtype):
return False
return np.nan


def is_valid_fill_value(value, dtype):
    """
    Check whether a fill value is appropriate for the given dtype.

    Parameters
    ----------
    value : scalar or dict
        Candidate fill value. A dict is always accepted here; its
        individual entries are not inspected by this function.
    dtype : string / dtype
        The dtype the value would be filled into.

    Returns
    -------
    bool
        True if ``value`` is a dict, a null scalar, or a scalar whose type
        matches the family of ``dtype`` (bool, numeric, datetime64 or
        timedelta64). For any other dtype every non-null scalar is
        accepted. Non-scalar, non-dict values yield False.
    """
    if isinstance(value, dict):
        return True
    if not is_scalar(value):
        # NOTE: non-scalars are reported as invalid rather than raising;
        # the caller decides how to react.
        return False
    if isnull(value):
        # a missing value is accepted for every dtype
        return True
    if is_bool_dtype(dtype):
        # np.bool_ is NumPy's boolean scalar type and is not a subclass of
        # the builtin bool, so both must be listed explicitly.
        return isinstance(value, (np.bool_, bool))
    if is_numeric_dtype(dtype):
        return is_float(value) or is_integer(value) or is_complex(value)
    if is_datetime64_any_dtype(dtype):
        return isinstance(value, (np.datetime64, datetime))
    if is_timedelta64_dtype(dtype):
        return isinstance(value, (np.timedelta64, timedelta))
    return True
39 changes: 31 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
pandas_dtype)
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.missing import isnull, notnull
from pandas.core.dtypes.generic import ABCSeries, ABCPanel
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame

from pandas.core.common import (_values_from_object,
_maybe_box_datetimelike,
Expand Down Expand Up @@ -3735,9 +3735,27 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,

@Appender(_shared_docs['fillna'] % _shared_doc_kwargs)
def fillna(self, value=None, method=None, axis=None, inplace=False,
limit=None, downcast=None):
limit=None, downcast=None, errors=None):
inplace = validate_bool_kwarg(inplace, 'inplace')

# if a singular fill value is provided, validate it
# special case: a DataFrame may be passed to a DataFrame
# in that case, short-circuit
if value is not None and not (isinstance(value, ABCDataFrame) and
isinstance(self, ABCDataFrame)):
# fill values by column, not all at once, to respect dtypes
if not isinstance(value, (dict, ABCSeries)) and \
isinstance(self, ABCDataFrame):
value = {col: value for col in self.columns}
try:
missing.validate_fill_value(self, value)
except TypeError:
if errors == 'ignore':
return self
elif errors == 'raise':
raise
# if errors == 'coerce' continue

if isinstance(value, (list, tuple)):
raise TypeError('"value" parameter must be a scalar or dict, but '
'you passed a "{0}"'.format(type(value).__name__))
Expand All @@ -3756,7 +3774,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
if self._is_mixed_type and axis == 1:
if inplace:
raise NotImplementedError()
result = self.T.fillna(method=method, limit=limit).T
result = self.T.fillna(method=method, limit=limit,
errors=errors).T

# need to downcast here because of all of the transposes
result._data = result._data.downcast()
Expand All @@ -3772,7 +3791,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
elif self.ndim == 3:

# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
result = dict([(col, s.fillna(method=method, value=value,
errors=errors))
for col, s in self.iteritems()])
new_obj = self._constructor.\
from_dict(result).__finalize__(self)
Expand Down Expand Up @@ -3804,7 +3824,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,

new_data = self._data.fillna(value=value, limit=limit,
inplace=inplace,
downcast=downcast)
downcast=downcast,
errors=errors)

elif isinstance(value, (dict, ABCSeries)):
if axis == 1:
Expand All @@ -3817,12 +3838,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
if k not in result:
continue
obj = result[k]
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
return result
obj.fillna(v, limit=limit, inplace=True,
downcast=downcast, errors=errors)
return None if inplace else result
elif not is_list_like(value):
new_data = self._data.fillna(value=value, limit=limit,
inplace=inplace,
downcast=downcast)
downcast=downcast,
errors=errors)
elif isinstance(value, DataFrame) and self.ndim == 2:
new_data = self.where(self.notnull(), value)
else:
Expand Down
30 changes: 21 additions & 9 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,13 @@ def apply(self, func, mgr=None, **kwargs):
return result

def fillna(self, value, limit=None, inplace=False, downcast=None,
mgr=None):
errors=None, mgr=None):
""" fillna on the block with the value. If we fail, then convert to
ObjectBlock and try again
"""
if not errors:
errors = 'coerce'

inplace = validate_bool_kwarg(inplace, 'inplace')

if not self._can_hold_na:
Expand Down Expand Up @@ -399,12 +402,16 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
if not mask.any():
return self if inplace else self.copy()

# we cannot coerce the underlying object, so
# make an ObjectBlock
return self.to_object_block(mgr=mgr).fillna(original_value,
limit=limit,
inplace=inplace,
downcast=False)
if errors == 'coerce':
# we cannot coerce the underlying object, so
# make an ObjectBlock
return self.to_object_block(mgr=mgr).fillna(original_value,
limit=limit,
inplace=inplace,
downcast=False,
errors='ignore')
else: # errors == 'ignore'
return self
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is tricky for datetimes, because they auto-infer.


def _maybe_downcast(self, blocks, downcast=None):

Expand Down Expand Up @@ -2132,11 +2139,14 @@ def _try_coerce_result(self, result):
return result

def fillna(self, value, limit=None, inplace=False, downcast=None,
mgr=None):
errors=None, mgr=None):
# we may need to upcast our fill to match our dtype
if limit is not None:
raise NotImplementedError("specifying a limit for 'fillna' has "
"not been implemented yet")
if errors is not None:
raise NotImplementedError("specifying error handling for 'fillna' "
"has not been implemented yet")

values = self.values if inplace else self.values.copy()
values = self._try_coerce_result(values.fillna(value=value,
Expand Down Expand Up @@ -2626,11 +2636,13 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
placement=self.mgr_locs)

def fillna(self, value, limit=None, inplace=False, downcast=None,
mgr=None):
errors=None, mgr=None):
# we may need to upcast our fill to match our dtype
if limit is not None:
raise NotImplementedError("specifying a limit for 'fillna' has "
"not been implemented yet")
if errors is not None:
raise NotImplementedError
values = self.values if inplace else self.values.copy()
values = values.fillna(value, downcast=downcast)
return [self.make_block_same_class(values=values,
Expand Down
32 changes: 31 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
_ensure_float64)

from pandas.core.dtypes.cast import infer_dtype_from_array
from pandas.core.dtypes.missing import isnull
from pandas.core.dtypes.missing import isnull, is_valid_fill_value
from pandas.core.dtypes.generic import ABCSeries


def mask_missing(arr, values_to_mask):
Expand Down Expand Up @@ -634,6 +635,35 @@ def fill_zeros(result, x, y, name, fill):
return result


def validate_fill_value(obj, value):
    """
    Check that a fill value is compatible with the dtype of a Series.

    Parameters
    ----------
    obj : Series or DataFrame
        The object for which a fill value is being evaluated. Only a
        Series is checked; for any other object this function does
        nothing and returns.
    value : object
        The value to be used as a fill for the object.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``obj`` is a Series and ``is_valid_fill_value`` rejects
        ``value`` for ``obj.dtype``.
    """
    if not isinstance(obj, ABCSeries):
        return
    if is_valid_fill_value(value, obj.dtype):
        return

    template = ('"value" parameter must be compatible '
                'with the {0} dtype, but you passed a '
                '"{1}"')
    raise TypeError(template.format(obj.dtype, type(value).__name__))


def _interp_limit(invalid, fw_limit, bw_limit):
"""Get idx of values that won't be filled b/c they exceed the limits.

Expand Down
37 changes: 35 additions & 2 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from warnings import catch_warnings
import numpy as np
from datetime import datetime
from datetime import datetime, timedelta
from pandas.util import testing as tm
import pytest

import pandas as pd
from pandas.core import config as cf
Expand All @@ -14,7 +15,7 @@
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import (
array_equivalent, isnull, notnull,
na_value_for_dtype)
na_value_for_dtype, is_valid_fill_value)


def test_notnull():
Expand Down Expand Up @@ -312,3 +313,35 @@ def test_na_value_for_dtype():

for dtype in ['O']:
assert np.isnan(na_value_for_dtype(np.dtype(dtype)))


@pytest.mark.parametrize('value, dtype', [
    # bool dtype
    (False, bool),
    (np.nan, bool),
    # int dtype
    (0, int),
    (0.0, int),
    (0j, int),
    (np.nan, int),
    # float dtype
    (0, float),
    (0.0, float),
    (0j, float),
    (np.nan, float),
    # complex dtype
    (0, complex),
    (0.0, complex),
    (0j, complex),
    (np.nan, complex),
    # str dtype
    (False, str),
    (0, str),
    (0.0, str),
    (0j, str),
    (np.nan, str),
    ('0', str),
    # datetime64
    (datetime(1970, 1, 1), np.datetime64),
    (pd.Timestamp('1970-01-01'), np.datetime64),
    # timedelta64
    (timedelta(0), np.timedelta64),
    (pd.Timedelta(0), np.timedelta64),
])
def test_valid_fill_value(value, dtype):
    """Each value must be accepted as a fill value for its paired dtype."""
    assert is_valid_fill_value(value, dtype)


@pytest.mark.parametrize('value, dtype', [
    # bool dtype
    (0, bool),
    (0.0, bool),
    (0j, bool),
    ('0', bool),
    # numeric dtypes
    ('0', int),
    ('0', float),
    ('0', complex),
    # datetime64
    ('0', np.dtype('datetime64')),
    (timedelta(0), np.dtype('datetime64')),
    (pd.Period('1970-01-01'), np.dtype('datetime64')),
    # timedelta64
    ('0', np.dtype('timedelta64')),
    (datetime(1970, 1, 1), np.dtype('timedelta64')),
    (pd.Period('1970-01-01'), np.dtype('timedelta64')),
])
def test_invalid_fill_value(value, dtype):
    """Each value must be rejected as a fill value for its paired dtype."""
    assert not is_valid_fill_value(value, dtype)
Loading