From 552437b5457177ee9b27baee0494937c19cf410d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 Oct 2017 09:52:08 -0400 Subject: [PATCH] DEPR: deprecate raise_on_error in .where/.mask in favor of errors= closes #14968 --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/computation/expressions.py | 38 ++++++++-------------- pandas/core/frame.py | 10 +++--- pandas/core/generic.py | 45 ++++++++++++++++++++++---- pandas/core/internals.py | 43 +++++++++++++++--------- pandas/core/ops.py | 14 +++----- pandas/core/panel.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sparse/frame.py | 2 +- pandas/tests/series/test_indexing.py | 11 +++++++ pandas/tests/series/test_missing.py | 6 ++-- pandas/tests/test_expressions.py | 1 + 12 files changed, 108 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 66b44d4d391e1..e86eae0a5e593 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -666,8 +666,9 @@ Deprecations - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. 
See the :ref:`documentation ` for more details (:issue:`17596`) - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) -- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) - ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`) .. _whatsnew_0210.deprecations.select: diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 2196fb5917a44..c74da6379e32f 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -56,7 +56,7 @@ def set_numexpr_threads(n=None): ne.set_num_threads(n) -def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): +def _evaluate_standard(op, op_str, a, b, **eval_kwargs): """ standard evaluation """ if _TEST_MODE: _store_test_result(False) @@ -89,7 +89,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, +def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs): result = None @@ -111,25 +111,22 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, except ValueError as detail: if 'unknown type object' in str(detail): pass - except Exception as detail: - if raise_on_error: - raise if _TEST_MODE: _store_test_result(result is not None) if result is None: - result 
= _evaluate_standard(op, op_str, a, b, raise_on_error) + result = _evaluate_standard(op, op_str, a, b) return result -def _where_standard(cond, a, b, raise_on_error=True): +def _where_standard(cond, a, b): return np.where(_values_from_object(cond), _values_from_object(a), _values_from_object(b)) -def _where_numexpr(cond, a, b, raise_on_error=False): +def _where_numexpr(cond, a, b): result = None if _can_use_numexpr(None, 'where', a, b, 'where'): @@ -147,11 +144,10 @@ def _where_numexpr(cond, a, b, raise_on_error=False): if 'unknown type object' in str(detail): pass except Exception as detail: - if raise_on_error: - raise TypeError(str(detail)) + raise TypeError(str(detail)) if result is None: - result = _where_standard(cond, a, b, raise_on_error) + result = _where_standard(cond, a, b) return result @@ -189,7 +185,7 @@ def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), return True -def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, +def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs): """ evaluate and return the expression of the op on a and b @@ -200,19 +196,16 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, op_str: the string version of the op a : left operand b : right operand - raise_on_error : pass the error to the higher level if indicated - (default is False), otherwise evaluate the op with and - return the results use_numexpr : whether to try to use numexpr (default True) """ + use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) if use_numexpr: - return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, - **eval_kwargs) - return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) + return _evaluate(op, op_str, a, b, **eval_kwargs) + return _evaluate_standard(op, op_str, a, b) -def where(cond, a, b, raise_on_error=False, use_numexpr=True): +def where(cond, a, b, use_numexpr=True): """ evaluate the where condition cond on a and b Parameters @@ 
-221,15 +214,12 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True): cond : a boolean array a : return if cond is True b : return if cond is False - raise_on_error : pass the error to the higher level if indicated - (default is False), otherwise evaluate the op with and - return the results use_numexpr : whether to try to use numexpr (default True) """ if use_numexpr: - return _where(cond, a, b, raise_on_error=raise_on_error) - return _where_standard(cond, a, b, raise_on_error=raise_on_error) + return _where(cond, a, b) + return _where_standard(cond, a, b) def set_test_mode(v=True): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a77c002b625cb..142ccf1f034bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3862,9 +3862,9 @@ def _combine_match_columns(self, other, func, level=None, try_cast=try_cast) return self._constructor(new_data) - def _combine_const(self, other, func, raise_on_error=True, try_cast=True): + def _combine_const(self, other, func, errors='raise', try_cast=True): new_data = self._data.eval(func=func, other=other, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast) return self._constructor(new_data) @@ -4035,8 +4035,7 @@ def combiner(x, y, needs_i8_conversion=False): else: mask = isna(x_values) - return expressions.where(mask, y_values, x_values, - raise_on_error=True) + return expressions.where(mask, y_values, x_values) return self.combine(other, combiner, overwrite=False) @@ -4091,8 +4090,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, if mask.all(): continue - self[col] = expressions.where(mask, this, that, - raise_on_error=True) + self[col] = expressions.where(mask, this, that) # ---------------------------------------------------------------------- # Misc methods diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eecdd8a6109e9..942a9ff279092 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5758,7 +5758,7 @@ def 
_align_series(self, other, join='outer', axis=None, level=None, return left.__finalize__(self), right.__finalize__(other) def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False): """ Equivalent to public method `where`, except that `other` is not applied as a function even if callable. Used in __setitem__. @@ -5887,7 +5887,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, else: new_data = self._data.where(other=other, cond=cond, align=align, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast, axis=block_axis, transpose=self._AXIS_REVERSED) @@ -5924,12 +5924,21 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, Whether to perform the operation in place on the data axis : alignment axis if needed, default None level : alignment level if needed, default None + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Note that currently this parameter won't affect + the results and will always coerce to a suitable dtype. + try_cast : boolean, default False try to cast the result back to the input type (if possible), raise_on_error : boolean, default True Whether to raise on invalid data types (e.g. trying to where on strings) + .. 
deprecated:: 0.21.0 + Returns ------- wh : same type as caller @@ -6005,24 +6014,46 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, cond_rev="False", name='where', name_other='mask')) def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False, raise_on_error=None): + + if raise_on_error is not None: + warnings.warn( + "raise_on_error is deprecated in " + "favor of errors='raise|ignore'", + FutureWarning, stacklevel=2) + + if raise_on_error: + errors = 'raise' + else: + errors = 'ignore' other = com._apply_if_callable(other, self) - return self._where(cond, other, inplace, axis, level, try_cast, - raise_on_error) + return self._where(cond, other, inplace, axis, level, + errors=errors, try_cast=try_cast) @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False", cond_rev="True", name='mask', name_other='where')) def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False, raise_on_error=None): + + if raise_on_error is not None: + warnings.warn( + "raise_on_error is deprecated in " + "favor of errors='raise|ignore'", + FutureWarning, stacklevel=2) + + if raise_on_error: + errors = 'raise' + else: + errors = 'ignore' inplace = validate_bool_kwarg(inplace, 'inplace') cond = com._apply_if_callable(cond, self) return self.where(~cond, other=other, inplace=inplace, axis=axis, level=level, try_cast=try_cast, - raise_on_error=raise_on_error) + errors=errors) _shared_docs['shift'] = (""" Shift index by desired number of periods with an optional time freq diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 90de4ded18f8c..a8f1a0c78c238 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -533,10 +533,16 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): **kwargs) def _astype(self, dtype, copy=False, 
errors='raise', values=None, - klass=None, mgr=None, raise_on_error=False, **kwargs): + klass=None, mgr=None, **kwargs): """ - Coerce to the new type (if copy=True, return a new copy) - raise on an except if raise == True + Coerce to the new type + + dtype : str, dtype convertible + copy : boolean, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object """ errors_legal_values = ('raise', 'ignore') @@ -1248,7 +1254,7 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values, fastpath=True)] - def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): + def eval(self, func, other, errors='raise', try_cast=False, mgr=None): """ evaluate the block; return result block from the result @@ -1256,8 +1262,10 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): ---------- func : how to combine self, other other : a ndarray/object - raise_on_error : if True, raise when I can't perform the function, - False by default (and just return the data that we had coming in) + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + try_cast : try casting the results to the input type Returns @@ -1295,7 +1303,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): except TypeError: block = self.coerce_to_target_dtype(orig_other) return block.eval(func, orig_other, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast, mgr=mgr) # get the result, may need to transpose the other @@ -1337,7 +1345,7 @@ def get_result(other): # error handler if we have an issue operating with the function def handle_error(): - if raise_on_error: + if errors == 'raise': # The 'detail' variable is defined in outer scope. 
raise TypeError('Could not operate %s with block values %s' % (repr(other), str(detail))) # noqa @@ -1383,7 +1391,7 @@ def handle_error(): result = _block_shape(result, ndim=self.ndim) return [self.make_block(result, fastpath=True, )] - def where(self, other, cond, align=True, raise_on_error=True, + def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ evaluate the block; return result block(s) from the result @@ -1393,8 +1401,10 @@ def where(self, other, cond, align=True, raise_on_error=True, other : a ndarray/object cond : the condition to respect align : boolean, perform alignment on other/cond - raise_on_error : if True, raise when I can't perform the function, - False by default (and just return the data that we had coming in) + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + axis : int transpose : boolean Set to True if self is stored with axes reversed @@ -1404,6 +1414,7 @@ def where(self, other, cond, align=True, raise_on_error=True, a new block(s), the result of the func """ import pandas.core.computation.expressions as expressions + assert errors in ['raise', 'ignore'] values = self.values orig_other = other @@ -1436,9 +1447,9 @@ def func(cond, values, other): try: return self._try_coerce_result(expressions.where( - cond, values, other, raise_on_error=True)) + cond, values, other)) except Exception as detail: - if raise_on_error: + if errors == 'raise': raise TypeError('Could not operate [%s] with block values ' '[%s]' % (repr(other), str(detail))) else: @@ -1454,10 +1465,10 @@ def func(cond, values, other): except TypeError: # we cannot coerce, return a compat dtype - # we are explicity ignoring raise_on_error here + # we are explicitly ignoring errors block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond, align=align, - raise_on_error=raise_on_error, 
+ errors=errors, try_cast=try_cast, axis=axis, transpose=transpose) return self._maybe_downcast(blocks, 'infer') @@ -2745,7 +2756,7 @@ def sp_index(self): def kind(self): return self.values.kind - def _astype(self, dtype, copy=False, raise_on_error=True, values=None, + def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): if values is None: values = self.values diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 506b9267f32b4..f0bd2477eec07 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -671,8 +671,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): dtype = find_common_type([x.dtype, y.dtype]) @@ -1196,8 +1195,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: xrav = x.ravel() if isinstance(y, (np.ndarray, ABCSeries)): @@ -1329,7 +1327,7 @@ def f(self, other): # straight boolean comparisions we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. res = self._combine_const(other, func, - raise_on_error=False, + errors='ignore', try_cast=False) return res.fillna(True).astype(bool) @@ -1354,8 +1352,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: # TODO: might need to find_common_type here? 
@@ -1385,8 +1382,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True) + result = expressions.evaluate(op, str_rep, x, y) except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=bool) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index fad6a39223b9e..14fba9560cae2 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1527,7 +1527,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, + errors='raise', **eval_kwargs) except TypeError: result = op(x, y) diff --git a/pandas/core/series.py b/pandas/core/series.py index 43b7f1d043e4d..49b6a6651367b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -253,7 +253,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # create/copy the manager if isinstance(data, SingleBlockManager): if dtype is not None: - data = data.astype(dtype=dtype, raise_on_error=False, + data = data.astype(dtype=dtype, errors='ignore', copy=copy) elif copy: data = data.copy() diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index a43aad9a0204e..1b45b180b8dc1 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -638,7 +638,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None, new_data, index=self.index, columns=union, default_fill_value=self.default_fill_value).__finalize__(self) - def _combine_const(self, other, func, raise_on_error=True, try_cast=True): + def _combine_const(self, other, func, errors='raise', try_cast=True): return self._apply_columns(lambda x: func(x, other)) def _reindex_index(self, index, method, copy, level, fill_value=np.nan, diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 0ca319565e24b..75ae47ed2fdc1 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ 
-1096,6 +1096,17 @@ def test_take(self): with tm.assert_produces_warning(FutureWarning): s.take([-1, 3, 4], convert=False) + def test_where_raise_on_error_deprecation(self): + + # gh-14968 + # deprecation of raise_on_error + s = Series(np.random.randn(5)) + cond = s > 0 + with tm.assert_produces_warning(FutureWarning): + s.where(cond, raise_on_error=True) + with tm.assert_produces_warning(FutureWarning): + s.mask(cond, raise_on_error=True) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 01bf7274fd384..bd4e8b23f31b4 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -292,15 +292,15 @@ def test_fillna_consistency(self): dtype='object') assert_series_equal(result, expected) - # where (we ignore the raise_on_error) + # where (we ignore the errors=) result = s.where([True, False], Timestamp('20130101', tz='US/Eastern'), - raise_on_error=False) + errors='ignore') assert_series_equal(result, expected) result = s.where([True, False], Timestamp('20130101', tz='US/Eastern'), - raise_on_error=True) + errors='ignore') assert_series_equal(result, expected) # with a non-datetime diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2b972477ae999..6d2607962dfb0 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -124,6 +124,7 @@ def run_binary(self, df, other, assert_func, test_flex=False, expr._MIN_ELEMENTS = 0 expr.set_test_mode(True) operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] + for arith in operations: if test_flex: op = lambda x, y: getattr(df, arith)(y)