diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 2e299da5e5794..340d2e7acfe36 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1112,14 +1112,14 @@ will be the names of the transforming functions. .. ipython:: python - tsdf.transform([np.abs, lambda x: x + 1]) + tsdf.transform([np.abs, lambda x: x + 1], series_ops_only=True) Passing multiple functions to a Series will yield a DataFrame. The resulting column names will be the transforming functions. .. ipython:: python - tsdf["A"].transform([np.abs, lambda x: x + 1]) + tsdf["A"].transform([np.abs, lambda x: x + 1], series_ops_only=True) Transforming with a dict @@ -1130,7 +1130,7 @@ Passing a dict of functions will allow selective transforming per column. .. ipython:: python - tsdf.transform({"A": np.abs, "B": lambda x: x + 1}) + tsdf.transform({"A": np.abs, "B": lambda x: x + 1}, series_ops_only=True) Passing a dict of lists will generate a MultiIndexed DataFrame with these selective transforms. diff --git a/pandas/conftest.py b/pandas/conftest.py index 62f22921f0482..2f8d0bced3673 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -2015,3 +2015,11 @@ def warsaw(request) -> str: @pytest.fixture() def arrow_string_storage(): return ("pyarrow", "pyarrow_numpy") + + +@pytest.fixture(params=[True, False]) +def series_ops_only(request): + """ + Parameter used in Series.transform and DataFrame.transform. Remove in pandas v3.0. + """ + return request.param diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 1525e316f345f..2e19a430184cd 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -211,12 +211,14 @@ def transform(self) -> DataFrame | Series: axis = self.axis args = self.args kwargs = self.kwargs + by_row = self.by_row is_series = obj.ndim == 1 if obj._get_axis_number(axis) == 1: assert not is_series - return obj.T.transform(func, 0, *args, **kwargs).T + soo = not by_row + return obj.T.transform(func, 0, *args, series_ops_only=soo, **kwargs).T if is_list_like(func) and not is_dict_like(func): func = cast(list[AggFuncTypeBase], func) @@ -230,14 +232,17 @@ def transform(self) -> DataFrame | Series: func = cast(AggFuncTypeDict, func) return self.transform_dict_like(func) - # func is either str or callable - func = cast(AggFuncTypeBase, func) - try: - result = self.transform_str_or_callable(func) - except TypeError: - raise - except Exception as err: - raise ValueError("Transform function failed") from err + if not by_row: + result = obj.apply(func, by_row=by_row, args=args, **kwargs) + else: + # func is either str or callable + func = cast(AggFuncTypeBase, func) + try: + result = self.transform_str_or_callable(func) + except TypeError: + raise + except Exception as err: + raise ValueError("Transform function failed") from err # Functions that transform may return empty Series/DataFrame # when the dtype is not appropriate @@ -267,6 +272,7 @@ def transform_dict_like(self, func) -> DataFrame: obj = self.obj args = self.args kwargs = self.kwargs + soo = not self.by_row # transform is currently only for Series/DataFrame assert isinstance(obj, ABCNDFrame) @@ -279,7 +285,7 @@ def transform_dict_like(self, func) -> DataFrame: results: dict[Hashable, DataFrame | Series] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) - results[name] = colg.transform(how, 0, *args, **kwargs) + results[name] = colg.transform(how, 0, *args, series_ops_only=soo, **kwargs) return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: @@ -602,7 +608,10 @@ def apply_list_or_dict_like(self) -> DataFrame | Series: Result when self.func is a list-like or dict-like, None otherwise. """ if self.axis == 1 and isinstance(self.obj, ABCDataFrame): - return self.obj.T.apply(self.func, 0, args=self.args, **self.kwargs).T + soo = not self.by_row + return self.obj.T.apply( + self.func, 0, args=self.args, series_ops_only=soo, **self.kwargs + ).T func = self.func kwargs = self.kwargs diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a16597221ac92..780013dd7fe57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9961,11 +9961,30 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): axis=_shared_doc_kwargs["axis"], ) def transform( - self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + self, + func: AggFuncType, + axis: Axis = 0, + *args, + series_ops_only: bool = False, + **kwargs, ) -> DataFrame: from pandas.core.apply import frame_apply - op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + if not series_ops_only and is_list_like(func): + cls_name = type(self).__name__ + warnings.warn( + f"{cls_name}.transform will in the future only operate on " + "whole series. Set series_ops_only = True to opt into the new behavior " + f"or use {cls_name}.map to continue operating on series elements.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + by_row: Literal[False, "compat"] = False if series_ops_only else "compat" + + op = frame_apply( + self, func=func, axis=axis, args=args, by_row=by_row, kwargs=kwargs + ) result = op.transform() assert isinstance(result, DataFrame) return result diff --git a/pandas/core/series.py b/pandas/core/series.py index fd50a85f3c2e3..99da730686157 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4613,12 +4613,30 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): axis=_shared_doc_kwargs["axis"], ) def transform( - self, func: AggFuncType, axis: Axis = 0, *args, **kwargs + self, + func: AggFuncType, + axis: Axis = 0, + *args, + series_ops_only: bool = False, + **kwargs, ) -> DataFrame | Series: # Validate axis argument self._get_axis_number(axis) + if not series_ops_only and not isinstance(func, (str, np.ufunc)): + cls_name = type(self).__name__ + warnings.warn( + f"{cls_name}.transform will in the future only operate on " + "whole series. Set series_ops_only = True to opt into the new behavior " + f"or use {cls_name}.map to continue operating on series elements.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + by_row: Literal[False, "compat"] = False if series_ops_only else "compat" ser = self.copy(deep=False) if using_copy_on_write() else self - result = SeriesApply(ser, func=func, args=args, kwargs=kwargs).transform() + result = SeriesApply( + ser, func=func, by_row=by_row, args=args, kwargs=kwargs + ).transform() return result def apply( diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index ec219941a3afc..cd51aad7725ac 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -378,7 +378,7 @@ 1 1 2 2 dtype: int64 ->>> s.transform([np.sqrt, np.exp]) +>>> s.transform([np.sqrt, np.exp]) # doctest: +SKIP sqrt exp 0 0.000000 1.000000 1 1.000000 2.718282 diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py index b4d153df54059..7bd5e9020eb44 100644 --- a/pandas/tests/apply/common.py +++ b/pandas/tests/apply/common.py @@ -1,3 +1,8 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_list_like from pandas.core.groupby.base import transformation_kernels # There is no Series.cumcount or DataFrame.cumcount @@ -5,3 +10,29 @@ x for x in sorted(transformation_kernels) if x != "cumcount" ] frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"] + + +def transform_obj(obj, func, *args, axis=0, series_ops_only=False, **kwargs): + """helper function to ease use of series_ops_only and deprecation warning.""" + if isinstance(func, np.ufunc): + result = obj.transform(func, axis, *args, **kwargs) + elif series_ops_only: + result = obj.transform( + func, axis, *args, series_ops_only=series_ops_only, **kwargs + ) + elif isinstance(obj, pd.DataFrame) and not is_list_like(func): + result = obj.transform(func, axis, *args, **kwargs) + elif isinstance(func, str): + result = obj.transform(func, axis, *args, **kwargs) + else: + cls_name = type(obj).__name__ + msg = ( + f"{cls_name}.transform will in the future only operate on " + "whole series. Set series_ops_only = True to opt into the new behavior " + f"or use {cls_name}.map to continue operating on series elements." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = obj.transform( + func, axis, *args, series_ops_only=series_ops_only, **kwargs + ) + return result diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 2d57515882aed..6397f754718b6 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -7,7 +9,10 @@ Series, ) import pandas._testing as tm -from pandas.tests.apply.common import frame_transform_kernels +from pandas.tests.apply.common import ( + frame_transform_kernels, + transform_obj, +) from pandas.tests.frame.common import zip_frames @@ -31,7 +36,7 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): f_sqrt = np.sqrt(obj) # ufunc - result = obj.transform(np.sqrt, axis=axis) + result = transform_obj(obj, np.sqrt, axis=axis) expected = f_sqrt tm.assert_equal(result, expected) @@ -45,7 +50,7 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), ], ) -def test_transform_listlike(axis, float_frame, ops, names): +def test_transform_listlike(axis, float_frame, ops, names, series_ops_only): # GH 35964 other_axis = 1 if axis in {0, "index"} else 0 with np.errstate(all="ignore"): @@ -54,19 +59,19 @@ def test_transform_listlike(axis, float_frame, ops, names): expected.columns = MultiIndex.from_product([float_frame.columns, names]) else: expected.index = MultiIndex.from_product([float_frame.index, names]) - result = float_frame.transform(ops, axis=axis) + result = transform_obj(float_frame, ops, axis=axis, series_ops_only=series_ops_only) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("ops", [[], np.array([])]) -def test_transform_empty_listlike(float_frame, ops, frame_or_series): +def test_transform_empty_listlike(float_frame, ops, frame_or_series, series_ops_only): obj = unpack_obj(float_frame, frame_or_series, 0) with pytest.raises(ValueError, match="No transform functions were provided"): - obj.transform(ops) + transform_obj(obj, ops, series_ops_only=series_ops_only) -def test_transform_listlike_func_with_args(): +def test_transform_listlike_func_with_args(series_ops_only): # GH 50624 df = DataFrame({"x": [1, 2, 3]}) @@ -78,9 +83,12 @@ def foo2(x, b=2, c=0): msg = r"foo1\(\) got an unexpected keyword argument 'b'" with pytest.raises(TypeError, match=msg): - df.transform([foo1, foo2], 0, 3, b=3, c=4) + with warnings.catch_warnings(): + transform_obj( + df, [foo1, foo2], 3, series_ops_only=series_ops_only, b=3, c=4 + ) - result = df.transform([foo1, foo2], 0, 3, c=4) + result = transform_obj(df, [foo1, foo2], 3, series_ops_only=series_ops_only, c=4) expected = DataFrame( [[8, 8], [9, 9], [10, 10]], columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), @@ -89,7 +97,7 @@ def foo2(x, b=2, c=0): @pytest.mark.parametrize("box", [dict, Series]) -def test_transform_dictlike(axis, float_frame, box): +def test_transform_dictlike(axis, float_frame, box, series_ops_only): # GH 35964 if axis in (0, "index"): e = float_frame.columns[0] @@ -97,14 +105,17 @@ def test_transform_dictlike(axis, float_frame, box): else: e = float_frame.index[0] expected = float_frame.iloc[[0]].transform(np.abs) - result = float_frame.transform(box({e: np.abs}), axis=axis) + result = transform_obj( + float_frame, box({e: np.abs}), axis=axis, series_ops_only=series_ops_only + ) tm.assert_frame_equal(result, expected) -def test_transform_dictlike_mixed(): +def test_transform_dictlike_mixed(series_ops_only): # GH 40018 - mix of lists and non-lists in values of a dictionary df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) - result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + func = {"b": ["sqrt", "abs"], "c": "sqrt"} + result = transform_obj(df, func, series_ops_only=series_ops_only) expected = DataFrame( [[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), @@ -127,23 +138,23 @@ def test_transform_empty_dictlike(float_frame, ops, frame_or_series): obj = unpack_obj(float_frame, frame_or_series, 0) with pytest.raises(ValueError, match="No transform functions were provided"): - obj.transform(ops) + transform_obj(obj, ops) @pytest.mark.parametrize("use_apply", [True, False]) -def test_transform_udf(axis, float_frame, use_apply, frame_or_series): +def test_transform_udf(axis, float_frame, use_apply, frame_or_series, series_ops_only): # GH 35964 obj = unpack_obj(float_frame, frame_or_series, axis) # transform uses UDF either via apply or passing the entire DataFrame def func(x): # transform is using apply iff x is not a DataFrame - if use_apply == isinstance(x, frame_or_series): + if use_apply == isinstance(x, frame_or_series) and not series_ops_only: # Force transform to fallback raise ValueError return x + 1 - result = obj.transform(func, axis=axis) + result = transform_obj(obj, func, axis=axis, series_ops_only=series_ops_only) expected = obj + 1 tm.assert_equal(result, expected) @@ -153,11 +164,12 @@ def func(x): @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) -def test_transform_bad_dtype(op, frame_or_series, request): +def test_transform_bad_dtype(op, frame_or_series, request, series_ops_only): # GH 35964 if op == "ngroup": + error_type = AttributeError if series_ops_only else ValueError request.node.add_marker( - pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + pytest.mark.xfail(raises=error_type, reason="ngroup not valid for NDFrame") ) obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms @@ -170,23 +182,19 @@ def test_transform_bad_dtype(op, frame_or_series, request): ] ) - with pytest.raises(error, match=msg): - obj.transform(op) - with pytest.raises(error, match=msg): - obj.transform([op]) - with pytest.raises(error, match=msg): - obj.transform({"A": op}) - with pytest.raises(error, match=msg): - obj.transform({"A": [op]}) + for func in [op, [op], {"A": op}, {"A": [op]}]: + with pytest.raises(error, match=msg): + transform_obj(obj, func, series_ops_only=series_ops_only) @pytest.mark.parametrize("op", frame_kernels_raise) -def test_transform_failure_typeerror(request, op): +def test_transform_failure_typeerror(request, op, series_ops_only): # GH 35964 if op == "ngroup": + error = AttributeError if series_ops_only else ValueError request.node.add_marker( - pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") + pytest.mark.xfail(raises=error, reason="ngroup not valid for NDFrame") ) # Using object makes most transform kernels fail @@ -199,66 +207,64 @@ def test_transform_failure_typeerror(request, op): ] ) - with pytest.raises(error, match=msg): - df.transform([op]) - - with pytest.raises(error, match=msg): - df.transform({"A": op, "B": op}) - - with pytest.raises(error, match=msg): - df.transform({"A": [op], "B": [op]}) - - with pytest.raises(error, match=msg): - df.transform({"A": [op, "shift"], "B": [op]}) + for func in [ + [op], + {"A": op, "B": op}, + {"A": [op], "B": [op]}, + {"A": [op, "shift"], "B": [op]}, + ]: + with pytest.raises(error, match=msg): + transform_obj(df, func, series_ops_only=series_ops_only) -def test_transform_failure_valueerror(): +def test_transform_failure_valueerror(series_ops_only): # GH 40211 def op(x): - if np.sum(np.sum(x)) < 10: + if np.sum(x) < 10: raise ValueError return x df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) - msg = "Transform function failed" + msg = "" if series_ops_only else "Transform function failed" - with pytest.raises(ValueError, match=msg): - df.transform([op]) - - with pytest.raises(ValueError, match=msg): - df.transform({"A": op, "B": op}) - - with pytest.raises(ValueError, match=msg): - df.transform({"A": [op], "B": [op]}) - - with pytest.raises(ValueError, match=msg): - df.transform({"A": [op, "shift"], "B": [op]}) + for func in [ + [op], + {"A": op, "B": op}, + {"A": [op], "B": [op]}, + {"A": [op, "shift"], "B": [op]}, + ]: + with pytest.raises(ValueError, match=msg): + transform_obj(df, func, series_ops_only=series_ops_only) @pytest.mark.parametrize("use_apply", [True, False]) -def test_transform_passes_args(use_apply, frame_or_series): +def test_transform_passes_args(use_apply, frame_or_series, series_ops_only): # GH 35964 # transform uses UDF either via apply or passing the entire DataFrame - expected_args = [1, 2] - expected_kwargs = {"c": 3} + args = [1, 2] + kwargs = {"c": 3} def f(x, a, b, c): # transform is using apply iff x is not a DataFrame - if use_apply == isinstance(x, frame_or_series): + if use_apply == isinstance(x, frame_or_series) and not series_ops_only: # Force transform to fallback raise ValueError - assert [a, b] == expected_args - assert c == expected_kwargs["c"] + assert [a, b] == args + assert c == kwargs["c"] return x - frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs) + transform_obj( + frame_or_series([1]), f, *args, series_ops_only=series_ops_only, **kwargs + ) -def test_transform_empty_dataframe(): +def test_transform_empty_dataframe(series_ops_only): # https://github.com/pandas-dev/pandas/issues/39636 df = DataFrame([], columns=["col1", "col2"]) - result = df.transform(lambda x: x + 10) + result = transform_obj(df, lambda x: x + 10, series_ops_only=series_ops_only) tm.assert_frame_equal(result, df) - result = df["col1"].transform(lambda x: x + 10) + result = transform_obj( + df["col1"], lambda x: x + 10, series_ops_only=series_ops_only + ) tm.assert_series_equal(result, df["col1"]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index a3d9de5e78afb..d2c71395698b7 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -5,7 +5,6 @@ # 3. incompatible ops/dtype/args/kwargs # 4. invalid result shape/type # If your test does not fit into one of these categories, add to this list. - from itertools import chain import re @@ -21,6 +20,7 @@ notna, ) import pandas._testing as tm +from pandas.tests.apply.common import transform_obj @pytest.mark.parametrize("result_type", ["foo", 1]) @@ -71,12 +71,15 @@ def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) -def test_nested_renamer(frame_or_series, method, func): +def test_nested_renamer(frame_or_series, method, func, series_ops_only): # GH 35964 obj = frame_or_series({"A": [1]}) match = "nested renamer is not supported" with pytest.raises(SpecificationError, match=match): - getattr(obj, method)(func) + if method == "transform": + transform_obj(obj, func, series_ops_only=series_ops_only) + else: + getattr(obj, method)(func) @pytest.mark.parametrize( @@ -102,31 +105,38 @@ def test_apply_dict_depr(): @pytest.mark.parametrize("method", ["agg", "transform"]) -def test_dict_nested_renaming_depr(method): +def test_dict_nested_renaming_depr(method, series_ops_only): df = DataFrame({"A": range(5), "B": 5}) # nested renaming msg = r"nested renamer is not supported" + func = {"A": {"foo": "min"}, "B": {"bar": "max"}} with pytest.raises(SpecificationError, match=msg): - getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}}) + if method == "transform": + transform_obj(df, func, series_ops_only=series_ops_only) + else: + getattr(df, method)(func) @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}]) -def test_missing_column(method, func): +def test_missing_column(method, func, series_ops_only): # GH 40004 obj = DataFrame({"A": [1]}) match = re.escape("Column(s) ['B'] do not exist") with pytest.raises(KeyError, match=match): - getattr(obj, method)(func) + if method == "transform": + transform_obj(obj, func, series_ops_only=series_ops_only) + else: + getattr(obj, method)(func) -def test_transform_mixed_column_name_dtypes(): +def test_transform_mixed_column_name_dtypes(series_ops_only): # GH39025 df = DataFrame({"a": ["1"]}) msg = r"Column\(s\) \[1, 'b'\] do not exist" with pytest.raises(KeyError, match=msg): - df.transform({"a": int, 1: str, "b": int}) + transform_obj(df, {"a": int, 1: str, "b": int}, series_ops_only=series_ops_only) @pytest.mark.parametrize( @@ -264,12 +274,13 @@ def test_agg_none_to_type(): df.agg({"a": lambda x: int(x.iloc[0])}) -def test_transform_none_to_type(): +def test_transform_none_to_type(series_ops_only): # GH#34377 df = DataFrame({"a": [None]}) msg = "argument must be a" + func = {"a": lambda x: int(x.iloc[0])} with pytest.raises(TypeError, match=msg): - df.transform({"a": lambda x: int(x.iloc[0])}) + transform_obj(df, func, series_ops_only=series_ops_only) @pytest.mark.parametrize( @@ -316,12 +327,12 @@ def test_transform_and_agg_err_series(string_series, func, msg): @pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) -def test_transform_wont_agg_frame(axis, float_frame, func): +def test_transform_wont_agg_frame(axis, float_frame, func, series_ops_only): # GH 35964 # cannot both transform and agg msg = "Function did not transform" with pytest.raises(ValueError, match=msg): - float_frame.transform(func, axis=axis) + transform_obj(float_frame, func, axis=axis, series_ops_only=series_ops_only) @pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]]) @@ -334,13 +345,15 @@ def test_transform_wont_agg_series(string_series, func): warn_msg = "invalid value encountered in sqrt" with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): - string_series.transform(func) + transform_obj(string_series, func, series_ops_only=True) @pytest.mark.parametrize( "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] ) -def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): +def test_transform_reducer_raises( + all_reductions, frame_or_series, op_wrapper, series_ops_only +): # GH 35964 op = op_wrapper(all_reductions) @@ -349,4 +362,4 @@ def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): msg = "Function did not transform" with pytest.raises(ValueError, match=msg): - obj.transform(op) + transform_obj(obj, op, series_ops_only=series_ops_only) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index aeb6a01eb587a..f07d1c24a0fb4 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -11,7 +11,6 @@ timedelta_range, ) import pandas._testing as tm -from pandas.tests.apply.common import series_transform_kernels @pytest.fixture(params=[False, "compat"]) @@ -317,66 +316,6 @@ def test_transform(string_series, by_row): tm.assert_series_equal(result.reindex_like(expected), expected) -@pytest.mark.parametrize("op", series_transform_kernels) -def test_transform_partial_failure(op, request): - # GH 35964 - if op in ("ffill", "bfill", "pad", "backfill", "shift"): - request.node.add_marker( - pytest.mark.xfail(reason=f"{op} is successful on any dtype") - ) - - # Using object makes most transform kernels fail - ser = Series(3 * [object]) - - if op in ("fillna", "ngroup"): - error = ValueError - msg = "Transform function failed" - else: - error = TypeError - msg = "|".join( - [ - "not supported between instances of 'type' and 'type'", - "unsupported operand type", - ] - ) - - with pytest.raises(error, match=msg): - ser.transform([op, "shift"]) - - with pytest.raises(error, match=msg): - ser.transform({"A": op, "B": "shift"}) - - with pytest.raises(error, match=msg): - ser.transform({"A": [op], "B": ["shift"]}) - - with pytest.raises(error, match=msg): - ser.transform({"A": [op, "shift"], "B": [op]}) - - -def test_transform_partial_failure_valueerror(): - # GH 40211 - def noop(x): - return x - - def raising_op(_): - raise ValueError - - ser = Series(3 * [object]) - msg = "Transform function failed" - - with pytest.raises(ValueError, match=msg): - ser.transform([noop, raising_op]) - - with pytest.raises(ValueError, match=msg): - ser.transform({"A": raising_op, "B": noop}) - - with pytest.raises(ValueError, match=msg): - ser.transform({"A": [raising_op], "B": [noop]}) - - with pytest.raises(ValueError, match=msg): - ser.transform({"A": [noop, raising_op], "B": [noop]}) - - def test_demo(): # demonstration tests s = Series(range(6), dtype="int64", name="series") diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index 82592c4711ece..8eac2ecb8b034 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -8,19 +8,23 @@ concat, ) import pandas._testing as tm +from pandas.tests.apply.common import ( + series_transform_kernels, + transform_obj, +) @pytest.mark.parametrize( "args, kwargs, increment", [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], ) -def test_agg_args(args, kwargs, increment): +def test_agg_args(args, kwargs, increment, series_ops_only): # GH 43357 def f(x, a=0, b=0, c=0): return x + a + 10 * b + 100 * c s = Series([1, 2]) - result = s.transform(f, 0, *args, **kwargs) + result = transform_obj(s, f, *args, series_ops_only=series_ops_only, **kwargs) expected = s + increment tm.assert_series_equal(result, expected) @@ -34,16 +38,16 @@ def f(x, a=0, b=0, c=0): (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), ], ) -def test_transform_listlike(string_series, ops, names): +def test_transform_listlike(string_series, ops, names, series_ops_only): # GH 35964 with np.errstate(all="ignore"): expected = concat([op(string_series) for op in ops], axis=1) expected.columns = names - result = string_series.transform(ops) + result = transform_obj(string_series, ops, series_ops_only=series_ops_only) tm.assert_frame_equal(result, expected) -def test_transform_listlike_func_with_args(): +def test_transform_listlike_func_with_args(series_ops_only): # GH 50624 s = Series([1, 2, 3]) @@ -56,29 +60,91 @@ def foo2(x, b=2, c=0): msg = r"foo1\(\) got an unexpected keyword argument 'b'" with pytest.raises(TypeError, match=msg): - s.transform([foo1, foo2], 0, 3, b=3, c=4) + transform_obj(s, [foo1, foo2], 3, series_ops_only=series_ops_only, b=3, c=4) - result = s.transform([foo1, foo2], 0, 3, c=4) + result = transform_obj(s, [foo1, foo2], 3, series_ops_only=series_ops_only, c=4) expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("box", [dict, Series]) -def test_transform_dictlike(string_series, box): +def test_transform_dictlike(string_series, box, series_ops_only): # GH 35964 with np.errstate(all="ignore"): expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1) expected.columns = ["foo", "bar"] - result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs})) + op = box({"foo": np.sqrt, "bar": np.abs}) + result = transform_obj(string_series, op, series_ops_only=series_ops_only) tm.assert_frame_equal(result, expected) -def test_transform_dictlike_mixed(): +def test_transform_dictlike_mixed(series_ops_only): # GH 40018 - mix of lists and non-lists in values of a dictionary - df = Series([1, 4]) - result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + ser = Series([1, 4]) + op = {"b": ["sqrt", "abs"], "c": "sqrt"} + result = transform_obj(ser, op, series_ops_only=series_ops_only) expected = DataFrame( [[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_partial_failure(op, request, series_ops_only): + # GH 35964 + if op in ("ffill", "bfill", "pad", "backfill", "shift"): + request.node.add_marker( + pytest.mark.xfail(reason=f"{op} is successful on any dtype") + ) + + # Using object makes most transform kernels fail + ser = Series(3 * [object]) + + if op == "ngroup" and series_ops_only: + error = AttributeError + msg = "'ngroup' is not a valid function for 'Series' object" + elif op == "fillna" and series_ops_only: + error = ValueError + msg = "Must specify a fill 'value' or 'method'" + elif op in ("fillna", "ngroup") and not series_ops_only: + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] + ) + + for func in [ + op, + {"A": op, "B": "shift"}, + {"A": [op], "B": ["shift"]}, + {"A": [op, "shift"], "B": [op]}, + ]: + with pytest.raises(error, match=msg): + transform_obj(ser, func, series_ops_only=series_ops_only) + + +def test_transform_partial_failure_valueerror(series_ops_only): + # GH 40211 + def noop(x): + return x + + def raising_op(_): + raise ValueError + + ser = Series(3 * [object]) + msg = "" if series_ops_only else "Transform function failed" + + for func in [ + [noop, raising_op], + {"A": raising_op, "B": noop}, + {"A": [raising_op], "B": [noop]}, + {"A": [noop, raising_op], "B": [noop]}, + ]: + with pytest.raises(ValueError, match=msg): + transform_obj(ser, func, series_ops_only=series_ops_only) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 76b974330cbf1..32852856fc4bb 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1854,7 +1854,7 @@ def func(ser): tm.assert_frame_equal(df, df_orig) -def test_transform_series(using_copy_on_write): +def test_transform_series(using_copy_on_write, series_ops_only): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1862,7 +1862,11 @@ def func(ser): ser.iloc[0] = 100 return ser - ser.transform(func) + warning_type = None if series_ops_only else FutureWarning + + msg = "Series.transform will in the future only operate on whole series. Set " + with tm.assert_produces_warning(warning_type, match=msg): + ser.transform(func, series_ops_only=series_ops_only) if using_copy_on_write: tm.assert_series_equal(ser, ser_orig) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 1522b83a4f5d0..226019b60b817 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -350,7 +350,11 @@ (pd.DataFrame, frame_data, operator.methodcaller("describe")), (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), - (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())), + ( + pd.Series, + ([1],), + operator.methodcaller("transform", lambda x: x - x.min(), series_ops_only=True), + ), ( pd.DataFrame, frame_mi_data, diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cf3f41e04902c..c408f39abe276 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1186,11 +1186,17 @@ def test_groupby_transform_with_datetimes(func, values): tm.assert_series_equal(result, expected) -def test_groupby_transform_dtype(): +def test_groupby_transform_dtype(series_ops_only): # GH 22243 df = DataFrame({"a": [1], "val": [1.35]}) - result = df["val"].transform(lambda x: x.map(lambda y: f"+{y}")) + warning_type = None if series_ops_only else FutureWarning + + msg = "Series.transform will in the future only operate on whole series. Set " + with tm.assert_produces_warning(warning_type, match=msg): + result = df["val"].transform( + lambda x: x.map(lambda y: f"+{y}"), series_ops_only=series_ops_only + ) expected1 = Series(["+1.35"], name="val", dtype="object") tm.assert_series_equal(result, expected1)