Skip to content

DEPR: deprecate element-wise operations in (Series|DataFrame).transform #54915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1112,14 +1112,14 @@ will be the names of the transforming functions.

.. ipython:: python

tsdf.transform([np.abs, lambda x: x + 1])
tsdf.transform([np.abs, lambda x: x + 1], series_ops_only=True)

Passing multiple functions to a Series will yield a DataFrame. The
resulting column names will be the names of the transforming functions.

.. ipython:: python

tsdf["A"].transform([np.abs, lambda x: x + 1])
tsdf["A"].transform([np.abs, lambda x: x + 1], series_ops_only=True)


Transforming with a dict
Expand All @@ -1130,7 +1130,7 @@ Passing a dict of functions will allow selective transforming per column.

.. ipython:: python

tsdf.transform({"A": np.abs, "B": lambda x: x + 1})
tsdf.transform({"A": np.abs, "B": lambda x: x + 1}, series_ops_only=True)

Passing a dict of lists will generate a MultiIndexed DataFrame with these
selective transforms.
Expand Down
8 changes: 8 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2015,3 +2015,11 @@ def warsaw(request) -> str:
@pytest.fixture()
def arrow_string_storage():
    """
    Fixture returning the storage names ``"pyarrow"`` and ``"pyarrow_numpy"``.
    """
    storages = ("pyarrow", "pyarrow_numpy")
    return storages


@pytest.fixture(params=[True, False])
def series_ops_only(request):
    """
    Fixture supplying each value of the ``series_ops_only`` flag in turn.

    Parametrized over ``True`` and ``False``. The flag is the parameter used in
    Series.transform and DataFrame.transform. Remove in pandas v3.0.
    """
    flag = request.param
    return flag
31 changes: 20 additions & 11 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,14 @@ def transform(self) -> DataFrame | Series:
axis = self.axis
args = self.args
kwargs = self.kwargs
by_row = self.by_row

is_series = obj.ndim == 1

if obj._get_axis_number(axis) == 1:
assert not is_series
return obj.T.transform(func, 0, *args, **kwargs).T
soo = not by_row
return obj.T.transform(func, 0, *args, series_ops_only=soo, **kwargs).T

if is_list_like(func) and not is_dict_like(func):
func = cast(list[AggFuncTypeBase], func)
Expand All @@ -230,14 +232,17 @@ def transform(self) -> DataFrame | Series:
func = cast(AggFuncTypeDict, func)
return self.transform_dict_like(func)

# func is either str or callable
func = cast(AggFuncTypeBase, func)
try:
result = self.transform_str_or_callable(func)
except TypeError:
raise
except Exception as err:
raise ValueError("Transform function failed") from err
if not by_row:
result = obj.apply(func, by_row=by_row, args=args, **kwargs)
else:
# func is either str or callable
func = cast(AggFuncTypeBase, func)
try:
result = self.transform_str_or_callable(func)
except TypeError:
raise
except Exception as err:
raise ValueError("Transform function failed") from err

# Functions that transform may return empty Series/DataFrame
# when the dtype is not appropriate
Expand Down Expand Up @@ -267,6 +272,7 @@ def transform_dict_like(self, func) -> DataFrame:
obj = self.obj
args = self.args
kwargs = self.kwargs
soo = not self.by_row

# transform is currently only for Series/DataFrame
assert isinstance(obj, ABCNDFrame)
Expand All @@ -279,7 +285,7 @@ def transform_dict_like(self, func) -> DataFrame:
results: dict[Hashable, DataFrame | Series] = {}
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
results[name] = colg.transform(how, 0, *args, **kwargs)
results[name] = colg.transform(how, 0, *args, series_ops_only=soo, **kwargs)
return concat(results, axis=1)

def transform_str_or_callable(self, func) -> DataFrame | Series:
Expand Down Expand Up @@ -602,7 +608,10 @@ def apply_list_or_dict_like(self) -> DataFrame | Series:
Result when self.func is a list-like or dict-like, None otherwise.
"""
if self.axis == 1 and isinstance(self.obj, ABCDataFrame):
return self.obj.T.apply(self.func, 0, args=self.args, **self.kwargs).T
soo = not self.by_row
return self.obj.T.apply(
self.func, 0, args=self.args, series_ops_only=soo, **self.kwargs
).T

func = self.func
kwargs = self.kwargs
Expand Down
23 changes: 21 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9961,11 +9961,30 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
axis=_shared_doc_kwargs["axis"],
)
def transform(
self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
self,
func: AggFuncType,
axis: Axis = 0,
*args,
series_ops_only: bool = False,
**kwargs,
) -> DataFrame:
from pandas.core.apply import frame_apply

op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
if not series_ops_only and is_list_like(func):
cls_name = type(self).__name__
warnings.warn(
f"{cls_name}.transform will in the future only operate on "
"whole series. Set series_ops_only = True to opt into the new behavior "
f"or use {cls_name}.map to continue operating on series elements.",
FutureWarning,
stacklevel=find_stack_level(),
)

by_row: Literal[False, "compat"] = False if series_ops_only else "compat"

op = frame_apply(
self, func=func, axis=axis, args=args, by_row=by_row, kwargs=kwargs
)
result = op.transform()
assert isinstance(result, DataFrame)
return result
Expand Down
22 changes: 20 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4613,12 +4613,30 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
axis=_shared_doc_kwargs["axis"],
)
def transform(
self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
self,
func: AggFuncType,
axis: Axis = 0,
*args,
series_ops_only: bool = False,
**kwargs,
) -> DataFrame | Series:
# Validate axis argument
self._get_axis_number(axis)
if not series_ops_only and not isinstance(func, (str, np.ufunc)):
cls_name = type(self).__name__
warnings.warn(
f"{cls_name}.transform will in the future only operate on "
"whole series. Set series_ops_only = True to opt into the new behavior "
f"or use {cls_name}.map to continue operating on series elements.",
FutureWarning,
stacklevel=find_stack_level(),
)

by_row: Literal[False, "compat"] = False if series_ops_only else "compat"
ser = self.copy(deep=False) if using_copy_on_write() else self
result = SeriesApply(ser, func=func, args=args, kwargs=kwargs).transform()
result = SeriesApply(
ser, func=func, by_row=by_row, args=args, kwargs=kwargs
).transform()
return result

def apply(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@
1 1
2 2
dtype: int64
>>> s.transform([np.sqrt, np.exp])
>>> s.transform([np.sqrt, np.exp]) # doctest: +SKIP
sqrt exp
0 0.000000 1.000000
1 1.000000 2.718282
Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/apply/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,38 @@
import numpy as np

import pandas as pd
import pandas._testing as tm
from pandas.api.types import is_list_like
from pandas.core.groupby.base import transformation_kernels

# Neither Series nor DataFrame has a cumcount method, so that kernel is
# dropped before the remaining kernel names are sorted.
series_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)
frame_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)


def transform_obj(obj, func, *args, axis=0, series_ops_only=False, **kwargs):
    """
    Call ``obj.transform`` and assert the deprecation warning where expected.

    Parameters
    ----------
    obj : Series or DataFrame
        Object whose ``transform`` method is invoked.
    func : function, str, list-like or dict-like
        Forwarded to ``obj.transform`` as its first argument.
    *args
        Extra positional arguments forwarded after ``axis``.
    axis : default 0
        Forwarded to ``obj.transform`` as its second positional argument.
    series_ops_only : bool, default False
        Forwarded as a keyword only on the opt-in path and on the path where
        the FutureWarning is asserted; omitted on every other path.
    **kwargs
        Extra keyword arguments forwarded to ``obj.transform``.

    Returns
    -------
    The value returned by ``obj.transform``.
    """
    # ufuncs are dispatched without the flag, whatever its value.
    if isinstance(func, np.ufunc):
        return obj.transform(func, axis, *args, **kwargs)

    # Opting in: forward the flag, no warning is asserted.
    if series_ops_only:
        return obj.transform(
            func, axis, *args, series_ops_only=series_ops_only, **kwargs
        )

    # A DataFrame given a single (non-list-like) func, or any object given a
    # string func, is called without the flag and without a warning check.
    if (isinstance(obj, pd.DataFrame) and not is_list_like(func)) or isinstance(
        func, str
    ):
        return obj.transform(func, axis, *args, **kwargs)

    # Every remaining combination must raise the FutureWarning.
    cls_name = type(obj).__name__
    msg = (
        f"{cls_name}.transform will in the future only operate on "
        "whole series. Set series_ops_only = True to opt into the new behavior "
        f"or use {cls_name}.map to continue operating on series elements."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = obj.transform(
            func, axis, *args, series_ops_only=series_ops_only, **kwargs
        )
    return result
Loading