From 9a194fa3fc9d9d286252af981cba934f4cfdb8c5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 6 Sep 2021 11:16:42 -0400 Subject: [PATCH 1/5] ENH: Experimental Higher Order Methods API --- doc/source/user_guide/homs_api.rst | 76 +++++++++ pandas/core/apply.py | 84 +++++++++- pandas/core/config_init.py | 17 ++ pandas/core/groupby/generic.py | 26 ++++ pandas/tests/apply/test_frame_apply.py | 145 +++++++++++++----- .../tests/groupby/aggregate/test_aggregate.py | 25 ++- pandas/tests/groupby/aggregate/test_other.py | 32 +++- pandas/tests/groupby/test_function.py | 6 +- pandas/tests/groupby/test_groupby.py | 32 ++-- pandas/tests/resample/test_deprecated.py | 11 +- pandas/tests/resample/test_resample_api.py | 35 ++++- pandas/tests/reshape/test_pivot.py | 12 +- pandas/tests/window/test_api.py | 14 +- 13 files changed, 433 insertions(+), 82 deletions(-) create mode 100644 doc/source/user_guide/homs_api.rst diff --git a/doc/source/user_guide/homs_api.rst b/doc/source/user_guide/homs_api.rst new file mode 100644 index 0000000000000..f319ab49433a6 --- /dev/null +++ b/doc/source/user_guide/homs_api.rst @@ -0,0 +1,76 @@ +.. _homs: + +:orphan: + +{{ header }} + +*************************** +pandas Higher Order Methods +*************************** + +pandas is experimenting with improving the behavior of higher order methods (HOMs). These +are methods that take a function as an argument, often a user-defined function (UDF). +They include ``.apply``, ``.agg``, ``.transform``, and ``.filter``. The goal is to make +these methods behave in a more predictable and consistent manner, reducing the complexity +of their implementation, and improving performance where possible. This page details the +differences between the old and new behaviors, as well as providing some context behind +each change that is being made. + +There are a great number of changes that are planned. 
In order to transition in a +reasonable manner for users, all changes are behind an experimental "use_hom_api" +option. When enabled, pandas HOMs are subject to breaking changes without notice. +Users can opt into the new behavior and provide feedback. Once the improvements have +been made, this option will be declared no longer experimental. At this point, any +breaking changes will happen only when preceded by a ``FutureWarning`` and when +pandas releases a major version. After a period of community feedback, and when the +behavior is deemed ready for release, pandas will then raise a ``FutureWarning`` that +the default value of this option will be set to ``True`` in a future version. Once the +default is ``True``, users can still override it to ``False``. After a sufficient +amount of time, pandas will remove this option altogether and only the new behavior +will remain. + +``DataFrame.agg`` with list-likes +--------------------------------- + +Previously, using ``DataFrame.agg`` with a list-like argument would transpose the result when +compared with just providing a single aggregation function. + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + + df.agg("sum") + df.agg(["sum"]) + +This transpose no longer occurs, making the result more consistent. + +.. ipython:: python + + with pd.option_context("use_hom_api", True): + result = df.agg(["sum"]) + result + + with pd.option_context("use_hom_api", True): + result = df.agg(["sum", "mean"]) + result + +``DataFrame.groupby(...).agg`` with list-likes +---------------------------------------------- + +Previously, using ``DataFrame.groupby(...).agg`` with a list-like argument would put the +columns as the first level of the resulting hierarchical columns. The result is +that the columns for each aggregation function are separated, inconsistent with the result +for a single aggregator. + +.. 
ipython:: python + + df.groupby("a").agg("sum") + df.groupby("a").agg(["sum", "min"]) + +Now the levels are swapped, so that the columns for each aggregation are together. + +.. ipython:: python + + with pd.option_context("use_hom_api", True): + result = df.groupby("a").agg(["sum", "min"]) + result diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 238f1382890c9..5dc080a3313b9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -22,7 +22,10 @@ import numpy as np -from pandas._config import option_context +from pandas._config import ( + get_option, + option_context, +) from pandas._libs import lib from pandas._typing import ( @@ -168,7 +171,10 @@ def agg(self) -> DataFrame | Series | None: return self.agg_dict_like() elif is_list_like(arg): # we require a list, but not a 'str' - return self.agg_list_like() + if get_option("use_hom_api"): + return self.hom_list_like("agg") + else: + return self.agg_list_like() if callable(arg): f = com.get_cython_func(arg) @@ -442,6 +448,80 @@ def agg_list_like(self) -> DataFrame | Series: ) return concatenated.reindex(full_ordered_index, copy=False) + def hom_list_single_arg( + self, method: str, a: AggFuncTypeBase, result_dim: int | None + ) -> tuple[int | None, AggFuncTypeBase | None, DataFrame | Series | None]: + name = None + result = None + try: + if isinstance(a, (tuple, list)): + # Handle (name, value) pairs + name, a = a + else: + name = com.get_callable_name(a) or a + result = getattr(self.obj, method)(a) + if result_dim is None: + result_dim = getattr(result, "ndim", 0) + elif getattr(result, "ndim", 0) != result_dim: + raise ValueError("cannot combine transform and aggregation operations") + except (TypeError, DataError): + warnings.warn( + f"{name} did not aggregate successfully. If any error is " + "raised this will raise in a future version of pandas. 
" + "Drop these columns/ops to avoid this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return result_dim, name, result + + def hom_list_like(self, method: str) -> DataFrame | Series: + """ + Compute aggregation in the case of a list-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(List[AggFuncTypeBase], self.f) + + results = [] + keys = [] + result_dim = None + + for a in arg: + result_dim, name, new_res = self.hom_list_single_arg(method, a, result_dim) + if new_res is not None: + results.append(new_res) + keys.append(name) + + # if we are empty + if not len(results): + raise ValueError("no results") + + try: + concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError: + # we are concatting non-NDFrame objects, + # e.g. a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) + def agg_dict_like(self) -> DataFrame | Series: """ Compute aggregation in the case of a dict-like argument. diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index bf2d770ee1e7f..f49f869e0550d 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -526,6 +526,23 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["block", "array"]), ) +use_hom_api = """ +: boolean + Whether to use the Higher Order Methods implementations. Currently experimental. + Defaults to False. 
+""" + + +with cf.config_prefix("mode"): + cf.register_option( + "use_hom_api", + # Get the default from an environment variable, if set, otherwise defaults + # to False. This environment variable can be set for testing. + os.environ.get("PANDAS_USE_HOM_API", "false").lower() == "true", + use_hom_api, + validator=is_bool, + ) + # user warnings chained_assignment = """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 29411b9c722a9..5741b2d6d9596 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -26,6 +26,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import reduction as libreduction from pandas._typing import ( ArrayLike, @@ -876,6 +878,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result.columns = columns if result is None: + if get_option("use_hom_api"): + return self._hom_agg(func, args, kwargs) # grouper specific aggregations if self.grouper.nkeys > 1: @@ -926,6 +930,28 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) return result + def _hom_agg(self, func, args, kwargs): + if args or kwargs: + # test_pass_args_kwargs gets here (with and without as_index) + # can't return early + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1 and self.grouper.nkeys == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + # Note: axis == 1 precludes 'not self.as_index', see __init__ + result = self._aggregate_frame(func) + return result + else: + # test_groupby_as_index_series_scalar gets here + # with 'not self.as_index' + return self._python_agg_general(func, *args, **kwargs) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = Index(range(len(result))) + + return result + agg = aggregate def _iterate_slices(self) -> Iterable[Series]: diff --git a/pandas/tests/apply/test_frame_apply.py 
b/pandas/tests/apply/test_frame_apply.py index 98872571ae2bb..dedca296b30ce 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -13,6 +13,7 @@ Series, Timestamp, date_range, + get_option, ) import pandas._testing as tm from pandas.tests.frame.common import zip_frames @@ -682,6 +683,8 @@ def test_apply_dup_names_multi_agg(): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + if get_option("use_hom_api"): + expected = expected.T result = df.agg(["min"]) tm.assert_frame_equal(result, expected) @@ -1080,25 +1083,46 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) + if get_option("use_hom_api"): + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [["sqrt"], float_frame.columns] + ) + else: + expected.index = MultiIndex.from_product([["sqrt"], float_frame.index]) else: - expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["sqrt"]] + ) + else: + expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) tm.assert_frame_equal(result, expected) # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - expected = zip_frames([f_abs, f_sqrt], axis=other_axis) - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product( - [float_frame.columns, ["absolute", "sqrt"]] - ) + if get_option("use_hom_api"): + expected = pd.concat([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.columns] + ) + else: 
+ expected.index = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.index] + ) else: - expected.index = MultiIndex.from_product( - [float_frame.index, ["absolute", "sqrt"]] - ) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) tm.assert_frame_equal(result, expected) @@ -1110,6 +1134,8 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] ) + if get_option("use_hom_api"): + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1160,22 +1186,31 @@ def test_agg_multiple_mixed_no_warning(): }, index=["min", "sum"], ) + if get_option("use_hom_api"): + expected = expected.T + match = "Dropping of nuisance columns" + else: + match = "did not aggregate successfully" # sorted index - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + with tm.assert_produces_warning(FutureWarning, match=match): result = mdf.agg(["min", "sum"]) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + if get_option("use_hom_api"): + match = "Dropping of nuisance columns" + else: + match = "did not aggregate successfully" + + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. 
- expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + if get_option("use_hom_api"): + expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] + else: + expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) @@ -1194,6 +1229,8 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected + if get_option("use_hom_api"): + expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) tm.assert_frame_equal(result, expected) @@ -1270,6 +1307,8 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) + if get_option("use_hom_api"): + expected = expected.T tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): @@ -1277,13 +1316,17 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): + if get_option("use_hom_api"): + match = "Select only valid" + else: + match = "did not aggregate successfully" + with tm.assert_produces_warning(FutureWarning, match=match): result = df.agg(["sum"]) expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) + if get_option("use_hom_api"): + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1323,8 +1366,12 @@ def test_non_callable_aggregates(how): } ) - tm.assert_frame_equal(result1, result2, check_like=True) - tm.assert_frame_equal(result2, expected, check_like=True) + if get_option("use_hom_api"): + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected.T) + else: + tm.assert_frame_equal(result1, result2, check_like=True) + tm.assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() 
result = getattr(df, how)("count") @@ -1361,7 +1408,9 @@ def func(group_col): tm.assert_series_equal(result, expected) result = df.agg([func]) - expected = expected.to_frame("func").T + expected = expected.to_frame("func") + if not get_option("use_hom_api"): + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1474,14 +1523,20 @@ def test_apply_empty_list_reduce(): tm.assert_series_equal(result, expected) -def test_apply_no_suffix_index(): +def test_apply_no_suffix_index(request): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) - result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) - expected = DataFrame( - {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] - ) - + result = pdf.apply([np.square, lambda x: x, lambda x: x]) + if get_option("use_hom_api"): + columns = MultiIndex.from_product( + [["square", "", ""], ["A", "B"]] + ) + expected = DataFrame(3 * [[16, 81, 4, 9, 4, 9]], columns=columns) + else: + columns = MultiIndex.from_product( + [["A", "B"], ["square", "", ""]] + ) + expected = DataFrame(3 * [[16, 4, 4, 81, 9, 9]], columns=columns) tm.assert_frame_equal(result, expected) @@ -1513,18 +1568,28 @@ def foo(s): aggs = ["sum", foo, "count", "min"] with tm.assert_produces_warning( - FutureWarning, match=r"\['item'\] did not aggregate successfully" + FutureWarning, match="did not aggregate successfully" ): result = df.agg(aggs) - expected = DataFrame( - { - "item": ["123456", np.nan, 6, "1"], - "att1": [21.0, 10.5, 6.0, 1.0], - "att2": [18.0, 9.0, 6.0, 0.0], - "att3": [17.0, 8.5, 6.0, 0.0], - }, - index=["sum", "foo", "count", "min"], - ) + if get_option("use_hom_api"): + expected = DataFrame( + { + "sum": ["123456", 21, 18, 17], + "count": [6, 6, 6, 6], + "min": ["1", 1, 0, 0], + }, + index=["item", "att1", "att2", "att3"], + ) + else: + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + 
index=["sum", "foo", "count", "min"], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2ab553434873c..14832fdb6f265 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -18,6 +18,7 @@ MultiIndex, Series, concat, + get_option, to_datetime, ) import pandas._testing as tm @@ -383,12 +384,13 @@ def test_multiple_functions_tuples_and_non_tuples(df): expected = df.groupby("A")["C"].agg(ex_funcs) tm.assert_frame_equal(result, expected) + klass = None if get_option("use_hom_api") else FutureWarning with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" + klass, match=r"\['B'\] did not aggregate successfully" ): result = df.groupby("A").agg(funcs) with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" + klass, match=r"\['B'\] did not aggregate successfully" ): expected = df.groupby("A").agg(ex_funcs) tm.assert_frame_equal(result, expected) @@ -549,12 +551,18 @@ def test_order_aggregate_multiple_funcs(): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) - result = res.columns.levels[1] + if get_option("use_hom_api"): + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + else: + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] - expected = Index(["sum", "max", "mean", "ohlc", "min"]) + expected = Index(["sum", "max", "mean", "ohlc", "min"]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) @@ 
-1267,7 +1275,10 @@ def test_nonagg_agg(): g = df.groupby("a") result = g.agg(["cumsum"]) - result.columns = result.columns.droplevel(-1) + if get_option("use_hom_api"): + result.columns = result.columns.droplevel(0) + else: + result.columns = result.columns.droplevel(-1) expected = g.agg("cumsum") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 06044ddd3f4b8..a8e036a5b654e 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import get_option + import pandas as pd from pandas import ( DataFrame, @@ -42,16 +44,21 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() + if get_option("use_hom_api"): + msg = "Dropping invalid columns" + else: + msg = r"\['key2'\] did not aggregate successfully" + with tm.assert_produces_warning( FutureWarning, - match=r"\['key2'\] did not aggregate successfully", + match=msg, ): expected = grouped.agg([peak_to_peak]) expected.columns = ["data1", "data2"] with tm.assert_produces_warning( FutureWarning, - match=r"\['key2'\] did not aggregate successfully", + match=msg, ): result = grouped.agg(peak_to_peak) tm.assert_frame_equal(result, expected) @@ -201,13 +208,21 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) - expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + if get_option("use_hom_api"): + expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) + else: + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, 
check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) - expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + if get_option("use_hom_api"): + expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) + else: + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg({"C": "mean", "D": "sum"}) @@ -393,7 +408,10 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] + if get_option("use_hom_api"): + expected.columns = expected.columns.levels[1] + else: + expected.columns = expected.columns.levels[0] result = g.agg(P1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index dbc38497d3bee..c492f9d3fcd21 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -14,6 +14,7 @@ Series, Timestamp, date_range, + get_option, ) import pandas._testing as tm import pandas.core.nanops as nanops @@ -1148,7 +1149,10 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - expected.columns = MultiIndex.from_tuples([("b", function)]) + if get_option("use_hom_api"): + expected.columns = MultiIndex.from_tuples([(function, "b")]) + else: + expected.columns = MultiIndex.from_tuples([("b", function)]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fb2b9f0632f0d..ff677e0e51d1d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -19,6 +19,7 @@ Timedelta, Timestamp, 
date_range, + get_option, to_datetime, ) import pandas._testing as tm @@ -587,15 +588,23 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] + klass = None if get_option("use_hom_api") else FutureWarning with tm.assert_produces_warning( - FutureWarning, match=r"\['C'\] did not aggregate successfully" + klass, match=r"\['C'\] did not aggregate successfully" ): agged = grouped.agg(funcs) - expected = pd.concat( - [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], - keys=["D", "E", "F"], - axis=1, - ) + if get_option("use_hom_api"): + expected = pd.concat( + [grouped.agg(funcs[0]), grouped.agg(funcs[1])], + keys=["mean", "std"], + axis=1, + ) + else: + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) assert isinstance(agged.index, MultiIndex) assert isinstance(expected.index, MultiIndex) tm.assert_frame_equal(agged, expected) @@ -2091,9 +2100,14 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) - - tm.assert_frame_equal(result, expected) + if get_option("use_hom_api"): + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + else: + result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + tm.assert_frame_equal(result, expected) def test_groupby_multiindex_nat(): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index 126ca05ca1546..a3332e275fe54 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -10,6 +10,7 @@ from pandas import ( DataFrame, Series, + get_option, ) import pandas._testing 
as tm from pandas.core.indexes.datetimes import date_range @@ -97,7 +98,10 @@ def test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if get_option("use_hom_api"): + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) @@ -216,7 +220,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if get_option("use_hom_api"): + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 86e0411ee3334..f25d618847bde 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -8,6 +8,7 @@ DataFrame, NamedAgg, Series, + get_option, ) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range @@ -348,10 +349,17 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if get_option("use_hom_api"): + expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - warn = FutureWarning if t 
in cases[1:3] else None + if t in cases[1:3] and not get_option("use_hom_api"): + warn = FutureWarning + else: + warn = None with tm.assert_produces_warning( warn, match=r"\['date'\] did not aggregate successfully", @@ -630,11 +638,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - expected = DataFrame( - [47.5, 143.5, 195.5], - index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"), - columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), - ) + if get_option("use_hom_api"): + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]), + ) + else: + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index a38cf8b067b27..060c4a36ca4be 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning +from pandas._config import get_option import pandas as pd from pandas import ( @@ -1911,8 +1911,14 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) - tm.assert_frame_equal(table, expected) + + if get_option("use_hom_api"): + expected = Series([1, 1, 1, 3], index=index) + expected.index.name = None + tm.assert_series_equal(table, expected) + else: + expected = DataFrame(index=index) + tm.assert_frame_equal(table, expected) def 
test_pivot_string_as_func(self): # GH #18713 diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index f84a579247630..95e15ed0a79c3 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -10,6 +10,7 @@ Timestamp, concat, date_range, + get_option, timedelta_range, ) import pandas._testing as tm @@ -90,8 +91,12 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if get_option("use_hom_api"): + expected = concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) tm.assert_frame_equal(result, expected) result = r.aggregate({"A": np.mean, "B": np.std}) @@ -147,7 +152,10 @@ def test_agg_consistency(): r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + if get_option("use_hom_api"): + expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) + else: + expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) tm.assert_index_equal(result, expected) result = r["A"].agg([np.sum, np.mean]).columns From 77b6f1e70321d34d1f9e08cc6d269585309f3ac5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 22 Jan 2022 20:50:38 -0500 Subject: [PATCH 2/5] use_hom_api -> api.use_hom --- pandas/core/apply.py | 2 +- pandas/core/config_init.py | 10 +++---- pandas/core/groupby/generic.py | 2 +- pandas/tests/apply/test_frame_apply.py | 30 +++++++++---------- .../tests/groupby/aggregate/test_aggregate.py | 6 ++-- pandas/tests/groupby/aggregate/test_other.py | 8 ++--- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/groupby/test_groupby.py | 6 ++-- 
pandas/tests/resample/test_deprecated.py | 4 +-- pandas/tests/resample/test_resample_api.py | 6 ++-- pandas/tests/reshape/test_pivot.py | 4 ++- pandas/tests/window/test_api.py | 4 +-- 12 files changed, 43 insertions(+), 41 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5dc080a3313b9..0bd7c22920186 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -171,7 +171,7 @@ def agg(self) -> DataFrame | Series | None: return self.agg_dict_like() elif is_list_like(arg): # we require a list, but not a 'str' - if get_option("use_hom_api"): + if get_option("api.use_hom"): return self.hom_list_like("agg") else: return self.agg_list_like() diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index f49f869e0550d..018a948239f22 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -526,20 +526,20 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["block", "array"]), ) -use_hom_api = """ +use_hom_doc = """ : boolean Whether to use the Higher Order Methods implementations. Currently experimental. Defaults to False. """ -with cf.config_prefix("mode"): +with cf.config_prefix("api"): cf.register_option( - "use_hom_api", + "use_hom", # Get the default from an environment variable, if set, otherwise defaults # to False. This environment variable can be set for testing. 
- os.environ.get("PANDAS_USE_HOM_API", "false").lower() == "true", - use_hom_api, + os.environ.get("PANDAS_USE_HOM", "false").lower() == "true", + use_hom_doc, validator=is_bool, ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5741b2d6d9596..089060495298e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -878,7 +878,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result.columns = columns if result is None: - if get_option("use_hom_api"): + if get_option("api.use_hom"): return self._hom_agg(func, args, kwargs) # grouper specific aggregations diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index dedca296b30ce..5974de1ed5457 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -683,7 +683,7 @@ def test_apply_dup_names_multi_agg(): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T result = df.agg(["min"]) @@ -1083,7 +1083,7 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if get_option("use_hom_api"): + if get_option("api.use_hom"): if axis in {0, "index"}: expected.columns = MultiIndex.from_product( [["sqrt"], float_frame.columns] @@ -1103,7 +1103,7 @@ def test_agg_transform(axis, float_frame): # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = pd.concat([f_abs, f_sqrt], axis=other_axis) if axis in {0, "index"}: expected.columns = MultiIndex.from_product( @@ -1134,7 +1134,7 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, 
columns=["A", "B"], index=["min", "max"] ) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1186,7 +1186,7 @@ def test_agg_multiple_mixed_no_warning(): }, index=["min", "sum"], ) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T match = "Dropping of nuisance columns" else: @@ -1197,7 +1197,7 @@ def test_agg_multiple_mixed_no_warning(): tm.assert_frame_equal(result, expected) - if get_option("use_hom_api"): + if get_option("api.use_hom"): match = "Dropping of nuisance columns" else: match = "did not aggregate successfully" @@ -1207,7 +1207,7 @@ def test_agg_multiple_mixed_no_warning(): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] else: expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) @@ -1229,7 +1229,7 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) @@ -1307,7 +1307,7 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1316,7 +1316,7 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - if get_option("use_hom_api"): + if get_option("api.use_hom"): match = "Select only valid" else: match = "did not aggregate successfully" @@ -1325,7 +1325,7 @@ def test_nuiscance_columns(): expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) - if 
get_option("use_hom_api"): + if get_option("api.use_hom"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1366,7 +1366,7 @@ def test_non_callable_aggregates(how): } ) - if get_option("use_hom_api"): + if get_option("api.use_hom"): tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result1, expected.T) else: @@ -1409,7 +1409,7 @@ def func(group_col): result = df.agg([func]) expected = expected.to_frame("func") - if not get_option("use_hom_api"): + if not get_option("api.use_hom"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1527,7 +1527,7 @@ def test_apply_no_suffix_index(request): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) result = pdf.apply([np.square, lambda x: x, lambda x: x]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): columns = MultiIndex.from_product( [["square", "", ""], ["A", "B"]] ) @@ -1571,7 +1571,7 @@ def foo(s): FutureWarning, match="did not aggregate successfully" ): result = df.agg(aggs) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = DataFrame( { "sum": ["123456", 21, 18, 17], diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 14832fdb6f265..31f688f5d2b0c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -384,7 +384,7 @@ def test_multiple_functions_tuples_and_non_tuples(df): expected = df.groupby("A")["C"].agg(ex_funcs) tm.assert_frame_equal(result, expected) - klass = None if get_option("use_hom_api") else FutureWarning + klass = None if get_option("api.use_hom") else FutureWarning with tm.assert_produces_warning( klass, match=r"\['B'\] did not aggregate successfully" ): @@ -551,7 +551,7 @@ def test_order_aggregate_multiple_funcs(): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - if get_option("use_hom_api"): + if get_option("api.use_hom"): # TODO (GH 35725): This will not raise 
when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): @@ -1275,7 +1275,7 @@ def test_nonagg_agg(): g = df.groupby("a") result = g.agg(["cumsum"]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): result.columns = result.columns.droplevel(0) else: result.columns = result.columns.droplevel(-1) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index a8e036a5b654e..3b7dfdda46432 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -44,7 +44,7 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() - if get_option("use_hom_api"): + if get_option("api.use_hom"): msg = "Dropping invalid columns" else: msg = r"\['key2'\] did not aggregate successfully" @@ -208,7 +208,7 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) else: @@ -217,7 +217,7 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) else: @@ -408,7 +408,7 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected.columns = expected.columns.levels[1] else: expected.columns = expected.columns.levels[0] diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c492f9d3fcd21..dea467b10b157 
100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1149,7 +1149,7 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected.columns = MultiIndex.from_tuples([(function, "b")]) else: expected.columns = MultiIndex.from_tuples([("b", function)]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ff677e0e51d1d..3fefedba25597 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -588,12 +588,12 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] - klass = None if get_option("use_hom_api") else FutureWarning + klass = None if get_option("api.use_hom") else FutureWarning with tm.assert_produces_warning( klass, match=r"\['C'\] did not aggregate successfully" ): agged = grouped.agg(funcs) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = pd.concat( [grouped.agg(funcs[0]), grouped.agg(funcs[1])], keys=["mean", "std"], @@ -2100,7 +2100,7 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - if get_option("use_hom_api"): + if get_option("api.use_hom"): # TODO (GH 35725): This will not raise when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index a3332e275fe54..bcce8c80444f0 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -98,7 +98,7 @@ def test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - if get_option("use_hom_api"): 
+ if get_option("api.use_hom"): expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) @@ -220,7 +220,7 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index f25d618847bde..2f36d9d78de88 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -349,14 +349,14 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - if t in cases[1:3] and not get_option("use_hom_api"): + if t in cases[1:3] and not get_option("api.use_hom"): warn = FutureWarning else: warn = None @@ -638,7 +638,7 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = DataFrame( [47.5, 143.5, 195.5], index=date_range( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 060c4a36ca4be..2be67d827a65e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -10,6 +10,8 @@ from pandas._config import get_option +from pandas.errors import 
PerformanceWarning + import pandas as pd from pandas import ( Categorical, @@ -1912,7 +1914,7 @@ def test_pivot_margins_name_unicode(self): ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = Series([1, 1, 1, 3], index=index) expected.index.name = None tm.assert_series_equal(table, expected) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 95e15ed0a79c3..3f970ddb26346 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -91,7 +91,7 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = concat([a_mean, b_mean, a_std, b_std], axis=1) expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: @@ -152,7 +152,7 @@ def test_agg_consistency(): r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - if get_option("use_hom_api"): + if get_option("api.use_hom"): expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) else: expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) From 0d96b503a0812d335ff52c3e96e28e00c3e978ae Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 23 Jan 2022 13:39:56 -0500 Subject: [PATCH 3/5] Doc fixup --- doc/source/user_guide/homs_api.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/homs_api.rst b/doc/source/user_guide/homs_api.rst index f319ab49433a6..f756149afa208 100644 --- a/doc/source/user_guide/homs_api.rst +++ b/doc/source/user_guide/homs_api.rst @@ -17,7 +17,7 @@ differences between the old and new behaviors, as well as providing some context each change that is being made. There are a great number of changes that are planned. 
In order to transition in a -reasonable manner for users, all changes are behind an experimental "use_hom_api" +reasonable manner for users, all changes are behind an experimental "api.use_hom" option. When enabled, pandas HOMs are subject to breaking changes without notice. Users can opt into the new behavior and provide feedback. Once the improvements have been made, this option will be declared no longer experimental. At this point, any @@ -46,11 +46,11 @@ This transpose no longer occurs, making the result more consistent. .. ipython:: python - with pd.option_context("use_hom_api", True): + with pd.option_context("api.use_hom", True): result = df.agg(["sum"]) result - with pd.option_context("use_hom_api", True): + with pd.option_context("api.use_hom", True): result = df.agg(["sum", "mean"]) result @@ -71,6 +71,6 @@ Now the levels are swapped, so that the columns for each aggregation are togethe .. ipython:: python - with pd.option_context("use_hom_api", True): + with pd.option_context("api.use_hom", True): result = df.groupby("a").agg(["sum", "min"]) result From e37716858bac8d35c8ea1a914f768ae8c190869a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 24 Jan 2022 18:35:57 -0500 Subject: [PATCH 4/5] Do less in try, pytest fixture --- pandas/conftest.py | 8 +++ pandas/core/apply.py | 19 ++++--- pandas/tests/apply/test_frame_apply.py | 51 +++++++++---------- .../tests/groupby/aggregate/test_aggregate.py | 13 +++-- pandas/tests/groupby/aggregate/test_other.py | 16 +++--- pandas/tests/groupby/test_function.py | 5 +- pandas/tests/groupby/test_groupby.py | 11 ++-- pandas/tests/resample/test_deprecated.py | 9 ++-- pandas/tests/resample/test_resample_api.py | 11 ++-- pandas/tests/reshape/test_pivot.py | 6 +-- pandas/tests/window/test_api.py | 9 ++-- 11 files changed, 77 insertions(+), 81 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e61d9ee18cadb..fb90fc04d29ae 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1766,3 +1766,11 
@@ def using_array_manager(request): Fixture to check if the array manager is being used. """ return pd.options.mode.data_manager == "array" + + +@pytest.fixture +def using_hom_api(request): + """ + Fixture to check if the Higher Order Methods API is being used. + """ + return pd.options.api.use_hom diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0bd7c22920186..25e3e0832f9ec 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -451,19 +451,14 @@ def agg_list_like(self) -> DataFrame | Series: def hom_list_single_arg( self, method: str, a: AggFuncTypeBase, result_dim: int | None ) -> tuple[int | None, AggFuncTypeBase | None, DataFrame | Series | None]: - name = None result = None + if isinstance(a, (tuple, list)): + # Handle (name, value) pairs + name, a = a + else: + name = com.get_callable_name(a) or a try: - if isinstance(a, (tuple, list)): - # Handle (name, value) pairs - name, a = a - else: - name = com.get_callable_name(a) or a result = getattr(self.obj, method)(a) - if result_dim is None: - result_dim = getattr(result, "ndim", 0) - elif getattr(result, "ndim", 0) != result_dim: - raise ValueError("cannot combine transform and aggregation operations") except (TypeError, DataError): warnings.warn( f"{name} did not aggregate successfully. 
If any error is " @@ -472,6 +467,10 @@ def hom_list_single_arg( FutureWarning, stacklevel=find_stack_level(), ) + if result_dim is None: + result_dim = getattr(result, "ndim", 0) + elif getattr(result, "ndim", 0) != result_dim: + raise ValueError("cannot combine transform and aggregation operations") return result_dim, name, result diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 5974de1ed5457..a6dc417fbfb8a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -13,7 +13,6 @@ Series, Timestamp, date_range, - get_option, ) import pandas._testing as tm from pandas.tests.frame.common import zip_frames @@ -679,11 +678,11 @@ def test_apply_non_numpy_dtype_category(): tm.assert_frame_equal(result, df) -def test_apply_dup_names_multi_agg(): +def test_apply_dup_names_multi_agg(using_hom_api): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T result = df.agg(["min"]) @@ -1067,7 +1066,7 @@ def test_consistency_for_boxed(box, int_frame_const_col): tm.assert_frame_equal(result, expected) -def test_agg_transform(axis, float_frame): +def test_agg_transform(axis, float_frame, using_hom_api): other_axis = 1 if axis in {0, "index"} else 0 with np.errstate(all="ignore"): @@ -1083,7 +1082,7 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if get_option("api.use_hom"): + if using_hom_api: if axis in {0, "index"}: expected.columns = MultiIndex.from_product( [["sqrt"], float_frame.columns] @@ -1103,7 +1102,7 @@ def test_agg_transform(axis, float_frame): # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - if get_option("api.use_hom"): + if using_hom_api: 
expected = pd.concat([f_abs, f_sqrt], axis=other_axis) if axis in {0, "index"}: expected.columns = MultiIndex.from_product( @@ -1126,7 +1125,7 @@ def test_agg_transform(axis, float_frame): tm.assert_frame_equal(result, expected) -def test_demo(): +def test_demo(using_hom_api): # demonstration tests df = DataFrame({"A": range(5), "B": 5}) @@ -1134,7 +1133,7 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] ) - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T tm.assert_frame_equal(result, expected) @@ -1167,7 +1166,7 @@ def test_agg_with_name_as_column_name(): tm.assert_series_equal(result, expected) -def test_agg_multiple_mixed_no_warning(): +def test_agg_multiple_mixed_no_warning(using_hom_api): # GH 20909 mdf = DataFrame( { @@ -1186,7 +1185,7 @@ def test_agg_multiple_mixed_no_warning(): }, index=["min", "sum"], ) - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T match = "Dropping of nuisance columns" else: @@ -1197,7 +1196,7 @@ def test_agg_multiple_mixed_no_warning(): tm.assert_frame_equal(result, expected) - if get_option("api.use_hom"): + if using_hom_api: match = "Dropping of nuisance columns" else: match = "did not aggregate successfully" @@ -1207,14 +1206,14 @@ def test_agg_multiple_mixed_no_warning(): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. 
- if get_option("api.use_hom"): + if using_hom_api: expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] else: expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) -def test_agg_reduce(axis, float_frame): +def test_agg_reduce(axis, float_frame, using_hom_api): other_axis = 1 if axis in {0, "index"} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() @@ -1229,7 +1228,7 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) @@ -1285,7 +1284,7 @@ def test_agg_reduce(axis, float_frame): tm.assert_frame_equal(result, expected) -def test_nuiscance_columns(): +def test_nuiscance_columns(using_hom_api): # GH 15015 df = DataFrame( @@ -1307,7 +1306,7 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T tm.assert_frame_equal(result, expected) @@ -1316,7 +1315,7 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - if get_option("api.use_hom"): + if using_hom_api: match = "Select only valid" else: match = "did not aggregate successfully" @@ -1325,13 +1324,13 @@ def test_nuiscance_columns(): expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) - if get_option("api.use_hom"): + if using_hom_api: expected = expected.T tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("how", ["agg", "apply"]) -def test_non_callable_aggregates(how): +def test_non_callable_aggregates(how, using_hom_api): # GH 16405 # 'size' is a property of frame/series @@ -1366,7 +1365,7 @@ def test_non_callable_aggregates(how): } ) - if get_option("api.use_hom"): + if using_hom_api: 
tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result1, expected.T) else: @@ -1396,7 +1395,7 @@ def test_size_as_str(how, axis): tm.assert_series_equal(result, expected) -def test_agg_listlike_result(): +def test_agg_listlike_result(using_hom_api): # GH-29587 user defined function returning list-likes df = DataFrame({"A": [2, 2, 3], "B": [1.5, np.nan, 1.5], "C": ["foo", None, "bar"]}) @@ -1409,7 +1408,7 @@ def func(group_col): result = df.agg([func]) expected = expected.to_frame("func") - if not get_option("api.use_hom"): + if not using_hom_api: expected = expected.T tm.assert_frame_equal(result, expected) @@ -1523,11 +1522,11 @@ def test_apply_empty_list_reduce(): tm.assert_series_equal(result, expected) -def test_apply_no_suffix_index(request): +def test_apply_no_suffix_index(request, using_hom_api): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) result = pdf.apply([np.square, lambda x: x, lambda x: x]) - if get_option("api.use_hom"): + if using_hom_api: columns = MultiIndex.from_product( [["square", "", ""], ["A", "B"]] ) @@ -1548,7 +1547,7 @@ def test_apply_raw_returns_string(): tm.assert_series_equal(result, expected) -def test_aggregation_func_column_order(): +def test_aggregation_func_column_order(using_hom_api): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. 
df = DataFrame( @@ -1571,7 +1570,7 @@ def foo(s): FutureWarning, match="did not aggregate successfully" ): result = df.agg(aggs) - if get_option("api.use_hom"): + if using_hom_api: expected = DataFrame( { "sum": ["123456", 21, 18, 17], diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 31f688f5d2b0c..131ec024bc851 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -18,7 +18,6 @@ MultiIndex, Series, concat, - get_option, to_datetime, ) import pandas._testing as tm @@ -375,7 +374,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): tm.assert_frame_equal(result, expected) -def test_multiple_functions_tuples_and_non_tuples(df): +def test_multiple_functions_tuples_and_non_tuples(df, using_hom_api): # #1359 funcs = [("foo", "mean"), "std"] ex_funcs = [("foo", "mean"), ("std", "std")] @@ -384,7 +383,7 @@ def test_multiple_functions_tuples_and_non_tuples(df): expected = df.groupby("A")["C"].agg(ex_funcs) tm.assert_frame_equal(result, expected) - klass = None if get_option("api.use_hom") else FutureWarning + klass = None if using_hom_api else FutureWarning with tm.assert_produces_warning( klass, match=r"\['B'\] did not aggregate successfully" ): @@ -547,11 +546,11 @@ def test_callable_result_dtype_series(keys, agg_index, input, dtype, method): tm.assert_series_equal(result, expected) -def test_order_aggregate_multiple_funcs(): +def test_order_aggregate_multiple_funcs(using_hom_api): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - if get_option("api.use_hom"): + if using_hom_api: # TODO (GH 35725): This will not raise when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): @@ -1268,14 +1267,14 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): tm.assert_frame_equal(result_df, expected_df) -def 
test_nonagg_agg(): +def test_nonagg_agg(using_hom_api): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]}) g = df.groupby("a") result = g.agg(["cumsum"]) - if get_option("api.use_hom"): + if using_hom_api: result.columns = result.columns.droplevel(0) else: result.columns = result.columns.droplevel(-1) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 3b7dfdda46432..dfb7dfd7350ac 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import get_option - import pandas as pd from pandas import ( DataFrame, @@ -26,7 +24,7 @@ from pandas.io.formats.printing import pprint_thing -def test_agg_api(): +def test_agg_api(using_hom_api): # GH 6337 # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error # different api for agg when passed custom function with mixed frame @@ -44,7 +42,7 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() - if get_option("api.use_hom"): + if using_hom_api: msg = "Dropping invalid columns" else: msg = r"\['key2'\] did not aggregate successfully" @@ -183,7 +181,7 @@ def test_aggregate_float64_no_int64(): tm.assert_frame_equal(result, expected) -def test_aggregate_api_consistency(): +def test_aggregate_api_consistency(using_hom_api): # GH 9052 # make sure that the aggregates via dict # are consistent @@ -208,7 +206,7 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - if get_option("api.use_hom"): + if using_hom_api: expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) else: @@ -217,7 +215,7 @@ def 
test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - if get_option("api.use_hom"): + if using_hom_api: expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) else: @@ -386,7 +384,7 @@ def bad(x): tm.assert_frame_equal(result, expected) -def test_agg_consistency(): +def test_agg_consistency(using_hom_api): # agg with ([]) and () not consistent # GH 6715 def P1(a): @@ -408,7 +406,7 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - if get_option("api.use_hom"): + if using_hom_api: expected.columns = expected.columns.levels[1] else: expected.columns = expected.columns.levels[0] diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index dea467b10b157..a62cf766acaea 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -14,7 +14,6 @@ Series, Timestamp, date_range, - get_option, ) import pandas._testing as tm import pandas.core.nanops as nanops @@ -1133,7 +1132,7 @@ def test_groupby_mean_no_overflow(): ], ) @pytest.mark.parametrize("function", ["mean", "median", "var"]) -def test_apply_to_nullable_integer_returns_float(values, function): +def test_apply_to_nullable_integer_returns_float(values, function, using_hom_api): # https://github.com/pandas-dev/pandas/issues/32219 output = 0.5 if function == "var" else 1.5 arr = np.array([output] * 3, dtype=float) @@ -1149,7 +1148,7 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - if get_option("api.use_hom"): + if using_hom_api: expected.columns = MultiIndex.from_tuples([(function, "b")]) else: expected.columns = MultiIndex.from_tuples([("b", function)]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ef777ebc2aac6..cc6fe7bceb94a 
100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -19,7 +19,6 @@ Timedelta, Timestamp, date_range, - get_option, to_datetime, ) import pandas._testing as tm @@ -552,7 +551,7 @@ def test_multi_key_multiple_functions(df): tm.assert_frame_equal(agged, expected) -def test_frame_multi_key_function_list(): +def test_frame_multi_key_function_list(using_hom_api): data = DataFrame( { "A": [ @@ -602,12 +601,12 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] - klass = None if get_option("api.use_hom") else FutureWarning + klass = None if using_hom_api else FutureWarning with tm.assert_produces_warning( klass, match=r"\['C'\] did not aggregate successfully" ): agged = grouped.agg(funcs) - if get_option("api.use_hom"): + if using_hom_api: expected = pd.concat( [grouped.agg(funcs[0]), grouped.agg(funcs[1])], keys=["mean", "std"], @@ -2091,7 +2090,7 @@ def test_tuple_correct_keyerror(): df.groupby((7, 8)).mean() -def test_groupby_agg_ohlc_non_first(): +def test_groupby_agg_ohlc_non_first(using_hom_api): # GH 21716 df = DataFrame( [[1], [1]], @@ -2114,7 +2113,7 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - if get_option("api.use_hom"): + if using_hom_api: # TODO (GH 35725): This will not raise when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index bcce8c80444f0..7805f1b12b2f7 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -10,7 +10,6 @@ from pandas import ( DataFrame, Series, - get_option, ) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range @@ -81,7 +80,7 @@ def test_deprecating_on_loffset_and_base(): @all_ts 
@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_resample_loffset_arg_type(frame, create_index, arg): +def test_resample_loffset_arg_type(frame, create_index, arg, using_hom_api): # GH 13218, 15002 df = frame expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] @@ -98,7 +97,7 @@ def test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - if get_option("api.use_hom"): + if using_hom_api: expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) @@ -205,7 +204,7 @@ def test_resample_float_base(): @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) @pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_loffset_returns_datetimeindex(frame, kind, agg_arg): +def test_loffset_returns_datetimeindex(frame, kind, agg_arg, using_hom_api): # make sure passing loffset returns DatetimeIndex in all cases # basic method taken from Base.test_resample_loffset_arg_type() df = frame @@ -220,7 +219,7 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - if get_option("api.use_hom"): + if using_hom_api: expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 2f36d9d78de88..d44022cbe5541 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -8,7 +8,6 @@ DataFrame, NamedAgg, Series, - get_option, ) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range @@ -322,7 +321,7 @@ def 
test_agg_consistency_int_str_column_mix(): # `Base` test class -def test_agg(): +def test_agg(using_hom_api): # test with all three Resampler apis and TimeGrouper np.random.seed(1234) @@ -349,14 +348,14 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - if get_option("api.use_hom"): + if using_hom_api: expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - if t in cases[1:3] and not get_option("api.use_hom"): + if t in cases[1:3] and not using_hom_api: warn = FutureWarning else: warn = None @@ -624,7 +623,7 @@ def test_selection_api_validation(): @pytest.mark.parametrize( "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"] ) -def test_agg_with_datetime_index_list_agg_func(col_name): +def test_agg_with_datetime_index_list_agg_func(col_name, using_hom_api): # GH 22660 # The parametrized column names would get converted to dates by our # date parser. 
Some would result in OutOfBoundsError (ValueError) while @@ -638,7 +637,7 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - if get_option("api.use_hom"): + if using_hom_api: expected = DataFrame( [47.5, 143.5, 195.5], index=date_range( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index eb317e13348df..4b85c999198ec 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import get_option - from pandas.errors import PerformanceWarning import pandas as pd @@ -1905,7 +1903,7 @@ def test_pivot_table_not_series(self): tm.assert_frame_equal(result, expected) - def test_pivot_margins_name_unicode(self): + def test_pivot_margins_name_unicode(self, using_hom_api): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" frame = DataFrame({"foo": [1, 2, 3]}) @@ -1914,7 +1912,7 @@ def test_pivot_margins_name_unicode(self): ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - if get_option("api.use_hom"): + if using_hom_api: expected = Series([1, 1, 1, 3], index=index) expected.index.name = None tm.assert_series_equal(table, expected) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 3f970ddb26346..b12dec7c25b03 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -10,7 +10,6 @@ Timestamp, concat, date_range, - get_option, timedelta_range, ) import pandas._testing as tm @@ -80,7 +79,7 @@ def test_skip_sum_object_raises(): tm.assert_frame_equal(result, expected) -def test_agg(): +def test_agg(using_hom_api): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3) @@ -91,7 +90,7 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - if get_option("api.use_hom"): + if using_hom_api: expected = concat([a_mean, b_mean, a_std, 
b_std], axis=1) expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: @@ -146,13 +145,13 @@ def test_agg_apply(raw): tm.assert_frame_equal(result, expected, check_like=True) -def test_agg_consistency(): +def test_agg_consistency(using_hom_api): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - if get_option("api.use_hom"): + if using_hom_api: expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) else: expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) From f996f5ed79c262f0caa59d24c63af131c9891e31 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 1 Feb 2022 20:36:44 -0500 Subject: [PATCH 5/5] Link to docstrings --- doc/source/user_guide/homs_api.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/homs_api.rst b/doc/source/user_guide/homs_api.rst index f756149afa208..87525d84be56a 100644 --- a/doc/source/user_guide/homs_api.rst +++ b/doc/source/user_guide/homs_api.rst @@ -10,11 +10,15 @@ pandas Higher Order Methods pandas is experimenting with improving the behavior of higher order methods (HOMs). These are methods that take a function as an argument, often a user-defined function (UDF). -They include ``.apply``, ``.agg``, ``.transform``, and ``.filter``. The goal is to make -these methods behave in a more predictable and consistent manner, reducing the complexity -of their implementation, and improving performance where possible. This page details the -differences between the old and new behaviors, as well as providing some context behind -each change that is being made. +The modified methods include the following. + + - :meth:`DataFrame.agg` + - :meth:`.DataFrameGroupBy.aggregate` + +The goal is to make these methods behave in a more predictable and consistent manner, +reducing the complexity of their implementation, and improving performance where +possible. 
This page details the differences between the old and new behaviors and +provides some context behind each change that is being made. There are a great number of changes that are planned. In order to transition in a reasonable manner for users, all changes are behind an experimental "api.use_hom"