From 488c72d93940e611ff537d8810ec979bcde2bf63 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 10:25:01 -0700 Subject: [PATCH 1/8] DEPR: to_pydatetime return Index[object] --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/indexes/accessors.py | 12 ++++++++++++ pandas/io/sql.py | 2 +- pandas/tests/extension/test_arrow.py | 4 +++- pandas/tests/series/accessors/test_cat_accessor.py | 3 +++ pandas/tests/series/accessors/test_dt_accessor.py | 8 ++++++-- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 743bb78c70c36..cb2a5343814d6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -167,12 +167,12 @@ Deprecations - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) +- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :meth:`Series.dt` properties (:issue:`20306`) - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a 
list of aggregations instead (:issue:`50684`) - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`) - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) -- .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 76666d62bc5c0..1b4fc61e6cb7e 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -7,9 +7,12 @@ TYPE_CHECKING, cast, ) +import warnings import numpy as np +from pandas.util._exceptions import find_stack_level + from pandas.core.dtypes.common import ( is_datetime64_dtype, is_integer_dtype, @@ -333,6 +336,15 @@ def to_pydatetime(self) -> np.ndarray: array([datetime.datetime(2018, 3, 10, 0, 0), datetime.datetime(2018, 3, 10, 0, 0)], dtype=object) """ + # GH#20306 + warnings.warn( + f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, " + "in a future version this will return a Series containing python " + "datetime objects instead of an ndarray. 
To retain the old behavior, " + "call `np.array` on the result", + FutureWarning, + stacklevel=find_stack_level(), + ) return self._get_values().to_pydatetime() @property diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ca2676488dd11..0e4764198654f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -964,7 +964,7 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: for i, (_, ser) in enumerate(temp.items()): if ser.dtype.kind == "M": - d = ser.dt.to_pydatetime() + d = ser._values.to_pydatetime() elif ser.dtype.kind == "m": vals = ser._values if isinstance(vals, ArrowExtensionArray): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index df470d85a4fad..2931af94c6194 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2274,7 +2274,9 @@ def test_dt_to_pydatetime(): tm.assert_numpy_array_equal(result, expected) assert all(type(res) is datetime for res in result) - expected = ser.astype("datetime64[ns]").dt.to_pydatetime() + msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = ser.astype("datetime64[ns]").dt.to_pydatetime() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index a2ed590640465..cc64a9388fd7c 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -203,6 +203,9 @@ def test_dt_accessor_api_for_categorical(self, idx): if func == "to_period": # dropping TZ warnings.simplefilter("ignore", UserWarning) + if func == "to_pydatetime": + # deprecated to return Series[object] + warnings.simplefilter("ignore", FutureWarning) res = getattr(cat.dt, func)(*args, **kwargs) exp = getattr(ser.dt, func)(*args, **kwargs) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py 
b/pandas/tests/series/accessors/test_dt_accessor.py index fa8e184285616..3f957130b6020 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -115,7 +115,9 @@ def test_dt_namespace_accessor_datetime64(self, freq): for prop in ok_for_dt_methods: getattr(ser.dt, prop) - result = ser.dt.to_pydatetime() + msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object @@ -152,7 +154,9 @@ def test_dt_namespace_accessor_datetime64tz(self): for prop in ok_for_dt_methods: getattr(ser.dt, prop) - result = ser.dt.to_pydatetime() + msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object From 9503c6ffc50bbdbd0938442f1079575e3a47d51c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 12:54:15 -0700 Subject: [PATCH 2/8] fix pyarrow case --- pandas/io/sql.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0e4764198654f..07aeaffc05d0d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -964,7 +964,10 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: for i, (_, ser) in enumerate(temp.items()): if ser.dtype.kind == "M": - d = ser._values.to_pydatetime() + if isinstance(ser._values, ArrowExtensionArray): + d = ser.dt.to_pydatetime() + else: + d = ser._values.to_pydatetime() elif ser.dtype.kind == "m": vals = ser._values if isinstance(vals, ArrowExtensionArray): From ddde224f9627b3e5371375a88df6fb6cd25641b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 13:32:12 -0700 Subject: [PATCH 3/8] suppress doctest warning --- pandas/conftest.py | 3 +++ 1 file changed, 3 insertions(+) 
diff --git a/pandas/conftest.py b/pandas/conftest.py index 70e1c317c2043..6ed334087fbc8 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -138,6 +138,9 @@ def pytest_collection_modifyitems(items, config) -> None: # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( + "to_pydatetime", + "The behavior of DatetimeProperties.to_pydatetime is deprecated", + )( "pandas.core.generic.NDFrame.bool", "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", From 8d46ef78a9caf3cc88b9e7a9ebc578740ac5c61a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 15:05:36 -0700 Subject: [PATCH 4/8] typo fixup --- pandas/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 6ed334087fbc8..669cfe9be5170 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -140,7 +140,8 @@ def pytest_collection_modifyitems(items, config) -> None: ( "to_pydatetime", "The behavior of DatetimeProperties.to_pydatetime is deprecated", - )( + ), + ( "pandas.core.generic.NDFrame.bool", "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", From e6a8f8bcdb91401d33871c411b32d4c65b75abc4 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Apr 2023 12:10:04 -0700 Subject: [PATCH 5/8] deprecate for arrow case --- pandas/core/indexes/accessors.py | 9 +++++++++ pandas/tests/extension/test_arrow.py | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 1b4fc61e6cb7e..bf4da90efc17c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -217,6 +217,15 @@ def _delegate_method(self, name: str, *args, **kwargs): return result def to_pydatetime(self): + # GH#20306 + warnings.warn( + f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, " + "in a future 
version this will return a Series containing python " + "datetime objects instead of an ndarray. To retain the old behavior, " + "call `np.array` on the result", + FutureWarning, + stacklevel=find_stack_level(), + ) return cast(ArrowExtensionArray, self._parent.array)._dt_to_pydatetime() def isocalendar(self): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e55dd32fc29f5..99f2a7d820dcb 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2287,7 +2287,9 @@ def test_dt_to_pydatetime(): data = [datetime(2022, 1, 1), datetime(2023, 1, 1)] ser = pd.Series(data, dtype=ArrowDtype(pa.timestamp("ns"))) - result = ser.dt.to_pydatetime() + msg = "The behavior of ArrowTemporalProperties.to_pydatetime is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.to_pydatetime() expected = np.array(data, dtype=object) tm.assert_numpy_array_equal(result, expected) assert all(type(res) is datetime for res in result) From 273a890e99067095834b9131240a862d3fc5f180 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Apr 2023 13:55:26 -0700 Subject: [PATCH 6/8] suppress warning --- pandas/io/sql.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 07aeaffc05d0d..0f5414a2eb5c6 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -965,7 +965,10 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: for i, (_, ser) in enumerate(temp.items()): if ser.dtype.kind == "M": if isinstance(ser._values, ArrowExtensionArray): - d = ser.dt.to_pydatetime() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + # GH#52459 to_pydatetime will return Series[object] + d = ser.dt.to_pydatetime() else: d = ser._values.to_pydatetime() elif ser.dtype.kind == "m": From fb7c987f7e6e9f3a9e448b502cf4d769a1bebff1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 7 Apr 2023 10:09:24 -0700 Subject: [PATCH 7/8] 
call np.asarray --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0f5414a2eb5c6..c6318b93ab8f2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -968,7 +968,7 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: with warnings.catch_warnings(): warnings.filterwarnings("ignore") # GH#52459 to_pydatetime will return Series[object] - d = ser.dt.to_pydatetime() + d = np.asarray(ser.dt.to_pydatetime(), dtype=object) else: d = ser._values.to_pydatetime() elif ser.dtype.kind == "m": From 71e01650cdea29884cd86c9457151fc385021f8d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Apr 2023 17:03:34 -0700 Subject: [PATCH 8/8] Update pandas/io/sql.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c6318b93ab8f2..49ec1a7545168 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -966,7 +966,7 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: if ser.dtype.kind == "M": if isinstance(ser._values, ArrowExtensionArray): with warnings.catch_warnings(): - warnings.filterwarnings("ignore") + warnings.filterwarnings("ignore", category=FutureWarning) # GH#52459 to_pydatetime will return Series[object] d = np.asarray(ser.dt.to_pydatetime(), dtype=object) else: