diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3fdb4af52a8c0..575113434b531 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -94,7 +94,7 @@ Other API changes - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) -- +- pickled objects from pandas versions older than ``1.0.0`` are no longer supported (:issue:`57155`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index cd5e6e521b79f..7a08e4ad4b260 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -76,7 +76,7 @@ cdef _nat_rdivide_op(self, other): return NotImplemented -def __nat_unpickle(*args): +def _nat_unpickle(*args): # return constant defined in the module return c_NaT @@ -360,7 +360,7 @@ class NaTType(_NaT): return self.__reduce__() def __reduce__(self): - return (__nat_unpickle, (None, )) + return (_nat_unpickle, (None, )) def __rtruediv__(self, other): return _nat_rdivide_op(self, other) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index ff589ebba4cf6..f4698bee5cb02 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,12 +1,11 @@ """ -Support pre-0.12 series pickle compatibility. 
+Pickle compatibility back to pandas version 1.0 """ from __future__ import annotations import contextlib -import copy import io -import pickle as pkl +import pickle from typing import ( TYPE_CHECKING, Any, @@ -17,7 +16,6 @@ from pandas._libs.arrays import NDArrayBacked from pandas._libs.tslibs import BaseOffset -from pandas import Index from pandas.core.arrays import ( DatetimeArray, PeriodArray, @@ -29,111 +27,20 @@ from collections.abc import Generator -def load_reduce(self) -> None: - stack = self.stack - args = stack.pop() - func = stack[-1] - - try: - stack[-1] = func(*args) - return - except TypeError as err: - # If we have a deprecated function, - # try to replace and try again. - - msg = "_reconstruct: First argument must be a sub-type of ndarray" - - if msg in str(err): - try: - cls = args[0] - stack[-1] = object.__new__(cls) - return - except TypeError: - pass - elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): - # TypeError: object.__new__(Day) is not safe, use Day.__new__() - cls = args[0] - stack[-1] = cls.__new__(*args) - return - elif args and issubclass(args[0], PeriodArray): - cls = args[0] - stack[-1] = NDArrayBacked.__new__(*args) - return - - raise - - # If classes are moved, provide compat here. 
_class_locations_map = { - ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), - # 15477 - ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), # Re-routing unpickle block logic to go through _unpickle_block instead # for pandas <= 1.3.5 ("pandas.core.internals.blocks", "new_block"): ( "pandas._libs.internals", "_unpickle_block", ), - ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), - ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), - # 10890 - ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), - ("pandas.sparse.series", "SparseTimeSeries"): ( - "pandas.core.sparse.series", - "SparseSeries", - ), - # 12588, extensions moving - ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"), - ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"), - # 18543 moving period - ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"), - ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"), - # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype - ("pandas.tslib", "__nat_unpickle"): ( - "pandas._libs.tslibs.nattype", - "__nat_unpickle", - ), - ("pandas._libs.tslib", "__nat_unpickle"): ( + # Avoid Cython's warning "contradiction to to Python 'class private name' rules" + ("pandas._libs.tslibs.nattype", "__nat_unpickle"): ( "pandas._libs.tslibs.nattype", - "__nat_unpickle", - ), - # 15998 top-level dirs moving - ("pandas.sparse.array", "SparseArray"): ( - "pandas.core.arrays.sparse", - "SparseArray", - ), - ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"), - ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"), - ("pandas.indexes.numeric", "Int64Index"): ( - "pandas.core.indexes.base", - "Index", # updated in 50775 - ), - ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"), - ("pandas.indexes.multi", 
"MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"), - ("pandas.tseries.index", "_new_DatetimeIndex"): ( - "pandas.core.indexes.datetimes", - "_new_DatetimeIndex", - ), - ("pandas.tseries.index", "DatetimeIndex"): ( - "pandas.core.indexes.datetimes", - "DatetimeIndex", + "_nat_unpickle", ), - ("pandas.tseries.period", "PeriodIndex"): ( - "pandas.core.indexes.period", - "PeriodIndex", - ), - # 19269, arrays moving - ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"), - # 19939, add timedeltaindex, float64index compat from 15998 move - ("pandas.tseries.tdi", "TimedeltaIndex"): ( - "pandas.core.indexes.timedeltas", - "TimedeltaIndex", - ), - ("pandas.indexes.numeric", "Float64Index"): ( - "pandas.core.indexes.base", - "Index", # updated in 50775 - ), - # 50775, remove Int64Index, UInt64Index & Float64Index from codabase + # 50775, remove Int64Index, UInt64Index & Float64Index from codebase ("pandas.core.indexes.numeric", "Int64Index"): ( "pandas.core.indexes.base", "Index", @@ -155,85 +62,55 @@ def load_reduce(self) -> None: # our Unpickler sub-class to override methods and some dispatcher # functions for compat and uses a non-public class of the pickle module. 
- - -class Unpickler(pkl._Unpickler): +class Unpickler(pickle._Unpickler): def find_class(self, module, name): - # override superclass key = (module, name) module, name = _class_locations_map.get(key, key) return super().find_class(module, name) + dispatch = pickle._Unpickler.dispatch.copy() -Unpickler.dispatch = copy.copy(Unpickler.dispatch) -Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce - - -def load_newobj(self) -> None: - args = self.stack.pop() - cls = self.stack[-1] - - # compat - if issubclass(cls, Index): - obj = object.__new__(cls) - elif issubclass(cls, DatetimeArray) and not args: - arr = np.array([], dtype="M8[ns]") - obj = cls.__new__(cls, arr, arr.dtype) - elif issubclass(cls, TimedeltaArray) and not args: - arr = np.array([], dtype="m8[ns]") - obj = cls.__new__(cls, arr, arr.dtype) - elif cls is BlockManager and not args: - obj = cls.__new__(cls, (), [], False) - else: - obj = cls.__new__(cls, *args) - - self.stack[-1] = obj - - -Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj - - -def load_newobj_ex(self) -> None: - kwargs = self.stack.pop() - args = self.stack.pop() - cls = self.stack.pop() - - # compat - if issubclass(cls, Index): - obj = object.__new__(cls) - else: - obj = cls.__new__(cls, *args, **kwargs) - self.append(obj) - + def load_reduce(self) -> None: + stack = self.stack # type: ignore[attr-defined] + args = stack.pop() + func = stack[-1] -try: - Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex -except (AttributeError, KeyError): - pass - - -def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any: - """ - Load a pickle, with a provided encoding, - - Parameters - ---------- - fh : a filelike object - encoding : an optional encoding - is_verbose : show exception output - """ - try: - fh.seek(0) - if encoding is not None: - up = Unpickler(fh, encoding=encoding) + try: + stack[-1] = func(*args) + except TypeError: + # If we have a deprecated function, + # try to replace and try again. 
+ if args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): + # TypeError: object.__new__(Day) is not safe, use Day.__new__() + cls = args[0] + stack[-1] = cls.__new__(*args) + return + elif args and issubclass(args[0], PeriodArray): + cls = args[0] + stack[-1] = NDArrayBacked.__new__(*args) + return + raise + + dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment] + + def load_newobj(self) -> None: + args = self.stack.pop() # type: ignore[attr-defined] + cls = self.stack.pop() # type: ignore[attr-defined] + + # compat + if issubclass(cls, DatetimeArray) and not args: + arr = np.array([], dtype="M8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif issubclass(cls, TimedeltaArray) and not args: + arr = np.array([], dtype="m8[ns]") + obj = cls.__new__(cls, arr, arr.dtype) + elif cls is BlockManager and not args: + obj = cls.__new__(cls, (), [], False) else: - up = Unpickler(fh) - # "Unpickler" has no attribute "is_verbose" [attr-defined] - up.is_verbose = is_verbose # type: ignore[attr-defined] + obj = cls.__new__(cls, *args) + self.append(obj) # type: ignore[attr-defined] - return up.load() - except (ValueError, TypeError): - raise + dispatch[pickle.NEWOBJ[0]] = load_newobj # type: ignore[assignment] def loads( @@ -257,9 +134,9 @@ def patch_pickle() -> Generator[None, None, None]: """ Temporarily patch pickle to use our unpickler. 
""" - orig_loads = pkl.loads + orig_loads = pickle.loads try: - setattr(pkl, "loads", loads) + setattr(pickle, "loads", loads) yield finally: - setattr(pkl, "loads", orig_loads) + setattr(pickle, "loads", orig_loads) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index c565544075ecf..d37c77182d3fe 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -8,7 +8,7 @@ ) import warnings -from pandas.compat import pickle_compat as pc +from pandas.compat import pickle_compat from pandas.util._decorators import doc from pandas.core.shared_docs import _shared_docs @@ -158,7 +158,7 @@ def read_pickle( Notes ----- - read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3 + read_pickle is only guaranteed to be backwards compatible to pandas 1.0 provided the object was serialized with to_pickle. Examples @@ -195,7 +195,6 @@ def read_pickle( ) as handles: # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes - try: with warnings.catch_warnings(record=True): # We want to silence any warnings about, e.g. moved modules. @@ -204,5 +203,6 @@ def read_pickle( except excs_to_catch: # e.g. # "No module named 'pandas.core.sparse.series'" - # "Can't get attribute '__nat_unpickle' on