CLN: Remove pickle support pre-pandas 1.0 #57555

Merged 8 commits on Feb 22, 2024
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -94,7 +94,7 @@ Other API changes
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
-
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)

.. ---------------------------------------------------------------------------
.. _whatsnew_300.deprecations:
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/nattype.pyx
@@ -76,7 +76,7 @@ cdef _nat_rdivide_op(self, other):
return NotImplemented


def __nat_unpickle(*args):
def _nat_unpickle(*args):
# return constant defined in the module
return c_NaT

@@ -360,7 +360,7 @@ class NaTType(_NaT):
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))
return (_nat_unpickle, (None, ))

def __rtruediv__(self, other):
return _nat_rdivide_op(self, other)
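
The rename matters because ``NaTType.__reduce__`` records a reference to the module-level reconstructor in every pickle stream, and that reconstructor hands back the shared ``c_NaT`` constant on load. A minimal sketch (not part of this diff) of the behaviour this guarantees:

# Minimal sketch (not from this PR): unpickling NaT goes through the
# module-level reconstructor, so the round trip preserves singleton identity.
import pickle

import pandas as pd

payload = pickle.dumps(pd.NaT)
restored = pickle.loads(payload)

assert restored is pd.NaT  # identity, not just equality (NaT != NaT)
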
221 changes: 49 additions & 172 deletions pandas/compat/pickle_compat.py
@@ -1,12 +1,11 @@
"""
Support pre-0.12 series pickle compatibility.
Pickle compatibility to pandas version 1.0
"""
from __future__ import annotations

import contextlib
import copy
import io
import pickle as pkl
import pickle
from typing import (
TYPE_CHECKING,
Any,
@@ -17,7 +16,6 @@
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import BaseOffset

from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
@@ -29,111 +27,20 @@
from collections.abc import Generator


def load_reduce(self) -> None:
stack = self.stack
args = stack.pop()
func = stack[-1]

try:
stack[-1] = func(*args)
return
except TypeError as err:
# If we have a deprecated function,
# try to replace and try again.

msg = "_reconstruct: First argument must be a sub-type of ndarray"

if msg in str(err):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return

raise


# If classes are moved, provide compat here.
_class_locations_map = {
("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
# 15477
("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
# Re-routing unpickle block logic to go through _unpickle_block instead
# for pandas <= 1.3.5
("pandas.core.internals.blocks", "new_block"): (
"pandas._libs.internals",
"_unpickle_block",
),
("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
# 10890
("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
("pandas.sparse.series", "SparseTimeSeries"): (
"pandas.core.sparse.series",
"SparseSeries",
),
# 12588, extensions moving
("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
# 18543 moving period
("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
("pandas.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
("pandas._libs.tslib", "__nat_unpickle"): (
# Avoid Cython's warning "contradiction to to Python 'class private name' rules"
("pandas._libs.tslibs.nattype", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
# 15998 top-level dirs moving
("pandas.sparse.array", "SparseArray"): (
"pandas.core.arrays.sparse",
"SparseArray",
),
("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
("pandas.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
("pandas.tseries.index", "_new_DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"_new_DatetimeIndex",
),
("pandas.tseries.index", "DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"DatetimeIndex",
"_nat_unpickle",
),
("pandas.tseries.period", "PeriodIndex"): (
"pandas.core.indexes.period",
"PeriodIndex",
),
# 19269, arrays moving
("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
# 19939, add timedeltaindex, float64index compat from 15998 move
("pandas.tseries.tdi", "TimedeltaIndex"): (
"pandas.core.indexes.timedeltas",
"TimedeltaIndex",
),
("pandas.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
# 50775, remove Int64Index, UInt64Index & Float64Index from codabase
# 50775, remove Int64Index, UInt64Index & Float64Index from codebase
("pandas.core.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index",
@@ -155,85 +62,55 @@ def load_reduce(self) -> None:

# our Unpickler sub-class to override methods and some dispatcher
# functions for compat and uses a non-public class of the pickle module.


class Unpickler(pkl._Unpickler):
class Unpickler(pickle._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super().find_class(module, name)

dispatch = pickle._Unpickler.dispatch.copy()

Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce


def load_newobj(self) -> None:
args = self.stack.pop()
cls = self.stack[-1]

# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
elif issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
obj = cls.__new__(cls, *args)

self.stack[-1] = obj


Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj


def load_newobj_ex(self) -> None:
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()

# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)

def load_reduce(self) -> None:
stack = self.stack # type: ignore[attr-defined]
args = stack.pop()
func = stack[-1]

try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass


def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
"""
Load a pickle, with a provided encoding,

Parameters
----------
fh : a filelike object
encoding : an optional encoding
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
try:
stack[-1] = func(*args)
except TypeError:
# If we have a deprecated function,
# try to replace and try again.
if args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
raise

dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment]

def load_newobj(self) -> None:
args = self.stack.pop() # type: ignore[attr-defined]
cls = self.stack.pop() # type: ignore[attr-defined]

# compat
if issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
up = Unpickler(fh)
# "Unpickler" has no attribute "is_verbose" [attr-defined]
up.is_verbose = is_verbose # type: ignore[attr-defined]
obj = cls.__new__(cls, *args)
self.append(obj) # type: ignore[attr-defined]

return up.load()
except (ValueError, TypeError):
raise
dispatch[pickle.NEWOBJ[0]] = load_newobj # type: ignore[assignment]


def loads(
@@ -257,9 +134,9 @@ def patch_pickle() -> Generator[None, None, None]:
"""
Temporarily patch pickle to use our unpickler.
"""
orig_loads = pkl.loads
orig_loads = pickle.loads
try:
setattr(pkl, "loads", loads)
setattr(pickle, "loads", loads)
yield
finally:
setattr(pkl, "loads", orig_loads)
setattr(pickle, "loads", orig_loads)
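
The heart of the retained compat layer is the ``find_class`` override: old ``(module, name)`` pairs recorded in a legacy pickle stream are remapped to their current locations before the standard unpickler resolves them. A self-contained sketch of that technique (the module and class names in the mapping are illustrative, not pandas code):

# Illustrative sketch of the find_class remapping technique used above;
# the entries in _relocations are hypothetical.
import io
import pickle

_relocations = {
    ("old_pkg.series", "TimeSeries"): ("new_pkg.series", "Series"),
}

class RelocatingUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        # Redirect moved classes, then fall back to normal resolution.
        module, name = _relocations.get((module, name), (module, name))
        return super().find_class(module, name)

def load_compat(data: bytes):
    """Unpickle bytes, transparently remapping relocated classes."""
    return RelocatingUnpickler(io.BytesIO(data)).load()
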
10 changes: 5 additions & 5 deletions pandas/io/pickle.py
@@ -8,7 +8,7 @@
)
import warnings

from pandas.compat import pickle_compat as pc
from pandas.compat import pickle_compat
from pandas.util._decorators import doc

from pandas.core.shared_docs import _shared_docs
@@ -158,7 +158,7 @@ def read_pickle(

Notes
-----
read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
read_pickle is only guaranteed to be backwards compatible to pandas 1.0
provided the object was serialized with to_pickle.

Examples
@@ -195,7 +195,6 @@ def read_pickle(
) as handles:
# 1) try standard library Pickle
# 2) try pickle_compat (older pandas version) to handle subclass changes

try:
with warnings.catch_warnings(record=True):
# We want to silence any warnings about, e.g. moved modules.
@@ -204,5 +203,6 @@
except excs_to_catch:
# e.g.
# "No module named 'pandas.core.sparse.series'"
# "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
return pc.load(handles.handle, encoding=None)
# "Can't get attribute '_nat_unpickle' on <module 'pandas._libs.tslib"
handles.handle.seek(0)
return pickle_compat.Unpickler(handles.handle).load()
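
For reference, the control flow in ``read_pickle`` after this change is: try the standard library unpickler first, and only on the caught exceptions rewind the handle and retry with the compat ``Unpickler``. A simplified sketch of that fallback (the exception tuple and the file path are assumptions, not the exact pandas code):

# Simplified sketch of the two-step strategy shown in read_pickle above.
import pickle

from pandas.compat import pickle_compat

def load_with_fallback(path: str):
    with open(path, "rb") as fh:
        try:
            return pickle.load(fh)
        except (ImportError, AttributeError, TypeError, ValueError):
            fh.seek(0)  # rewind before retrying with the compat unpickler
            return pickle_compat.Unpickler(fh).load()
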