-
Notifications
You must be signed in to change notification settings - Fork 181
ENH some steps to make cloudpickle dynamic function/classes more deterministic #524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
3cfaf20
0018fac
edbd021
f16a63f
124826b
00febd0
9e2325c
9e7fa91
7eec003
e90c088
cadb9f2
252989c
9510a0e
5e6314d
8df4ceb
5ed2b21
32191f4
2d3e917
9cb6bfc
e18678f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -126,7 +126,7 @@ def _lookup_class_or_track(class_tracker_id, class_def): | |
|
||
|
||
def register_pickle_by_value(module): | ||
"""Register a module to make it functions and classes picklable by value. | ||
"""Register a module to make its functions and classes picklable by value. | ||
|
||
By default, functions and classes that are attributes of an importable | ||
module are to be pickled by reference, that is relying on re-importing | ||
|
@@ -369,7 +369,7 @@ def func(): | |
# sys.modules. | ||
if name is not None and name.startswith(prefix): | ||
# check whether the function can address the sub-module | ||
tokens = set(name[len(prefix) :].split(".")) | ||
tokens = set(name[len(prefix):].split(".")) | ||
if not tokens - set(code.co_names): | ||
subimports.append(sys.modules[name]) | ||
return subimports | ||
|
@@ -409,7 +409,9 @@ def _walk_global_ops(code): | |
|
||
def _extract_class_dict(cls): | ||
"""Retrieve a copy of the dict of a class without the inherited method.""" | ||
clsdict = dict(cls.__dict__) # copy dict proxy to a dict | ||
# copy dict proxy to a dict. Sort the keys to make the pickle deterministic | ||
clsdict = {k: cls.__dict__[k] for k in sorted(cls.__dict__)} | ||
|
||
if len(cls.__bases__) == 1: | ||
inherited_dict = cls.__bases__[0].__dict__ | ||
else: | ||
|
@@ -533,9 +535,15 @@ class id will also reuse this class definition. | |
The "extra" variable is meant to be a dict (or None) that can be used for | ||
forward compatibility shall the need arise. | ||
""" | ||
# We need to intern the keys of the type_kwargs dict to avoid having | ||
# different pickles for the same dynamic class depending on whether it was | ||
# dynamically created or reconstructed from a pickled stream. | ||
type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()} | ||
|
||
skeleton_class = types.new_class( | ||
name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs) | ||
) | ||
|
||
return _lookup_class_or_track(class_tracker_id, skeleton_class) | ||
|
||
|
||
|
@@ -694,7 +702,9 @@ def _function_getstate(func): | |
# unpickling time by iterating over slotstate and calling setattr(func, | ||
# slotname, slotvalue) | ||
slotstate = { | ||
"__name__": func.__name__, | ||
# Intern the function names to be consistent with the method names that are | ||
# automatically interned with `setattr`. This is only the case for cpython. | ||
"__name__": sys.intern(func.__name__) if not PYPY else func.__name__, | ||
"__qualname__": func.__qualname__, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any idea why we don't need this treatment for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, at least for the latter, there are still things to solver. I will push a slight change to the test to show what I mean. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue with string interning is mostly due to collision between the string defined in two different places: typically method names that are interned as class attributes, vs the dynamic func name. |
||
"__annotations__": func.__annotations__, | ||
"__kwdefaults__": func.__kwdefaults__, | ||
|
@@ -802,6 +812,19 @@ def _code_reduce(obj): | |
# of the specific type from types, for example: | ||
# >>> from types import CodeType | ||
# >>> help(CodeType) | ||
|
||
# We need to intern the function names to be consistent with the method name, | ||
# which are interned automatically with `setattr`. This is only the case for cpython. | ||
co_name = sys.intern(obj.co_name) if not PYPY else obj.co_name | ||
|
||
# Create copies of these tuple to make cloudpickle payload deterministic. | ||
tomMoral marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# When creating a code object during load, copies of these four tuples are | ||
# created, while in the main process, these tuples can be shared. | ||
# By always creating copies, we make sure the resulting payload is deterministic. | ||
co_names = tuple(name for name in obj.co_names) | ||
co_varnames = tuple(name for name in obj.co_varnames) | ||
co_freevars = tuple(name for name in obj.co_freevars) | ||
co_cellvars = tuple(name for name in obj.co_cellvars) | ||
if hasattr(obj, "co_exceptiontable"): | ||
# Python 3.11 and later: there are some new attributes | ||
# related to the enhanced exceptions. | ||
|
@@ -814,16 +837,16 @@ def _code_reduce(obj): | |
obj.co_flags, | ||
obj.co_code, | ||
obj.co_consts, | ||
obj.co_names, | ||
obj.co_varnames, | ||
co_names, | ||
co_varnames, | ||
obj.co_filename, | ||
obj.co_name, | ||
co_name, | ||
obj.co_qualname, | ||
obj.co_firstlineno, | ||
obj.co_linetable, | ||
obj.co_exceptiontable, | ||
obj.co_freevars, | ||
obj.co_cellvars, | ||
co_freevars, | ||
co_cellvars, | ||
) | ||
elif hasattr(obj, "co_linetable"): | ||
# Python 3.10 and later: obj.co_lnotab is deprecated and constructor | ||
|
@@ -837,14 +860,14 @@ def _code_reduce(obj): | |
obj.co_flags, | ||
obj.co_code, | ||
obj.co_consts, | ||
obj.co_names, | ||
obj.co_varnames, | ||
co_names, | ||
co_varnames, | ||
obj.co_filename, | ||
obj.co_name, | ||
co_name, | ||
obj.co_firstlineno, | ||
obj.co_linetable, | ||
obj.co_freevars, | ||
obj.co_cellvars, | ||
co_freevars, | ||
co_cellvars, | ||
) | ||
elif hasattr(obj, "co_nmeta"): # pragma: no cover | ||
# "nogil" Python: modified attributes from 3.9 | ||
|
@@ -859,15 +882,15 @@ def _code_reduce(obj): | |
obj.co_flags, | ||
obj.co_code, | ||
obj.co_consts, | ||
obj.co_varnames, | ||
co_varnames, | ||
obj.co_filename, | ||
obj.co_name, | ||
co_name, | ||
obj.co_firstlineno, | ||
obj.co_lnotab, | ||
obj.co_exc_handlers, | ||
obj.co_jump_table, | ||
obj.co_freevars, | ||
obj.co_cellvars, | ||
co_freevars, | ||
co_cellvars, | ||
obj.co_free2reg, | ||
obj.co_cell2reg, | ||
) | ||
|
@@ -882,14 +905,14 @@ def _code_reduce(obj): | |
obj.co_flags, | ||
obj.co_code, | ||
obj.co_consts, | ||
obj.co_names, | ||
obj.co_varnames, | ||
co_names, | ||
co_varnames, | ||
obj.co_filename, | ||
obj.co_name, | ||
co_name, | ||
obj.co_firstlineno, | ||
obj.co_lnotab, | ||
obj.co_freevars, | ||
obj.co_cellvars, | ||
co_freevars, | ||
co_cellvars, | ||
) | ||
return types.CodeType, args | ||
|
||
|
@@ -1127,6 +1150,12 @@ def _class_setstate(obj, state): | |
if attrname == "_abc_impl": | ||
registry = attr | ||
else: | ||
# Note: attribute names are automatically interned in cpython. This means that to get | ||
# determinist pickling in subprocess, we need to make sure that the dynamic function names | ||
# are also interned since the Pickler's memoizer relies on physical object | ||
# identity to break cycles in the reference graph of the object being | ||
# serialized. | ||
# https://github.com/python/cpython/blob/main/Objects/object.c#L1060 | ||
tomMoral marked this conversation as resolved.
Show resolved
Hide resolved
|
||
setattr(obj, attrname, attr) | ||
if registry is not None: | ||
for subclass in registry: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
|
||
See: tests/test_backward_compat.py | ||
""" | ||
|
||
from . import cloudpickle | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import pytest | ||
|
||
pytest.register_assert_rewrite("tests.testutils") |
Uh oh!
There was an error while loading. Please reload this page.