@@ -126,7 +126,7 @@ def _lookup_class_or_track(class_tracker_id, class_def):
126
126
127
127
128
128
def register_pickle_by_value (module ):
129
- """Register a module to make it functions and classes picklable by value.
129
+ """Register a module to make its functions and classes picklable by value.
130
130
131
131
By default, functions and classes that are attributes of an importable
132
132
module are to be pickled by reference, that is relying on re-importing
@@ -369,7 +369,7 @@ def func():
369
369
# sys.modules.
370
370
if name is not None and name .startswith (prefix ):
371
371
# check whether the function can address the sub-module
372
- tokens = set (name [len (prefix ) :].split ("." ))
372
+ tokens = set (name [len (prefix ):].split ("." ))
373
373
if not tokens - set (code .co_names ):
374
374
subimports .append (sys .modules [name ])
375
375
return subimports
@@ -409,7 +409,10 @@ def _walk_global_ops(code):
409
409
410
410
def _extract_class_dict (cls ):
411
411
"""Retrieve a copy of the dict of a class without the inherited method."""
412
- clsdict = dict (cls .__dict__ ) # copy dict proxy to a dict
412
+ # Hack to circumvent non-predictable memoization caused by string interning.
413
+ # See the inline comment in _class_setstate for details.
414
+ clsdict = {"" .join (k ): cls .__dict__ [k ] for k in sorted (cls .__dict__ )}
415
+
413
416
if len (cls .__bases__ ) == 1 :
414
417
inherited_dict = cls .__bases__ [0 ].__dict__
415
418
else :
@@ -533,9 +536,15 @@ class id will also reuse this class definition.
533
536
The "extra" variable is meant to be a dict (or None) that can be used for
534
537
forward compatibility should the need arise.
535
538
"""
539
+ # We need to intern the keys of the type_kwargs dict to avoid having
540
+ # different pickles for the same dynamic class depending on whether it was
541
+ # dynamically created or reconstructed from a pickled stream.
542
+ type_kwargs = {sys .intern (k ): v for k , v in type_kwargs .items ()}
543
+
536
544
skeleton_class = types .new_class (
537
545
name , bases , {"metaclass" : type_constructor }, lambda ns : ns .update (type_kwargs )
538
546
)
547
+
539
548
return _lookup_class_or_track (class_tracker_id , skeleton_class )
540
549
541
550
@@ -694,7 +703,9 @@ def _function_getstate(func):
694
703
# unpickling time by iterating over slotstate and calling setattr(func,
695
704
# slotname, slotvalue)
696
705
slotstate = {
697
- "__name__" : func .__name__ ,
706
+ # Hack to circumvent non-predictable memoization caused by string interning.
707
+ # See the inline comment in _class_setstate for details.
708
+ "__name__" : "" .join (func .__name__ ),
698
709
"__qualname__" : func .__qualname__ ,
699
710
"__annotations__" : func .__annotations__ ,
700
711
"__kwdefaults__" : func .__kwdefaults__ ,
@@ -721,7 +732,9 @@ def _function_getstate(func):
721
732
)
722
733
slotstate ["__globals__" ] = f_globals
723
734
724
- state = func .__dict__
735
+ # Hack to circumvent non-predictable memoization caused by string interning.
736
+ # See the inline comment in _class_setstate for details.
737
+ state = {"" .join (k ): v for k , v in func .__dict__ .items ()}
725
738
return state , slotstate
726
739
727
740
@@ -802,6 +815,19 @@ def _code_reduce(obj):
802
815
# of the specific type from types, for example:
803
816
# >>> from types import CodeType
804
817
# >>> help(CodeType)
818
+
819
+ # Hack to circumvent non-predictable memoization caused by string interning.
820
+ # See the inline comment in _class_setstate for details.
821
+ co_name = "" .join (obj .co_name )
822
+
823
+ # Create shallow copies of these tuples to make the cloudpickle payload deterministic.
824
+ # When creating a code object during load, copies of these four tuples are
825
+ # created, while in the main process, these tuples can be shared.
826
+ # By always creating copies, we make sure the resulting payload is deterministic.
827
+ co_names = tuple (name for name in obj .co_names )
828
+ co_varnames = tuple (name for name in obj .co_varnames )
829
+ co_freevars = tuple (name for name in obj .co_freevars )
830
+ co_cellvars = tuple (name for name in obj .co_cellvars )
805
831
if hasattr (obj , "co_exceptiontable" ):
806
832
# Python 3.11 and later: there are some new attributes
807
833
# related to the enhanced exceptions.
@@ -814,16 +840,16 @@ def _code_reduce(obj):
814
840
obj .co_flags ,
815
841
obj .co_code ,
816
842
obj .co_consts ,
817
- obj . co_names ,
818
- obj . co_varnames ,
843
+ co_names ,
844
+ co_varnames ,
819
845
obj .co_filename ,
820
- obj . co_name ,
846
+ co_name ,
821
847
obj .co_qualname ,
822
848
obj .co_firstlineno ,
823
849
obj .co_linetable ,
824
850
obj .co_exceptiontable ,
825
- obj . co_freevars ,
826
- obj . co_cellvars ,
851
+ co_freevars ,
852
+ co_cellvars ,
827
853
)
828
854
elif hasattr (obj , "co_linetable" ):
829
855
# Python 3.10 and later: obj.co_lnotab is deprecated and constructor
@@ -837,14 +863,14 @@ def _code_reduce(obj):
837
863
obj .co_flags ,
838
864
obj .co_code ,
839
865
obj .co_consts ,
840
- obj . co_names ,
841
- obj . co_varnames ,
866
+ co_names ,
867
+ co_varnames ,
842
868
obj .co_filename ,
843
- obj . co_name ,
869
+ co_name ,
844
870
obj .co_firstlineno ,
845
871
obj .co_linetable ,
846
- obj . co_freevars ,
847
- obj . co_cellvars ,
872
+ co_freevars ,
873
+ co_cellvars ,
848
874
)
849
875
elif hasattr (obj , "co_nmeta" ): # pragma: no cover
850
876
# "nogil" Python: modified attributes from 3.9
@@ -859,15 +885,15 @@ def _code_reduce(obj):
859
885
obj .co_flags ,
860
886
obj .co_code ,
861
887
obj .co_consts ,
862
- obj . co_varnames ,
888
+ co_varnames ,
863
889
obj .co_filename ,
864
- obj . co_name ,
890
+ co_name ,
865
891
obj .co_firstlineno ,
866
892
obj .co_lnotab ,
867
893
obj .co_exc_handlers ,
868
894
obj .co_jump_table ,
869
- obj . co_freevars ,
870
- obj . co_cellvars ,
895
+ co_freevars ,
896
+ co_cellvars ,
871
897
obj .co_free2reg ,
872
898
obj .co_cell2reg ,
873
899
)
@@ -882,14 +908,14 @@ def _code_reduce(obj):
882
908
obj .co_flags ,
883
909
obj .co_code ,
884
910
obj .co_consts ,
885
- obj . co_names ,
886
- obj . co_varnames ,
911
+ co_names ,
912
+ co_varnames ,
887
913
obj .co_filename ,
888
- obj . co_name ,
914
+ co_name ,
889
915
obj .co_firstlineno ,
890
916
obj .co_lnotab ,
891
- obj . co_freevars ,
892
- obj . co_cellvars ,
917
+ co_freevars ,
918
+ co_cellvars ,
893
919
)
894
920
return types .CodeType , args
895
921
@@ -1127,6 +1153,18 @@ def _class_setstate(obj, state):
1127
1153
if attrname == "_abc_impl" :
1128
1154
registry = attr
1129
1155
else :
1156
+ # Note: setting attribute names on a class automatically triggers their
1157
+ # interning in CPython:
1158
+ # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
1159
+ #
1160
+ # This means that to get deterministic pickling for a dynamic class that
1161
+ # was initially defined in a different Python process, the pickler
1162
+ # needs to ensure that dynamic class and function attribute names are
1163
+ # systematically copied into a non-interned version to avoid
1164
+ # unpredictable pickle payloads.
1165
+ #
1166
+ # Indeed the Pickler's memoizer relies on physical object identity to break
1167
+ # cycles in the reference graph of the object being serialized.
1130
1168
setattr (obj , attrname , attr )
1131
1169
if registry is not None :
1132
1170
for subclass in registry :
0 commit comments