Skip to content

Commit 4a5685d

Browse files
serhiy-storchaka authored and diegorusso committed
pythongh-122213: Add notes for pickle serialization errors (pythonGH-122214)
This makes it possible to identify the source of the error.
1 parent c0ba85f commit 4a5685d

File tree

5 files changed

+443
-100
lines changed

5 files changed

+443
-100
lines changed

Doc/whatsnew/3.14.rst

+3
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,9 @@ pickle
221221
* Set the default protocol version on the :mod:`pickle` module to 5.
222222
For more details, please see :ref:`pickle protocols <pickle-protocols>`.
223223

224+
* Add notes for pickle serialization errors that make it possible to identify the source
225+
of the error.
226+
(Contributed by Serhiy Storchaka in :gh:`122213`.)
224227

225228
symtable
226229
--------

Lib/pickle.py

+131 -45
Original file line number | Diff line number | Diff line change
@@ -600,18 +600,22 @@ def save(self, obj, save_persistent_id=True):
600600
self.save_global(obj, rv)
601601
return
602602

603-
# Assert that reduce() returned a tuple
604-
if not isinstance(rv, tuple):
605-
raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}')
606-
607-
# Assert that it returned an appropriately sized tuple
608-
l = len(rv)
609-
if not (2 <= l <= 6):
610-
raise PicklingError("tuple returned by __reduce__ "
611-
"must contain 2 through 6 elements")
612-
613-
# Save the reduce() output and finally memoize the object
614-
self.save_reduce(obj=obj, *rv)
603+
try:
604+
# Assert that reduce() returned a tuple
605+
if not isinstance(rv, tuple):
606+
raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}')
607+
608+
# Assert that it returned an appropriately sized tuple
609+
l = len(rv)
610+
if not (2 <= l <= 6):
611+
raise PicklingError("tuple returned by __reduce__ "
612+
"must contain 2 through 6 elements")
613+
614+
# Save the reduce() output and finally memoize the object
615+
self.save_reduce(obj=obj, *rv)
616+
except BaseException as exc:
617+
exc.add_note(f'when serializing {_T(obj)} object')
618+
raise
615619

616620
def persistent_id(self, obj):
617621
# This exists so a subclass can override it
@@ -652,13 +656,25 @@ def save_reduce(self, func, args, state=None, listitems=None,
652656
raise PicklingError(f"first argument to __newobj_ex__() "
653657
f"must be {obj.__class__!r}, not {cls!r}")
654658
if self.proto >= 4:
655-
save(cls)
656-
save(args)
657-
save(kwargs)
659+
try:
660+
save(cls)
661+
except BaseException as exc:
662+
exc.add_note(f'when serializing {_T(obj)} class')
663+
raise
664+
try:
665+
save(args)
666+
save(kwargs)
667+
except BaseException as exc:
668+
exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
669+
raise
658670
write(NEWOBJ_EX)
659671
else:
660672
func = partial(cls.__new__, cls, *args, **kwargs)
661-
save(func)
673+
try:
674+
save(func)
675+
except BaseException as exc:
676+
exc.add_note(f'when serializing {_T(obj)} reconstructor')
677+
raise
662678
save(())
663679
write(REDUCE)
664680
elif self.proto >= 2 and func_name == "__newobj__":
@@ -695,12 +711,28 @@ def save_reduce(self, func, args, state=None, listitems=None,
695711
raise PicklingError(f"first argument to __newobj__() "
696712
f"must be {obj.__class__!r}, not {cls!r}")
697713
args = args[1:]
698-
save(cls)
699-
save(args)
714+
try:
715+
save(cls)
716+
except BaseException as exc:
717+
exc.add_note(f'when serializing {_T(obj)} class')
718+
raise
719+
try:
720+
save(args)
721+
except BaseException as exc:
722+
exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
723+
raise
700724
write(NEWOBJ)
701725
else:
702-
save(func)
703-
save(args)
726+
try:
727+
save(func)
728+
except BaseException as exc:
729+
exc.add_note(f'when serializing {_T(obj)} reconstructor')
730+
raise
731+
try:
732+
save(args)
733+
except BaseException as exc:
734+
exc.add_note(f'when serializing {_T(obj)} reconstructor arguments')
735+
raise
704736
write(REDUCE)
705737

706738
if obj is not None:
@@ -718,23 +750,35 @@ def save_reduce(self, func, args, state=None, listitems=None,
718750
# items and dict items (as (key, value) tuples), or None.
719751

720752
if listitems is not None:
721-
self._batch_appends(listitems)
753+
self._batch_appends(listitems, obj)
722754

723755
if dictitems is not None:
724-
self._batch_setitems(dictitems)
756+
self._batch_setitems(dictitems, obj)
725757

726758
if state is not None:
727759
if state_setter is None:
728-
save(state)
760+
try:
761+
save(state)
762+
except BaseException as exc:
763+
exc.add_note(f'when serializing {_T(obj)} state')
764+
raise
729765
write(BUILD)
730766
else:
731767
# If a state_setter is specified, call it instead of load_build
732768
# to update obj's with its previous state.
733769
# First, push state_setter and its tuple of expected arguments
734770
# (obj, state) onto the stack.
735-
save(state_setter)
771+
try:
772+
save(state_setter)
773+
except BaseException as exc:
774+
exc.add_note(f'when serializing {_T(obj)} state setter')
775+
raise
736776
save(obj) # simple BINGET opcode as obj is already memoized.
737-
save(state)
777+
try:
778+
save(state)
779+
except BaseException as exc:
780+
exc.add_note(f'when serializing {_T(obj)} state')
781+
raise
738782
write(TUPLE2)
739783
# Trigger a state_setter(obj, state) function call.
740784
write(REDUCE)
@@ -914,8 +958,12 @@ def save_tuple(self, obj):
914958
save = self.save
915959
memo = self.memo
916960
if n <= 3 and self.proto >= 2:
917-
for element in obj:
918-
save(element)
961+
for i, element in enumerate(obj):
962+
try:
963+
save(element)
964+
except BaseException as exc:
965+
exc.add_note(f'when serializing {_T(obj)} item {i}')
966+
raise
919967
# Subtle. Same as in the big comment below.
920968
if id(obj) in memo:
921969
get = self.get(memo[id(obj)][0])
@@ -929,8 +977,12 @@ def save_tuple(self, obj):
929977
# has more than 3 elements.
930978
write = self.write
931979
write(MARK)
932-
for element in obj:
933-
save(element)
980+
for i, element in enumerate(obj):
981+
try:
982+
save(element)
983+
except BaseException as exc:
984+
exc.add_note(f'when serializing {_T(obj)} item {i}')
985+
raise
934986

935987
if id(obj) in memo:
936988
# Subtle. d was not in memo when we entered save_tuple(), so
@@ -960,38 +1012,52 @@ def save_list(self, obj):
9601012
self.write(MARK + LIST)
9611013

9621014
self.memoize(obj)
963-
self._batch_appends(obj)
1015+
self._batch_appends(obj, obj)
9641016

9651017
dispatch[list] = save_list
9661018

9671019
_BATCHSIZE = 1000
9681020

969-
def _batch_appends(self, items):
1021+
def _batch_appends(self, items, obj):
9701022
# Helper to batch up APPENDS sequences
9711023
save = self.save
9721024
write = self.write
9731025

9741026
if not self.bin:
975-
for x in items:
976-
save(x)
1027+
for i, x in enumerate(items):
1028+
try:
1029+
save(x)
1030+
except BaseException as exc:
1031+
exc.add_note(f'when serializing {_T(obj)} item {i}')
1032+
raise
9771033
write(APPEND)
9781034
return
9791035

9801036
it = iter(items)
1037+
start = 0
9811038
while True:
9821039
tmp = list(islice(it, self._BATCHSIZE))
9831040
n = len(tmp)
9841041
if n > 1:
9851042
write(MARK)
986-
for x in tmp:
987-
save(x)
1043+
for i, x in enumerate(tmp, start):
1044+
try:
1045+
save(x)
1046+
except BaseException as exc:
1047+
exc.add_note(f'when serializing {_T(obj)} item {i}')
1048+
raise
9881049
write(APPENDS)
9891050
elif n:
990-
save(tmp[0])
1051+
try:
1052+
save(tmp[0])
1053+
except BaseException as exc:
1054+
exc.add_note(f'when serializing {_T(obj)} item {start}')
1055+
raise
9911056
write(APPEND)
9921057
# else tmp is empty, and we're done
9931058
if n < self._BATCHSIZE:
9941059
return
1060+
start += n
9951061

9961062
def save_dict(self, obj):
9971063
if self.bin:
@@ -1000,19 +1066,23 @@ def save_dict(self, obj):
10001066
self.write(MARK + DICT)
10011067

10021068
self.memoize(obj)
1003-
self._batch_setitems(obj.items())
1069+
self._batch_setitems(obj.items(), obj)
10041070

10051071
dispatch[dict] = save_dict
10061072

1007-
def _batch_setitems(self, items):
1073+
def _batch_setitems(self, items, obj):
10081074
# Helper to batch up SETITEMS sequences; proto >= 1 only
10091075
save = self.save
10101076
write = self.write
10111077

10121078
if not self.bin:
10131079
for k, v in items:
10141080
save(k)
1015-
save(v)
1081+
try:
1082+
save(v)
1083+
except BaseException as exc:
1084+
exc.add_note(f'when serializing {_T(obj)} item {k!r}')
1085+
raise
10161086
write(SETITEM)
10171087
return
10181088

@@ -1024,12 +1094,20 @@ def _batch_setitems(self, items):
10241094
write(MARK)
10251095
for k, v in tmp:
10261096
save(k)
1027-
save(v)
1097+
try:
1098+
save(v)
1099+
except BaseException as exc:
1100+
exc.add_note(f'when serializing {_T(obj)} item {k!r}')
1101+
raise
10281102
write(SETITEMS)
10291103
elif n:
10301104
k, v = tmp[0]
10311105
save(k)
1032-
save(v)
1106+
try:
1107+
save(v)
1108+
except BaseException as exc:
1109+
exc.add_note(f'when serializing {_T(obj)} item {k!r}')
1110+
raise
10331111
write(SETITEM)
10341112
# else tmp is empty, and we're done
10351113
if n < self._BATCHSIZE:
@@ -1052,8 +1130,12 @@ def save_set(self, obj):
10521130
n = len(batch)
10531131
if n > 0:
10541132
write(MARK)
1055-
for item in batch:
1056-
save(item)
1133+
try:
1134+
for item in batch:
1135+
save(item)
1136+
except BaseException as exc:
1137+
exc.add_note(f'when serializing {_T(obj)} element')
1138+
raise
10571139
write(ADDITEMS)
10581140
if n < self._BATCHSIZE:
10591141
return
@@ -1068,8 +1150,12 @@ def save_frozenset(self, obj):
10681150
return
10691151

10701152
write(MARK)
1071-
for item in obj:
1072-
save(item)
1153+
try:
1154+
for item in obj:
1155+
save(item)
1156+
except BaseException as exc:
1157+
exc.add_note(f'when serializing {_T(obj)} element')
1158+
raise
10731159

10741160
if id(obj) in self.memo:
10751161
# If the object is already in the memo, this means it is

0 commit comments

Comments
 (0)