Skip to content

Commit 44e4c47

Browse files
authored
GH-124715: Move trashcan mechanism into Py_Dealloc (GH-132280)
1 parent 0f23e84 commit 44e4c47

26 files changed

+88
-196
lines changed

Include/cpython/object.h

+3-70
Original file line numberDiff line numberDiff line change
@@ -429,81 +429,14 @@ PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed(
429429
const char *function);
430430

431431

432-
/* Trashcan mechanism, thanks to Christian Tismer.
433-
434-
When deallocating a container object, it's possible to trigger an unbounded
435-
chain of deallocations, as each Py_DECREF in turn drops the refcount on "the
436-
next" object in the chain to 0. This can easily lead to stack overflows,
437-
especially in threads (which typically have less stack space to work with).
438-
439-
A container object can avoid this by bracketing the body of its tp_dealloc
440-
function with a pair of macros:
441-
442-
static void
443-
mytype_dealloc(mytype *p)
444-
{
445-
... declarations go here ...
446-
447-
PyObject_GC_UnTrack(p); // must untrack first
448-
Py_TRASHCAN_BEGIN(p, mytype_dealloc)
449-
... The body of the deallocator goes here, including all calls ...
450-
... to Py_DECREF on contained objects. ...
451-
Py_TRASHCAN_END // there should be no code after this
452-
}
453-
454-
CAUTION: Never return from the middle of the body! If the body needs to
455-
"get out early", put a label immediately before the Py_TRASHCAN_END
456-
call, and goto it. Else the call-depth counter (see below) will stay
457-
above 0 forever, and the trashcan will never get emptied.
458-
459-
How it works: The BEGIN macro increments a call-depth counter. So long
460-
as this counter is small, the body of the deallocator is run directly without
461-
further ado. But if the counter gets large, it instead adds p to a list of
462-
objects to be deallocated later, skips the body of the deallocator, and
463-
resumes execution after the END macro. The tp_dealloc routine then returns
464-
without deallocating anything (and so unbounded call-stack depth is avoided).
465-
466-
When the call stack finishes unwinding again, code generated by the END macro
467-
notices this, and calls another routine to deallocate all the objects that
468-
may have been added to the list of deferred deallocations. In effect, a
469-
chain of N deallocations is broken into (N-1)/(Py_TRASHCAN_HEADROOM-1) pieces,
470-
with the call stack never exceeding a depth of Py_TRASHCAN_HEADROOM.
471-
472-
Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base
473-
class, we need to ensure that the trashcan is only triggered on the tp_dealloc
474-
of the actual class being deallocated. Otherwise we might end up with a
475-
partially-deallocated object. To check this, the tp_dealloc function must be
476-
passed as second argument to Py_TRASHCAN_BEGIN().
477-
*/
478-
479-
480432
PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op);
481433
PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate);
482434

483-
484-
/* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */
485-
486-
/* To avoid raising recursion errors during dealloc, trigger the trashcan before we reach
487-
* recursion limit. To avoid thrashing, we don't attempt to empty the trashcan until
488-
* we have headroom above the trigger limit */
489-
#define Py_TRASHCAN_HEADROOM 50
490-
491-
/* Helper function for Py_TRASHCAN_BEGIN */
492435
PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count);
493436

494-
#define Py_TRASHCAN_BEGIN(op, dealloc) \
495-
do { \
496-
PyThreadState *tstate = PyThreadState_Get(); \
497-
if (_Py_ReachedRecursionLimitWithMargin(tstate, 2) && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \
498-
_PyTrash_thread_deposit_object(tstate, (PyObject *)op); \
499-
break; \
500-
}
501-
/* The body of the deallocator is here. */
502-
#define Py_TRASHCAN_END \
503-
if (tstate->delete_later && !_Py_ReachedRecursionLimitWithMargin(tstate, 4)) { \
504-
_PyTrash_thread_destroy_chain(tstate); \
505-
} \
506-
} while (0);
437+
/* For backwards compatibility with the old trashcan mechanism */
438+
#define Py_TRASHCAN_BEGIN(op, dealloc)
439+
#define Py_TRASHCAN_END
507440

508441

509442
PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj);

Include/internal/pycore_ceval.h

+1-25
Original file line numberDiff line numberDiff line change
@@ -196,25 +196,6 @@ extern void _PyEval_DeactivateOpCache(void);
196196

197197
/* --- _Py_EnterRecursiveCall() ----------------------------------------- */
198198

199-
#if !_Py__has_builtin(__builtin_frame_address) && !defined(_MSC_VER)
200-
static uintptr_t return_pointer_as_int(char* p) {
201-
return (uintptr_t)p;
202-
}
203-
#endif
204-
205-
static inline uintptr_t
206-
_Py_get_machine_stack_pointer(void) {
207-
#if _Py__has_builtin(__builtin_frame_address)
208-
return (uintptr_t)__builtin_frame_address(0);
209-
#elif defined(_MSC_VER)
210-
return (uintptr_t)_AddressOfReturnAddress();
211-
#else
212-
char here;
213-
/* Avoid compiler warning about returning stack address */
214-
return return_pointer_as_int(&here);
215-
#endif
216-
}
217-
218199
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
219200
uintptr_t here_addr = _Py_get_machine_stack_pointer();
220201
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
@@ -249,12 +230,7 @@ PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);
249230
static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
250231
uintptr_t here_addr = _Py_get_machine_stack_pointer();
251232
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
252-
if (here_addr > _tstate->c_stack_soft_limit) {
253-
return 0;
254-
}
255-
if (_tstate->c_stack_hard_limit == 0) {
256-
_Py_InitializeRecursionLimits(tstate);
257-
}
233+
assert(_tstate->c_stack_hard_limit != 0);
258234
return here_addr <= _tstate->c_stack_soft_limit;
259235
}
260236

Include/internal/pycore_pystate.h

+29
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ extern "C" {
99
#endif
1010

1111
#include "pycore_typedefs.h" // _PyRuntimeState
12+
#include "pycore_tstate.h"
1213

1314

1415
// Values for PyThreadState.state. A thread must be in the "attached" state
@@ -299,6 +300,34 @@ _Py_AssertHoldsTstateFunc(const char *func)
299300
#define _Py_AssertHoldsTstate()
300301
#endif
301302

303+
#if !_Py__has_builtin(__builtin_frame_address) && !defined(_MSC_VER)
304+
static uintptr_t return_pointer_as_int(char* p) {
305+
return (uintptr_t)p;
306+
}
307+
#endif
308+
309+
static inline uintptr_t
310+
_Py_get_machine_stack_pointer(void) {
311+
#if _Py__has_builtin(__builtin_frame_address)
312+
return (uintptr_t)__builtin_frame_address(0);
313+
#elif defined(_MSC_VER)
314+
return (uintptr_t)_AddressOfReturnAddress();
315+
#else
316+
char here;
317+
/* Avoid compiler warning about returning stack address */
318+
return return_pointer_as_int(&here);
319+
#endif
320+
}
321+
322+
static inline intptr_t
323+
_Py_RecursionLimit_GetMargin(PyThreadState *tstate)
324+
{
325+
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
326+
assert(_tstate->c_stack_hard_limit != 0);
327+
intptr_t here_addr = _Py_get_machine_stack_pointer();
328+
return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, PYOS_STACK_MARGIN_SHIFT);
329+
}
330+
302331
#ifdef __cplusplus
303332
}
304333
#endif

Include/pythonrun.h

+12-4
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,25 @@ PyAPI_DATA(int) (*PyOS_InputHook)(void);
2626
* apart. In practice, that means it must be larger than the C
2727
* stack consumption of PyEval_EvalDefault */
2828
#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER)
29-
# define PYOS_STACK_MARGIN 4096
29+
# define PYOS_LOG2_STACK_MARGIN 12
3030
#elif defined(Py_DEBUG) && defined(WIN32)
31-
# define PYOS_STACK_MARGIN 4096
31+
# define PYOS_LOG2_STACK_MARGIN 12
3232
#elif defined(__wasi__)
3333
/* WebAssembly has two stacks, so this isn't really a size */
34-
# define PYOS_STACK_MARGIN 500
34+
# define PYOS_LOG2_STACK_MARGIN 9
3535
#else
36-
# define PYOS_STACK_MARGIN 2048
36+
# define PYOS_LOG2_STACK_MARGIN 11
3737
#endif
38+
#define PYOS_STACK_MARGIN (1 << PYOS_LOG2_STACK_MARGIN)
3839
#define PYOS_STACK_MARGIN_BYTES (PYOS_STACK_MARGIN * sizeof(void *))
3940

41+
#if SIZEOF_VOID_P == 8
42+
#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 3)
43+
#else
44+
#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 2)
45+
#endif
46+
47+
4048
#if defined(WIN32)
4149
#define USE_STACKCHECK
4250
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protects against stack overflows when calling :c:func:`Py_DECREF`. Third-party
2+
extension objects no longer need to use the "trashcan" mechanism, as
3+
protection is now built into the :c:func:`Py_DECREF` macro.

Modules/_elementtree.c

-2
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,6 @@ element_dealloc(PyObject *op)
689689

690690
/* bpo-31095: UnTrack is needed before calling any callbacks */
691691
PyObject_GC_UnTrack(self);
692-
Py_TRASHCAN_BEGIN(self, element_dealloc)
693692

694693
if (self->weakreflist != NULL)
695694
PyObject_ClearWeakRefs(op);
@@ -700,7 +699,6 @@ element_dealloc(PyObject *op)
700699

701700
tp->tp_free(self);
702701
Py_DECREF(tp);
703-
Py_TRASHCAN_END
704702
}
705703

706704
/* -------------------------------------------------------------------- */

Objects/descrobject.c

-2
Original file line numberDiff line numberDiff line change
@@ -1311,11 +1311,9 @@ wrapper_dealloc(PyObject *self)
13111311
{
13121312
wrapperobject *wp = (wrapperobject *)self;
13131313
PyObject_GC_UnTrack(wp);
1314-
Py_TRASHCAN_BEGIN(wp, wrapper_dealloc)
13151314
Py_XDECREF(wp->descr);
13161315
Py_XDECREF(wp->self);
13171316
PyObject_GC_Del(wp);
1318-
Py_TRASHCAN_END
13191317
}
13201318

13211319
static PyObject *

Objects/dictobject.c

-2
Original file line numberDiff line numberDiff line change
@@ -3285,7 +3285,6 @@ dict_dealloc(PyObject *self)
32853285

32863286
/* bpo-31095: UnTrack is needed before calling any callbacks */
32873287
PyObject_GC_UnTrack(mp);
3288-
Py_TRASHCAN_BEGIN(mp, dict_dealloc)
32893288
if (values != NULL) {
32903289
if (values->embedded == 0) {
32913290
for (i = 0, n = values->capacity; i < n; i++) {
@@ -3305,7 +3304,6 @@ dict_dealloc(PyObject *self)
33053304
else {
33063305
Py_TYPE(mp)->tp_free((PyObject *)mp);
33073306
}
3308-
Py_TRASHCAN_END
33093307
}
33103308

33113309

Objects/exceptions.c

-2
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,8 @@ BaseException_dealloc(PyObject *op)
150150
// bpo-44348: The trashcan mechanism prevents stack overflow when deleting
151151
// long chains of exceptions. For example, exceptions can be chained
152152
// through the __context__ attributes or the __traceback__ attribute.
153-
Py_TRASHCAN_BEGIN(self, BaseException_dealloc)
154153
(void)BaseException_clear(op);
155154
Py_TYPE(self)->tp_free(self);
156-
Py_TRASHCAN_END
157155
}
158156

159157
static int

Objects/frameobject.c

-2
Original file line numberDiff line numberDiff line change
@@ -1917,7 +1917,6 @@ frame_dealloc(PyObject *op)
19171917
_PyObject_GC_UNTRACK(f);
19181918
}
19191919

1920-
Py_TRASHCAN_BEGIN(f, frame_dealloc);
19211920
/* GH-106092: If f->f_frame was on the stack and we reached the maximum
19221921
* nesting depth for deallocations, the trashcan may have delayed this
19231922
* deallocation until after f->f_frame is freed. Avoid dereferencing
@@ -1942,7 +1941,6 @@ frame_dealloc(PyObject *op)
19421941
Py_CLEAR(f->f_locals_cache);
19431942
Py_CLEAR(f->f_overwritten_fast_locals);
19441943
PyObject_GC_Del(f);
1945-
Py_TRASHCAN_END;
19461944
}
19471945

19481946
static int

Objects/listobject.c

-2
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,6 @@ list_dealloc(PyObject *self)
550550
PyListObject *op = (PyListObject *)self;
551551
Py_ssize_t i;
552552
PyObject_GC_UnTrack(op);
553-
Py_TRASHCAN_BEGIN(op, list_dealloc)
554553
if (op->ob_item != NULL) {
555554
/* Do it backwards, for Christian Tismer.
556555
There's a simple test case where somehow this reduces
@@ -569,7 +568,6 @@ list_dealloc(PyObject *self)
569568
else {
570569
PyObject_GC_Del(op);
571570
}
572-
Py_TRASHCAN_END
573571
}
574572

575573
static PyObject *

Objects/methodobject.c

-4
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,7 @@ static void
166166
meth_dealloc(PyObject *self)
167167
{
168168
PyCFunctionObject *m = _PyCFunctionObject_CAST(self);
169-
// The Py_TRASHCAN mechanism requires that we be able to
170-
// call PyObject_GC_UnTrack twice on an object.
171169
PyObject_GC_UnTrack(m);
172-
Py_TRASHCAN_BEGIN(m, meth_dealloc);
173170
if (m->m_weakreflist != NULL) {
174171
PyObject_ClearWeakRefs((PyObject*) m);
175172
}
@@ -190,7 +187,6 @@ meth_dealloc(PyObject *self)
190187
assert(Py_IS_TYPE(self, &PyCFunction_Type));
191188
_Py_FREELIST_FREE(pycfunctionobject, m, PyObject_GC_Del);
192189
}
193-
Py_TRASHCAN_END;
194190
}
195191

196192
static PyObject *

Objects/object.c

+26-5
Original file line numberDiff line numberDiff line change
@@ -2913,13 +2913,15 @@ Py_ReprLeave(PyObject *obj)
29132913
void
29142914
_PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op)
29152915
{
2916-
_PyObject_ASSERT(op, _PyObject_IS_GC(op));
2917-
_PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op));
29182916
_PyObject_ASSERT(op, Py_REFCNT(op) == 0);
29192917
#ifdef Py_GIL_DISABLED
29202918
op->ob_tid = (uintptr_t)tstate->delete_later;
29212919
#else
2922-
_PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)tstate->delete_later);
2920+
/* Store the delete_later pointer in the refcnt field.
2921+
* As this object may still be tracked by the GC,
2922+
* it is important that we never store 0 (NULL). */
2923+
uintptr_t refcnt = (uintptr_t)tstate->delete_later;
2924+
*((uintptr_t*)op) = refcnt+1;
29232925
#endif
29242926
tstate->delete_later = op;
29252927
}
@@ -2938,7 +2940,11 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate)
29382940
op->ob_tid = 0;
29392941
_Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED);
29402942
#else
2941-
tstate->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op));
2943+
/* Get the delete_later pointer from the refcnt field.
2944+
* See _PyTrash_thread_deposit_object(). */
2945+
uintptr_t refcnt = *((uintptr_t*)op);
2946+
tstate->delete_later = (PyObject *)(refcnt - 1);
2947+
op->ob_refcnt = 0;
29422948
#endif
29432949

29442950
/* Call the deallocator directly. This used to try to
@@ -3003,13 +3009,25 @@ _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg,
30033009
}
30043010

30053011

3012+
/*
3013+
When deallocating a container object, it's possible to trigger an unbounded
3014+
chain of deallocations, as each Py_DECREF in turn drops the refcount on "the
3015+
next" object in the chain to 0. This can easily lead to stack overflows.
3016+
To avoid that, if the C stack is nearing its limit, instead of calling
3017+
dealloc on the object, it is added to a queue to be freed later when the
3018+
stack is shallower. */
30063019
void
30073020
_Py_Dealloc(PyObject *op)
30083021
{
30093022
PyTypeObject *type = Py_TYPE(op);
30103023
destructor dealloc = type->tp_dealloc;
3011-
#ifdef Py_DEBUG
30123024
PyThreadState *tstate = _PyThreadState_GET();
3025+
intptr_t margin = _Py_RecursionLimit_GetMargin(tstate);
3026+
if (margin < 2) {
3027+
_PyTrash_thread_deposit_object(tstate, (PyObject *)op);
3028+
return;
3029+
}
3030+
#ifdef Py_DEBUG
30133031
#if !defined(Py_GIL_DISABLED) && !defined(Py_STACKREF_DEBUG)
30143032
/* This assertion doesn't hold for the free-threading build, as
30153033
* PyStackRef_CLOSE_SPECIALIZED is not implemented */
@@ -3051,6 +3069,9 @@ _Py_Dealloc(PyObject *op)
30513069
Py_XDECREF(old_exc);
30523070
Py_DECREF(type);
30533071
#endif
3072+
if (tstate->delete_later && margin >= 4) {
3073+
_PyTrash_thread_destroy_chain(tstate);
3074+
}
30543075
}
30553076

30563077

Objects/odictobject.c

-3
Original file line numberDiff line numberDiff line change
@@ -1389,16 +1389,13 @@ odict_dealloc(PyObject *op)
13891389
{
13901390
PyODictObject *self = _PyODictObject_CAST(op);
13911391
PyObject_GC_UnTrack(self);
1392-
Py_TRASHCAN_BEGIN(self, odict_dealloc)
13931392

13941393
Py_XDECREF(self->od_inst_dict);
13951394
if (self->od_weakreflist != NULL)
13961395
PyObject_ClearWeakRefs((PyObject *)self);
13971396

13981397
_odict_clear_nodes(self);
13991398
PyDict_Type.tp_dealloc((PyObject *)self);
1400-
1401-
Py_TRASHCAN_END
14021399
}
14031400

14041401
/* tp_repr */

0 commit comments

Comments
 (0)