Skip to content

Commit 052cb71

Browse files
authored
gh-124878: Fix race conditions during interpreter finalization (#130649)
The PyThreadState field gains a reference count field to avoid issues with PyThreadState being a dangling pointer to freed memory. The refcount starts with a value of two: one reference is owned by the interpreter's linked list of thread states and one reference is owned by the OS thread. The reference count is decremented when the thread state is removed from the interpreter's linked list and before the OS thread calls `PyThread_hang_thread()`. The thread that decrements it to zero frees the `PyThreadState` memory. The `holds_gil` field is moved out of the `_status` bit field, to avoid a data race where on thread calls `PyThreadState_Clear()`, modifying the `_status` bit field while the OS thread reads `holds_gil` when attempting to acquire the GIL. The `PyThreadState.state` field now has `_Py_THREAD_SHUTTING_DOWN` as a possible value. This corresponds to the `_PyThreadState_MustExit()` check. This avoids race conditions in the free threading build when checking `_PyThreadState_MustExit()`.
1 parent c6dd234 commit 052cb71

13 files changed

+109
-81
lines changed

Include/cpython/pystate.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,14 @@ struct _ts {
8383
unsigned int bound_gilstate:1;
8484
/* Currently in use (maybe holds the GIL). */
8585
unsigned int active:1;
86-
/* Currently holds the GIL. */
87-
unsigned int holds_gil:1;
8886

8987
/* various stages of finalization */
9088
unsigned int finalizing:1;
9189
unsigned int cleared:1;
9290
unsigned int finalized:1;
9391

9492
/* padding to align to 4 bytes */
95-
unsigned int :23;
93+
unsigned int :24;
9694
} _status;
9795
#ifdef Py_BUILD_CORE
9896
# define _PyThreadState_WHENCE_NOTSET -1
@@ -103,6 +101,10 @@ struct _ts {
103101
# define _PyThreadState_WHENCE_GILSTATE 4
104102
# define _PyThreadState_WHENCE_EXEC 5
105103
#endif
104+
105+
/* Currently holds the GIL. Must be its own field to avoid data races */
106+
int holds_gil;
107+
106108
int _whence;
107109

108110
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).

Include/internal/pycore_pystate.h

+16-5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ extern "C" {
2727
// "suspended" state. Only the thread performing a stop-the-world pause may
2828
// transition a thread from the "suspended" state back to the "detached" state.
2929
//
30+
// The "shutting down" state is used when the interpreter is being finalized.
31+
// Threads in this state can't do anything other than block the OS thread.
32+
// (See _PyThreadState_HangThread).
33+
//
3034
// State transition diagram:
3135
//
3236
// (bound thread) (stop-the-world thread)
@@ -37,9 +41,10 @@ extern "C" {
3741
//
3842
// The (bound thread) and (stop-the-world thread) labels indicate which thread
3943
// is allowed to perform the transition.
40-
#define _Py_THREAD_DETACHED 0
41-
#define _Py_THREAD_ATTACHED 1
42-
#define _Py_THREAD_SUSPENDED 2
44+
#define _Py_THREAD_DETACHED 0
45+
#define _Py_THREAD_ATTACHED 1
46+
#define _Py_THREAD_SUSPENDED 2
47+
#define _Py_THREAD_SHUTTING_DOWN 3
4348

4449

4550
/* Check if the current thread is the main thread.
@@ -118,7 +123,8 @@ extern _Py_thread_local PyThreadState *_Py_tss_tstate;
118123
extern int _PyThreadState_CheckConsistency(PyThreadState *tstate);
119124
#endif
120125

121-
int _PyThreadState_MustExit(PyThreadState *tstate);
126+
extern int _PyThreadState_MustExit(PyThreadState *tstate);
127+
extern void _PyThreadState_HangThread(PyThreadState *tstate);
122128

123129
// Export for most shared extensions, used via _PyThreadState_GET() static
124130
// inline function.
@@ -169,6 +175,11 @@ extern void _PyThreadState_Detach(PyThreadState *tstate);
169175
// to the "detached" state.
170176
extern void _PyThreadState_Suspend(PyThreadState *tstate);
171177

178+
// Mark the thread state as "shutting down". This is used during interpreter
179+
// and runtime finalization. The thread may no longer attach to the
180+
// interpreter and will instead block via _PyThreadState_HangThread().
181+
extern void _PyThreadState_SetShuttingDown(PyThreadState *tstate);
182+
172183
// Perform a stop-the-world pause for all threads in the all interpreters.
173184
//
174185
// Threads in the "attached" state are paused and transitioned to the "GC"
@@ -238,7 +249,7 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_NewBound(
238249
PyInterpreterState *interp,
239250
int whence);
240251
extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate);
241-
extern void _PyThreadState_DeleteList(PyThreadState *list);
252+
extern void _PyThreadState_DeleteList(PyThreadState *list, int is_after_fork);
242253
extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate);
243254

244255
// Export for '_testinternalcapi' shared extension

Include/internal/pycore_runtime_init.h

+2
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ extern PyTypeObject _PyExc_MemoryError;
171171
#define _PyThreadStateImpl_INIT \
172172
{ \
173173
.base = _PyThreadState_INIT, \
174+
/* The thread and the interpreter's linked list hold a reference */ \
175+
.refcount = 2, \
174176
}
175177

176178
#define _PyThreadState_INIT \

Include/internal/pycore_tstate.h

+4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ typedef struct _PyThreadStateImpl {
2121
// semi-public fields are in PyThreadState.
2222
PyThreadState base;
2323

24+
// The reference count field is used to synchronize deallocation of the
25+
// thread state during runtime finalization.
26+
Py_ssize_t refcount;
27+
2428
// These are addresses, but we need to convert to ints to avoid UB.
2529
uintptr_t c_stack_top;
2630
uintptr_t c_stack_soft_limit;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix race conditions during runtime finalization that could lead to accessing
2+
freed memory.

Modules/_threadmodule.c

-3
Original file line numberDiff line numberDiff line change
@@ -333,9 +333,6 @@ thread_run(void *boot_raw)
333333
// _PyRuntimeState_SetFinalizing() is called. At this point, all Python
334334
// threads must exit, except of the thread calling Py_Finalize() which
335335
// holds the GIL and must not exit.
336-
//
337-
// At this stage, tstate can be a dangling pointer (point to freed memory),
338-
// it's ok to call _PyThreadState_MustExit() with a dangling pointer.
339336
if (_PyThreadState_MustExit(tstate)) {
340337
// Don't call PyThreadState_Clear() nor _PyThreadState_DeleteCurrent().
341338
// These functions are called on tstate indirectly by Py_Finalize()

Modules/posixmodule.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ PyOS_AfterFork_Child(void)
715715
// may call destructors.
716716
PyThreadState *list = _PyThreadState_RemoveExcept(tstate);
717717
_PyEval_StartTheWorldAll(&_PyRuntime);
718-
_PyThreadState_DeleteList(list);
718+
_PyThreadState_DeleteList(list, /*is_after_fork=*/1);
719719

720720
_PyImport_ReInitLock(tstate->interp);
721721
_PyImport_ReleaseLock(tstate->interp);

Python/ceval_gil.c

+9-10
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
#include "pycore_pyerrors.h" // _PyErr_GetRaisedException()
77
#include "pycore_pylifecycle.h" // _PyErr_Print()
88
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
9+
#include "pycore_pystate.h" // PyThread_hang_thread()
910
#include "pycore_pystats.h" // _Py_PrintSpecializationStats()
10-
#include "pycore_pythread.h" // PyThread_hang_thread()
1111

1212
/*
1313
Notes about the implementation:
@@ -206,7 +206,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
206206
_Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
207207
_Py_atomic_store_int_relaxed(&gil->locked, 0);
208208
if (tstate != NULL) {
209-
tstate->_status.holds_gil = 0;
209+
tstate->holds_gil = 0;
210210
}
211211
COND_SIGNAL(gil->cond);
212212
MUTEX_UNLOCK(gil->mutex);
@@ -231,7 +231,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release)
231231
// Check if we have the GIL before dropping it. tstate will be NULL if
232232
// take_gil() detected that this thread has been destroyed, in which case
233233
// we know we have the GIL.
234-
if (tstate != NULL && !tstate->_status.holds_gil) {
234+
if (tstate != NULL && !tstate->holds_gil) {
235235
return;
236236
}
237237
#endif
@@ -296,15 +296,14 @@ take_gil(PyThreadState *tstate)
296296
thread which called Py_Finalize(), this thread cannot continue.
297297
298298
This code path can be reached by a daemon thread after Py_Finalize()
299-
completes. In this case, tstate is a dangling pointer: points to
300-
PyThreadState freed memory.
299+
completes.
301300
302301
This used to call a *thread_exit API, but that was not safe as it
303302
lacks stack unwinding and local variable destruction important to
304303
C++. gh-87135: The best that can be done is to hang the thread as
305304
the public APIs calling this have no error reporting mechanism (!).
306305
*/
307-
PyThread_hang_thread();
306+
_PyThreadState_HangThread(tstate);
308307
}
309308

310309
assert(_PyThreadState_CheckConsistency(tstate));
@@ -353,7 +352,7 @@ take_gil(PyThreadState *tstate)
353352
}
354353
// gh-87135: hang the thread as *thread_exit() is not a safe
355354
// API. It lacks stack unwind and local variable destruction.
356-
PyThread_hang_thread();
355+
_PyThreadState_HangThread(tstate);
357356
}
358357
assert(_PyThreadState_CheckConsistency(tstate));
359358

@@ -404,11 +403,11 @@ take_gil(PyThreadState *tstate)
404403
/* tstate could be a dangling pointer, so don't pass it to
405404
drop_gil(). */
406405
drop_gil(interp, NULL, 1);
407-
PyThread_hang_thread();
406+
_PyThreadState_HangThread(tstate);
408407
}
409408
assert(_PyThreadState_CheckConsistency(tstate));
410409

411-
tstate->_status.holds_gil = 1;
410+
tstate->holds_gil = 1;
412411
_Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
413412
update_eval_breaker_for_thread(interp, tstate);
414413

@@ -460,7 +459,7 @@ PyEval_ThreadsInitialized(void)
460459
static inline int
461460
current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate)
462461
{
463-
int holds_gil = tstate->_status.holds_gil;
462+
int holds_gil = tstate->holds_gil;
464463

465464
// holds_gil is the source of truth; check that last_holder and gil->locked
466465
// are consistent with it.

Python/pylifecycle.c

+12-7
Original file line numberDiff line numberDiff line change
@@ -2036,18 +2036,23 @@ _Py_Finalize(_PyRuntimeState *runtime)
20362036

20372037
// XXX Call something like _PyImport_Disable() here?
20382038

2039-
/* Destroy the state of all threads of the interpreter, except of the
2039+
/* Remove the state of all threads of the interpreter, except for the
20402040
current thread. In practice, only daemon threads should still be alive,
20412041
except if wait_for_thread_shutdown() has been cancelled by CTRL+C.
2042-
Clear frames of other threads to call objects destructors. Destructors
2043-
will be called in the current Python thread. Since
2044-
_PyRuntimeState_SetFinalizing() has been called, no other Python thread
2045-
can take the GIL at this point: if they try, they will exit
2046-
immediately. We start the world once we are the only thread state left,
2042+
We start the world once we are the only thread state left,
20472043
before we call destructors. */
20482044
PyThreadState *list = _PyThreadState_RemoveExcept(tstate);
2045+
for (PyThreadState *p = list; p != NULL; p = p->next) {
2046+
_PyThreadState_SetShuttingDown(p);
2047+
}
20492048
_PyEval_StartTheWorldAll(runtime);
2050-
_PyThreadState_DeleteList(list);
2049+
2050+
/* Clear frames of other threads to call objects destructors. Destructors
2051+
will be called in the current Python thread. Since
2052+
_PyRuntimeState_SetFinalizing() has been called, no other Python thread
2053+
can take the GIL at this point: if they try, they will hang in
2054+
_PyThreadState_HangThread. */
2055+
_PyThreadState_DeleteList(list, /*is_after_fork=*/0);
20512056

20522057
/* At this point no Python code should be running at all.
20532058
The only thread state left should be the main thread of the main

Python/pystate.c

+57-39
Original file line numberDiff line numberDiff line change
@@ -1474,6 +1474,15 @@ free_threadstate(_PyThreadStateImpl *tstate)
14741474
}
14751475
}
14761476

1477+
static void
1478+
decref_threadstate(_PyThreadStateImpl *tstate)
1479+
{
1480+
if (_Py_atomic_add_ssize(&tstate->refcount, -1) == 1) {
1481+
// The last reference to the thread state is gone.
1482+
free_threadstate(tstate);
1483+
}
1484+
}
1485+
14771486
/* Get the thread state to a minimal consistent state.
14781487
Further init happens in pylifecycle.c before it can be used.
14791488
All fields not initialized here are expected to be zeroed out,
@@ -1938,8 +1947,12 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate)
19381947
// Deletes the thread states in the linked list `list`.
19391948
//
19401949
// This is intended to be used in conjunction with _PyThreadState_RemoveExcept.
1950+
//
1951+
// If `is_after_fork` is true, the thread states are immediately freed.
1952+
// Otherwise, they are decref'd because they may still be referenced by an
1953+
// OS thread.
19411954
void
1942-
_PyThreadState_DeleteList(PyThreadState *list)
1955+
_PyThreadState_DeleteList(PyThreadState *list, int is_after_fork)
19431956
{
19441957
// The world can't be stopped because we PyThreadState_Clear() can
19451958
// call destructors.
@@ -1949,7 +1962,12 @@ _PyThreadState_DeleteList(PyThreadState *list)
19491962
for (p = list; p; p = next) {
19501963
next = p->next;
19511964
PyThreadState_Clear(p);
1952-
free_threadstate((_PyThreadStateImpl *)p);
1965+
if (is_after_fork) {
1966+
free_threadstate((_PyThreadStateImpl *)p);
1967+
}
1968+
else {
1969+
decref_threadstate((_PyThreadStateImpl *)p);
1970+
}
19531971
}
19541972
}
19551973

@@ -2082,12 +2100,19 @@ static void
20822100
tstate_wait_attach(PyThreadState *tstate)
20832101
{
20842102
do {
2085-
int expected = _Py_THREAD_SUSPENDED;
2086-
2087-
// Wait until we're switched out of SUSPENDED to DETACHED.
2088-
_PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state),
2089-
/*timeout=*/-1, NULL, /*detach=*/0);
2090-
2103+
int state = _Py_atomic_load_int_relaxed(&tstate->state);
2104+
if (state == _Py_THREAD_SUSPENDED) {
2105+
// Wait until we're switched out of SUSPENDED to DETACHED.
2106+
_PyParkingLot_Park(&tstate->state, &state, sizeof(tstate->state),
2107+
/*timeout=*/-1, NULL, /*detach=*/0);
2108+
}
2109+
else if (state == _Py_THREAD_SHUTTING_DOWN) {
2110+
// We're shutting down, so we can't attach.
2111+
_PyThreadState_HangThread(tstate);
2112+
}
2113+
else {
2114+
assert(state == _Py_THREAD_DETACHED);
2115+
}
20912116
// Once we're back in DETACHED we can re-attach
20922117
} while (!tstate_try_attach(tstate));
20932118
}
@@ -2118,7 +2143,7 @@ _PyThreadState_Attach(PyThreadState *tstate)
21182143
tstate_activate(tstate);
21192144

21202145
#ifdef Py_GIL_DISABLED
2121-
if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) {
2146+
if (_PyEval_IsGILEnabled(tstate) && !tstate->holds_gil) {
21222147
// The GIL was enabled between our call to _PyEval_AcquireLock()
21232148
// and when we attached (the GIL can't go from enabled to disabled
21242149
// here because only a thread holding the GIL can disable
@@ -2201,6 +2226,15 @@ _PyThreadState_Suspend(PyThreadState *tstate)
22012226
HEAD_UNLOCK(runtime);
22022227
}
22032228

2229+
void
2230+
_PyThreadState_SetShuttingDown(PyThreadState *tstate)
2231+
{
2232+
_Py_atomic_store_int(&tstate->state, _Py_THREAD_SHUTTING_DOWN);
2233+
#ifdef Py_GIL_DISABLED
2234+
_PyParkingLot_UnparkAll(&tstate->state);
2235+
#endif
2236+
}
2237+
22042238
// Decrease stop-the-world counter of remaining number of threads that need to
22052239
// pause. If we are the final thread to pause, notify the requesting thread.
22062240
static void
@@ -3001,43 +3035,27 @@ _PyThreadState_CheckConsistency(PyThreadState *tstate)
30013035
#endif
30023036

30033037

3004-
// Check if a Python thread must exit immediately, rather than taking the GIL
3005-
// if Py_Finalize() has been called.
3038+
// Check if a Python thread must call _PyThreadState_HangThread(), rather than
3039+
// taking the GIL or attaching to the interpreter if Py_Finalize() has been
3040+
// called.
30063041
//
30073042
// When this function is called by a daemon thread after Py_Finalize() has been
3008-
// called, the GIL does no longer exist.
3009-
//
3010-
// tstate can be a dangling pointer (point to freed memory): only tstate value
3011-
// is used, the pointer is not deferenced.
3043+
// called, the GIL may no longer exist.
30123044
//
30133045
// tstate must be non-NULL.
30143046
int
30153047
_PyThreadState_MustExit(PyThreadState *tstate)
30163048
{
3017-
/* bpo-39877: Access _PyRuntime directly rather than using
3018-
tstate->interp->runtime to support calls from Python daemon threads.
3019-
After Py_Finalize() has been called, tstate can be a dangling pointer:
3020-
point to PyThreadState freed memory. */
3021-
unsigned long finalizing_id = _PyRuntimeState_GetFinalizingID(&_PyRuntime);
3022-
PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime);
3023-
if (finalizing == NULL) {
3024-
// XXX This isn't completely safe from daemon thraeds,
3025-
// since tstate might be a dangling pointer.
3026-
finalizing = _PyInterpreterState_GetFinalizing(tstate->interp);
3027-
finalizing_id = _PyInterpreterState_GetFinalizingID(tstate->interp);
3028-
}
3029-
// XXX else check &_PyRuntime._main_interpreter._initial_thread
3030-
if (finalizing == NULL) {
3031-
return 0;
3032-
}
3033-
else if (finalizing == tstate) {
3034-
return 0;
3035-
}
3036-
else if (finalizing_id == PyThread_get_thread_ident()) {
3037-
/* gh-109793: we must have switched interpreters. */
3038-
return 0;
3039-
}
3040-
return 1;
3049+
int state = _Py_atomic_load_int_relaxed(&tstate->state);
3050+
return state == _Py_THREAD_SHUTTING_DOWN;
3051+
}
3052+
3053+
void
3054+
_PyThreadState_HangThread(PyThreadState *tstate)
3055+
{
3056+
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
3057+
decref_threadstate(tstate_impl);
3058+
PyThread_hang_thread();
30413059
}
30423060

30433061
/********************/

Python/qsbr.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ _Py_qsbr_unregister(PyThreadState *tstate)
241241
// gh-119369: GIL must be released (if held) to prevent deadlocks, because
242242
// we might not have an active tstate, which means that blocking on PyMutex
243243
// locks will not implicitly release the GIL.
244-
assert(!tstate->_status.holds_gil);
244+
assert(!tstate->holds_gil);
245245

246246
PyMutex_Lock(&shared->mutex);
247247
// NOTE: we must load (or reload) the thread state's qbsr inside the mutex

0 commit comments

Comments
 (0)