Skip to content

Commit 1c0a104

Browse files
gh-126914: Store the Preallocated Thread State's Pointer in a PyInterpreterState Field (gh-126989)
This approach eliminates the originally reported race. It also gets rid of the deadlock reported in gh-96071, so we can remove the workaround added then.
1 parent 824afbf commit 1c0a104

File tree

4 files changed

+82
-48
lines changed

4 files changed

+82
-48
lines changed

Diff for: Include/internal/pycore_interp.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct _is {
130130
uint64_t next_unique_id;
131131
/* The linked list of threads, newest first. */
132132
PyThreadState *head;
133+
_PyThreadStateImpl *preallocated;
133134
/* The thread currently executing in the __main__ module, if any. */
134135
PyThreadState *main;
135136
/* Used in Modules/_threadmodule.c. */
@@ -278,9 +279,10 @@ struct _is {
278279
struct _Py_interp_cached_objects cached_objects;
279280
struct _Py_interp_static_objects static_objects;
280281

282+
Py_ssize_t _interactive_src_count;
283+
281284
/* the initial PyInterpreterState.threads.head */
282285
_PyThreadStateImpl _initial_thread;
283-
Py_ssize_t _interactive_src_count;
284286
};
285287

286288

Diff for: Include/internal/pycore_runtime_init.h

+3
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ extern PyTypeObject _PyExc_MemoryError;
118118
{ \
119119
.id_refcount = -1, \
120120
._whence = _PyInterpreterState_WHENCE_NOTSET, \
121+
.threads = { \
122+
.preallocated = &(INTERP)._initial_thread, \
123+
}, \
121124
.imports = IMPORTS_INIT, \
122125
.ceval = { \
123126
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \

Diff for: Lib/test/test_interpreters/test_stress.py

+30
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def test_create_many_sequential(self):
2323
alive.append(interp)
2424

2525
@support.requires_resource('cpu')
26+
@threading_helper.requires_working_threading()
2627
def test_create_many_threaded(self):
2728
alive = []
2829
def task():
@@ -32,6 +33,35 @@ def task():
3233
with threading_helper.start_threads(threads):
3334
pass
3435

36+
@support.requires_resource('cpu')
37+
@threading_helper.requires_working_threading()
38+
def test_many_threads_running_interp_in_other_interp(self):
39+
interp = interpreters.create()
40+
41+
script = f"""if True:
42+
import _interpreters
43+
_interpreters.run_string({interp.id}, '1')
44+
"""
45+
46+
def run():
47+
interp = interpreters.create()
48+
alreadyrunning = (f'{interpreters.InterpreterError}: '
49+
'interpreter already running')
50+
success = False
51+
while not success:
52+
try:
53+
interp.exec(script)
54+
except interpreters.ExecutionFailed as exc:
55+
if exc.excinfo.msg != 'interpreter already running':
56+
raise # re-raise
57+
assert exc.excinfo.type.__name__ == 'InterpreterError'
58+
else:
59+
success = True
60+
61+
threads = (threading.Thread(target=run) for _ in range(200))
62+
with threading_helper.start_threads(threads):
63+
pass
64+
3565

3666
if __name__ == '__main__':
3767
# Test needs to be a package, so we can do relative imports.

Diff for: Python/pystate.c

+46-47
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,8 @@ init_interpreter(PyInterpreterState *interp,
629629
assert(next != NULL || (interp == runtime->interpreters.main));
630630
interp->next = next;
631631

632+
interp->threads.preallocated = &interp->_initial_thread;
633+
632634
// We would call _PyObject_InitState() at this point
633635
// if interp->feature_flags were alredy set.
634636

@@ -766,7 +768,6 @@ PyInterpreterState_New(void)
766768
return interp;
767769
}
768770

769-
770771
static void
771772
interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
772773
{
@@ -910,6 +911,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
910911
// XXX Once we have one allocator per interpreter (i.e.
911912
// per-interpreter GC) we must ensure that all of the interpreter's
912913
// objects have been cleaned up at the point.
914+
915+
// We could clear interp->threads.freelist here
916+
// if it held more than just the initial thread state.
913917
}
914918

915919

@@ -1386,22 +1390,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
13861390
return res;
13871391
}
13881392

1393+
static void
1394+
reset_threadstate(_PyThreadStateImpl *tstate)
1395+
{
1396+
// Set to _PyThreadState_INIT directly?
1397+
memcpy(tstate,
1398+
&initial._main_interpreter._initial_thread,
1399+
sizeof(*tstate));
1400+
}
1401+
13891402
static _PyThreadStateImpl *
1390-
alloc_threadstate(void)
1403+
alloc_threadstate(PyInterpreterState *interp)
13911404
{
1392-
return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
1405+
_PyThreadStateImpl *tstate;
1406+
1407+
// Try the preallocated tstate first.
1408+
tstate = _Py_atomic_exchange_ptr(&interp->threads.preallocated, NULL);
1409+
1410+
// Fall back to the allocator.
1411+
if (tstate == NULL) {
1412+
tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
1413+
if (tstate == NULL) {
1414+
return NULL;
1415+
}
1416+
reset_threadstate(tstate);
1417+
}
1418+
return tstate;
13931419
}
13941420

13951421
static void
13961422
free_threadstate(_PyThreadStateImpl *tstate)
13971423
{
1424+
PyInterpreterState *interp = tstate->base.interp;
13981425
// The initial thread state of the interpreter is allocated
13991426
// as part of the interpreter state so should not be freed.
1400-
if (tstate == &tstate->base.interp->_initial_thread) {
1401-
// Restore to _PyThreadState_INIT.
1402-
memcpy(tstate,
1403-
&initial._main_interpreter._initial_thread,
1404-
sizeof(*tstate));
1427+
if (tstate == &interp->_initial_thread) {
1428+
// Make it available again.
1429+
reset_threadstate(tstate);
1430+
assert(interp->threads.preallocated == NULL);
1431+
_Py_atomic_store_ptr(&interp->threads.preallocated, tstate);
14051432
}
14061433
else {
14071434
PyMem_RawFree(tstate);
@@ -1492,66 +1519,38 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
14921519
static PyThreadState *
14931520
new_threadstate(PyInterpreterState *interp, int whence)
14941521
{
1495-
_PyThreadStateImpl *tstate;
1496-
_PyRuntimeState *runtime = interp->runtime;
1497-
// We don't need to allocate a thread state for the main interpreter
1498-
// (the common case), but doing it later for the other case revealed a
1499-
// reentrancy problem (deadlock). So for now we always allocate before
1500-
// taking the interpreters lock. See GH-96071.
1501-
_PyThreadStateImpl *new_tstate = alloc_threadstate();
1502-
int used_newtstate;
1503-
if (new_tstate == NULL) {
1522+
// Allocate the thread state.
1523+
_PyThreadStateImpl *tstate = alloc_threadstate(interp);
1524+
if (tstate == NULL) {
15041525
return NULL;
15051526
}
1527+
15061528
#ifdef Py_GIL_DISABLED
15071529
Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
15081530
if (qsbr_idx < 0) {
1509-
PyMem_RawFree(new_tstate);
1531+
free_threadstate(tstate);
15101532
return NULL;
15111533
}
15121534
int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
15131535
if (tlbc_idx < 0) {
1514-
PyMem_RawFree(new_tstate);
1536+
free_threadstate(tstate);
15151537
return NULL;
15161538
}
15171539
#endif
15181540

15191541
/* We serialize concurrent creation to protect global state. */
1520-
HEAD_LOCK(runtime);
1542+
HEAD_LOCK(interp->runtime);
15211543

1544+
// Initialize the new thread state.
15221545
interp->threads.next_unique_id += 1;
15231546
uint64_t id = interp->threads.next_unique_id;
1547+
init_threadstate(tstate, interp, id, whence);
15241548

1525-
// Allocate the thread state and add it to the interpreter.
1549+
// Add the new thread state to the interpreter.
15261550
PyThreadState *old_head = interp->threads.head;
1527-
if (old_head == NULL) {
1528-
// It's the interpreter's initial thread state.
1529-
used_newtstate = 0;
1530-
tstate = &interp->_initial_thread;
1531-
}
1532-
// XXX Re-use interp->_initial_thread if not in use?
1533-
else {
1534-
// Every valid interpreter must have at least one thread.
1535-
assert(id > 1);
1536-
assert(old_head->prev == NULL);
1537-
used_newtstate = 1;
1538-
tstate = new_tstate;
1539-
// Set to _PyThreadState_INIT.
1540-
memcpy(tstate,
1541-
&initial._main_interpreter._initial_thread,
1542-
sizeof(*tstate));
1543-
}
1544-
1545-
init_threadstate(tstate, interp, id, whence);
15461551
add_threadstate(interp, (PyThreadState *)tstate, old_head);
15471552

1548-
HEAD_UNLOCK(runtime);
1549-
if (!used_newtstate) {
1550-
// Must be called with lock unlocked to avoid re-entrancy deadlock.
1551-
PyMem_RawFree(new_tstate);
1552-
}
1553-
else {
1554-
}
1553+
HEAD_UNLOCK(interp->runtime);
15551554

15561555
#ifdef Py_GIL_DISABLED
15571556
// Must be called with lock unlocked to avoid lock ordering deadlocks.

0 commit comments

Comments
 (0)