Skip to content

gh-118332: Fix deadlock involving stop the world #118412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Include/internal/pycore_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,10 @@ PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);

// Wait for the event to be set, or until the timeout expires. If the event is
// already set, then this returns immediately. Returns 1 if the event was set,
// and 0 if the timeout expired or thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);
// and 0 if the timeout expired or thread was interrupted. If `detach` is
// true, then the thread will detach/release the GIL while waiting.
PyAPI_FUNC(int)
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach);

// _PyRawMutex implements a word-sized mutex that that does not depend on the
// parking lot API, and therefore can be used in the parking lot
Expand Down
85 changes: 85 additions & 0 deletions Modules/_testinternalcapi/test_critical_sections.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,90 @@ test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args))
Py_DECREF(test_data.obj1);
Py_RETURN_NONE;
}

static void
pysleep(int ms)
{
#ifdef MS_WINDOWS
Sleep(ms);
#else
usleep(ms * 1000);
#endif
}

struct test_data_gc {
PyObject *obj;
Py_ssize_t num_threads;
Py_ssize_t id;
Py_ssize_t countdown;
PyEvent done_event;
PyEvent ready;
};

static void
thread_gc(void *arg)
{
struct test_data_gc *test_data = arg;
PyGILState_STATE gil = PyGILState_Ensure();

Py_ssize_t id = _Py_atomic_add_ssize(&test_data->id, 1);
if (id == test_data->num_threads - 1) {
_PyEvent_Notify(&test_data->ready);
}
else {
// wait for all test threads to more reliably reproduce the issue.
PyEvent_Wait(&test_data->ready);
}

if (id == 0) {
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
// pause long enough that the lock would be handed off directly to
// a waiting thread.
pysleep(5);
PyGC_Collect();
Py_END_CRITICAL_SECTION();
}
else if (id == 1) {
pysleep(1);
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
pysleep(1);
Py_END_CRITICAL_SECTION();
}
else if (id == 2) {
// sleep long enough so that thread 0 is waiting to stop the world
pysleep(6);
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
pysleep(1);
Py_END_CRITICAL_SECTION();
}

PyGILState_Release(gil);
if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) {
// last thread to finish sets done_event
_PyEvent_Notify(&test_data->done_event);
}
}

static PyObject *
test_critical_sections_gc(PyObject *self, PyObject *Py_UNUSED(args))
{
// gh-118332: Contended critical sections should not deadlock with GC
const Py_ssize_t NUM_THREADS = 3;
struct test_data_gc test_data = {
.obj = PyDict_New(),
.countdown = NUM_THREADS,
.num_threads = NUM_THREADS,
};
assert(test_data.obj != NULL);

for (int i = 0; i < NUM_THREADS; i++) {
PyThread_start_new_thread(&thread_gc, &test_data);
}
PyEvent_Wait(&test_data.done_event);
Py_DECREF(test_data.obj);
Py_RETURN_NONE;
}

#endif

static PyMethodDef test_methods[] = {
Expand All @@ -212,6 +296,7 @@ static PyMethodDef test_methods[] = {
{"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS},
#ifdef Py_CAN_START_THREADS
{"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
{"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
#endif
{NULL, NULL} /* sentinel */
};
Expand Down
3 changes: 2 additions & 1 deletion Modules/_threadmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,8 @@ ThreadHandle_join(ThreadHandle *self, PyTime_t timeout_ns)

// Wait until the deadline for the thread to exit.
PyTime_t deadline = timeout_ns != -1 ? _PyDeadline_Init(timeout_ns) : 0;
while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns)) {
int detach = 1;
while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns, detach)) {
if (deadline) {
// _PyDeadline_Get will return a negative value if the deadline has
// been exceeded.
Expand Down
6 changes: 3 additions & 3 deletions Python/lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,12 @@ _PyEvent_Notify(PyEvent *evt)
void
PyEvent_Wait(PyEvent *evt)
{
while (!PyEvent_WaitTimed(evt, -1))
while (!PyEvent_WaitTimed(evt, -1, /*detach=*/1))
;
}

int
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
{
for (;;) {
uint8_t v = _Py_atomic_load_uint8(&evt->v);
Expand All @@ -298,7 +298,7 @@ PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)

uint8_t expected = _Py_HAS_PARKED;
(void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
timeout_ns, NULL, 1);
timeout_ns, NULL, detach);

return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
}
Expand Down
3 changes: 2 additions & 1 deletion Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2238,7 +2238,8 @@ stop_the_world(struct _stoptheworld_state *stw)
}

PyTime_t wait_ns = 1000*1000; // 1ms (arbitrary, may need tuning)
if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) {
int detach = 0;
if (PyEvent_WaitTimed(&stw->stop_event, wait_ns, detach)) {
assert(stw->thread_countdown == 0);
break;
}
Expand Down
Loading