Skip to content

Commit a5b6574

Browse files
ericsnowcurrentlymiss-islington
authored andcommitted
pythongh-116510: Fix a Crash Due to Shared Immortal Interned Strings (pythongh-124865)
Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of pythongh-124646 that then addresses the Py_TRACE_REFS failures identified by pythongh-124785. (cherry picked from commit f2cb399) Co-authored-by: Eric Snow <[email protected]>
1 parent 47fa3f4 commit a5b6574

File tree

3 files changed

+91
-6
lines changed

3 files changed

+91
-6
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a crash caused by immortal interned strings being shared between
2+
sub-interpreters that use basic single-phase init. In that case, the string
3+
can be used by an interpreter that outlives the interpreter that created and
4+
interned it. For interpreters that share obmalloc state, also share the
5+
interned dict with the main interpreter.

Objects/object.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2481,6 +2481,42 @@ _Py_ResurrectReference(PyObject *op)
24812481

24822482

24832483
#ifdef Py_TRACE_REFS
2484+
/* Make sure the ref is associated with the right interpreter.
2485+
* This only needs special attention for heap-allocated objects
2486+
* that have been immortalized, and only when the object might
2487+
* outlive the interpreter where it was created. That means the
2488+
* object was necessarily created using a global allocator
2489+
* (i.e. from the main interpreter). Thus in that specific case
2490+
* we move the object over to the main interpreter's refchain.
2491+
*
2492+
* This was added for the sake of the immortal interned strings,
2493+
* where legacy subinterpreters share the main interpreter's
2494+
* interned dict (and allocator), and therefore the strings can
2495+
* outlive the subinterpreter.
2496+
*
2497+
* It may make sense to fold this into _Py_SetImmortalUntracked(),
2498+
* but that requires further investigation. In the meantime, it is
2499+
* up to the caller to know if this is needed. There should be
2500+
* very few cases.
2501+
*/
2502+
void
2503+
_Py_NormalizeImmortalReference(PyObject *op)
2504+
{
2505+
assert(_Py_IsImmortal(op));
2506+
PyInterpreterState *interp = _PyInterpreterState_GET();
2507+
if (!_PyRefchain_IsTraced(interp, op)) {
2508+
return;
2509+
}
2510+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
2511+
if (interp != main_interp
2512+
&& interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)
2513+
{
2514+
assert(!_PyRefchain_IsTraced(main_interp, op));
2515+
_PyRefchain_Remove(interp, op);
2516+
_PyRefchain_Trace(main_interp, op);
2517+
}
2518+
}
2519+
24842520
void
24852521
_Py_ForgetReference(PyObject *op)
24862522
{

Objects/unicodeobject.c

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -277,13 +277,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
277277
}
278278
}
279279

280+
/* Return true if this interpreter should share the main interpreter's
281+
intern_dict. That's important for interpreters which load basic
282+
single-phase init extension modules (m_size == -1). There could be interned
283+
immortal strings that are shared between interpreters, due to the
284+
PyDict_Update(mdict, m_copy) call in import_find_extension().
285+
286+
It's not safe to deallocate those strings until all interpreters that
287+
potentially use them are freed. By storing them in the main interpreter, we
288+
ensure they get freed after all other interpreters are freed.
289+
*/
290+
static bool
291+
has_shared_intern_dict(PyInterpreterState *interp)
292+
{
293+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
294+
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
295+
}
296+
280297
static int
281298
init_interned_dict(PyInterpreterState *interp)
282299
{
283300
assert(get_interned_dict(interp) == NULL);
284-
PyObject *interned = interned = PyDict_New();
285-
if (interned == NULL) {
286-
return -1;
301+
PyObject *interned;
302+
if (has_shared_intern_dict(interp)) {
303+
interned = get_interned_dict(_PyInterpreterState_Main());
304+
Py_INCREF(interned);
305+
}
306+
else {
307+
interned = PyDict_New();
308+
if (interned == NULL) {
309+
return -1;
310+
}
287311
}
288312
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
289313
return 0;
@@ -294,7 +318,10 @@ clear_interned_dict(PyInterpreterState *interp)
294318
{
295319
PyObject *interned = get_interned_dict(interp);
296320
if (interned != NULL) {
297-
PyDict_Clear(interned);
321+
if (!has_shared_intern_dict(interp)) {
322+
// only clear if the dict belongs to this interpreter
323+
PyDict_Clear(interned);
324+
}
298325
Py_DECREF(interned);
299326
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
300327
}
@@ -15098,6 +15125,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
1509815125
assert(*p);
1509915126
}
1510015127

15128+
#ifdef Py_TRACE_REFS
15129+
extern void _Py_NormalizeImmortalReference(PyObject *);
15130+
#endif
15131+
1510115132
static void
1510215133
immortalize_interned(PyObject *s)
1510315134
{
@@ -15113,6 +15144,10 @@ immortalize_interned(PyObject *s)
1511315144
#endif
1511415145
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
1511515146
_Py_SetImmortal(s);
15147+
#ifdef Py_TRACE_REFS
15148+
/* Make sure the ref is associated with the right interpreter. */
15149+
_Py_NormalizeImmortalReference(s);
15150+
#endif
1511615151
}
1511715152

1511815153
static /* non-null */ PyObject*
@@ -15306,6 +15341,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1530615341
}
1530715342
assert(PyDict_CheckExact(interned));
1530815343

15344+
if (has_shared_intern_dict(interp)) {
15345+
// the dict doesn't belong to this interpreter, skip the debug
15346+
// checks on it and just clear the pointer to it
15347+
clear_interned_dict(interp);
15348+
return;
15349+
}
15350+
1530915351
#ifdef INTERNED_STATS
1531015352
fprintf(stderr, "releasing %zd interned strings\n",
1531115353
PyDict_GET_SIZE(interned));
@@ -15827,8 +15869,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1582715869
{
1582815870
struct _Py_unicode_state *state = &interp->unicode;
1582915871

15830-
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15831-
assert(get_interned_dict(interp) == NULL);
15872+
if (!has_shared_intern_dict(interp)) {
15873+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15874+
assert(get_interned_dict(interp) == NULL);
15875+
}
1583215876

1583315877
_PyUnicode_FiniEncodings(&state->fs_codec);
1583415878

0 commit comments

Comments
 (0)