Skip to content

Commit 8d04512

Browse files
wip
1 parent 0f0c305 commit 8d04512

File tree

1 file changed

+59
-53
lines changed

1 file changed

+59
-53
lines changed

Python/import.c

+59-53
Original file line numberDiff line numberDiff line change
@@ -1156,48 +1156,22 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name)
11561156
}
11571157

11581158
static PyThreadState *
1159-
maybe_switch_to_main_interpreter(PyThreadState *tstate)
1159+
switch_to_main_interpreter(PyThreadState *tstate)
11601160
{
1161-
PyThreadState *main_tstate = tstate;
11621161
if (_Py_IsMainInterpreter(tstate->interp)) {
1163-
/* There's no need to switch. */
1164-
}
1165-
else if (check_multi_interp_extensions(tstate->interp)) {
1166-
/*
1167-
If the module is single-phase init then the import will fail.
1168-
However, the module's init function will still get run.
1169-
That means it may still store state in the shared-object/DLL
1170-
address space (which never gets closed/cleared), including
1171-
objects (e.g. static types).
1172-
1173-
This is a problem for isolated subinterpreters since each
1174-
has its own object allocator. If the loaded shared-object
1175-
still holds a reference to an object after the corresponding
1176-
interpreter has finalized then either we must let it leak
1177-
or else any later use of that object by another interpreter
1178-
(or across multiple init-fini cycles) will crash the process.
1179-
1180-
We avoid the problem by first loading the module
1181-
in the main interpreter.
1182-
1183-
Here's another complication we avoid: the module's init
1184-
function might register callbacks, whether in Python
1185-
(e.g. sys.stdin, atexit) or in linked libraries.
1186-
Thus we cannot just dlclose() the module
1187-
in this error case.
1188-
*/
1189-
main_tstate = PyThreadState_New(_PyInterpreterState_Main());
1190-
if (main_tstate == NULL) {
1191-
return NULL;
1192-
}
1193-
main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
1162+
return tstate;
1163+
}
1164+
PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main());
1165+
if (main_tstate == NULL) {
1166+
return NULL;
1167+
}
1168+
main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
11941169
#ifndef NDEBUG
1195-
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
1196-
assert(old_tstate == tstate);
1170+
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
1171+
assert(old_tstate == tstate);
11971172
#else
1198-
(void)PyThreadState_Swap(main_tstate);
1173+
(void)PyThreadState_Swap(main_tstate);
11991174
#endif
1200-
}
12011175
return main_tstate;
12021176
}
12031177

@@ -1670,29 +1644,59 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
16701644
* multi-phase init until after we call its init function. Even
16711645
* in isolated interpreters (that do not support single-phase init),
16721646
* the init function will run without restriction. For multi-phase
1673-
* init modules that isn't a problem because the init function runs
1674-
* PyModuleDef_Init() on the module's def and then returns it.
1647+
* init modules that isn't a problem because the init function only
1648+
* runs PyModuleDef_Init() on the module's def and then returns it.
16751649
*
16761650
* However, for single-phase init the module's init function will
16771651
* create the module, create other objects (and allocate other
16781652
* memory), populate it and its module state, and initialize static
16791653
* types. Some modules store other objects and data in global C
16801654
* variables and register callbacks with the runtime/stdlib or
1681-
* event external libraries. That's a problem for isolated
1682-
* interpreters since all of that happens and only then will
1683-
* the import fail. Memory will leak, callbacks will still
1684-
* get used, and sometimes there will be memory access
1685-
* violations and use-after-free crashes.
1655+
* even external libraries (which is part of why we can't just
1656+
* dlclose() the module in the error case). That's a problem
1657+
* for isolated interpreters since all of the above happens
1658+
* and only then will the import fail. Memory will leak,
1659+
* callbacks will still get used, and sometimes there
1660+
* will be crashes (memory access violations
1661+
* and use-after-free).
16861662
*
1687-
* To avoid that, we make sure the module's init function is always
1688-
* run first with the main interpreter active. If it was already
1689-
* the main interpreter then we can continue loading the module
1690-
* like normal. Otherwise, right after the init function, we switch
1691-
* back to the subinterpreter, check for single-phase init, and
1692-
* then continue loading like normal. */
1693-
PyThreadState *main_tstate = maybe_switch_to_main_interpreter(tstate);
1694-
1695-
struct _Py_ext_module_loader_result res;
1663+
* To put it another way, if the module is single-phase init
1664+
* then the import will probably break interpreter isolation
1665+
* and should fail ASAP. However, the module's init function
1666+
* will still get run. That means it may still store state
1667+
* in the shared-object/DLL address space (which never gets
1668+
* closed/cleared), including objects (e.g. static types).
1669+
* This is a problem for isolated subinterpreters since each
1670+
* has its own object allocator. If the loaded shared-object
1671+
* still holds a reference to an object after the corresponding
1672+
* interpreter has finalized then either we must let it leak
1673+
* or else any later use of that object by another interpreter
1674+
* (or across multiple init-fini cycles) will crash the process.
1675+
*
1676+
* To avoid all of that, we make sure the module's init function
1677+
* is always run first with the main interpreter active. If it was
1678+
* already the main interpreter then we can continue loading the
1679+
* module like normal. Otherwise, right after the init function,
1680+
* we take care of some import state bookkeeping, switch back
1681+
* to the subinterpreter, check for single-phase init,
1682+
* and then continue loading like normal. */
1683+
1684+
PyThreadState *main_tstate = NULL;
1685+
if (!_Py_IsMainInterpreter(tstate->interp)) {
1686+
/* We *could* leave in place a legacy interpreter here
1687+
* (one that shares obmalloc/GIL with main interp),
1688+
* but there isn't a big advantage, we anticipate
1689+
* such interpreters will be increasingly uncommon,
1690+
* and the code is a bit simpler if we always switch
1691+
* to the main interpreter. */
1692+
main_tstate = switch_to_main_interpreter(tstate);
1693+
if (main_tstate == NULL) {
1694+
goto finally;
1695+
}
1696+
assert(main_tstate != tstate);
1697+
// XXX Get import lock.
1698+
}
1699+
16961700
int rc = _PyImport_RunModInitFunc(p0, info, &res);
16971701
if (rc < 0) {
16981702
/* We discard res.def. */
@@ -1790,6 +1794,8 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
17901794
/* Now we finish up with kind-specific operations. */
17911795
if (res.kind == _Py_ext_module_loader_result_MULTIPHASE) {
17921796
assert(mod == NULL);
1797+
/* Note that we cheat a little by not repeating the calls
1798+
* to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
17931799
mod = PyModule_FromDefAndSpec(def, spec);
17941800
if (mod == NULL) {
17951801
goto finally;

0 commit comments

Comments
 (0)