Skip to content

Commit c1d5643

Browse files
ericsnowcurrentlySonicField
authored andcommitted
pythongh-117953: Always Run Extension Init Func in Main Interpreter First (pythongh-118157)
This change makes sure all extension/builtin modules have their init function run first by the main interpreter before proceeding with import in the original interpreter (main or otherwise). This means when the import of a single-phase init module fails in an isolated subinterpreter, it won't tie any global state/callbacks to the subinterpreter.
1 parent 69baf02 commit c1d5643

File tree

4 files changed

+216
-69
lines changed

4 files changed

+216
-69
lines changed

Lib/test/test_import/__init__.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -2454,10 +2454,6 @@ def setUpClass(cls):
24542454
# Start fresh.
24552455
cls.clean_up()
24562456

2457-
@classmethod
2458-
def tearDownClass(cls):
2459-
restore__testsinglephase()
2460-
24612457
def tearDown(self):
24622458
# Clean up the module.
24632459
self.clean_up()
@@ -3014,20 +3010,20 @@ def test_basic_multiple_interpreters_deleted_no_reset(self):
30143010
# * alive in 0 interpreters
30153011
# * module def in _PyRuntime.imports.extensions
30163012
# * mod init func ran for the first time (since reset)
3017-
# * m_copy is NULL (claered when the interpreter was destroyed)
3013+
# * m_copy is still set (owned by main interpreter)
30183014
# * module's global state was initialized, not reset
30193015

30203016
# Use a subinterpreter that sticks around.
30213017
loaded_interp1 = self.import_in_subinterp(interpid1)
30223018
self.check_common(loaded_interp1)
3023-
self.check_semi_fresh(loaded_interp1, loaded_main, base)
3019+
self.check_copied(loaded_interp1, base)
30243020

30253021
# At this point:
30263022
# * alive in 1 interpreter (interp1)
30273023
# * module def still in _PyRuntime.imports.extensions
3028-
# * mod init func ran for the second time (since reset)
3029-
# * m_copy was copied from interp1 (was NULL)
3030-
# * module's global state was updated, not reset
3024+
# * mod init func did not run again
3025+
# * m_copy was not changed
3026+
# * module's global state was not touched
30313027

30323028
# Use a subinterpreter while the previous one is still alive.
30333029
loaded_interp2 = self.import_in_subinterp(interpid2)
@@ -3038,8 +3034,8 @@ def test_basic_multiple_interpreters_deleted_no_reset(self):
30383034
# * alive in 2 interpreters (interp1, interp2)
30393035
# * module def still in _PyRuntime.imports.extensions
30403036
# * mod init func did not run again
3041-
# * m_copy was copied from interp2 (was from interp1)
3042-
# * module's global state was updated, not reset
3037+
# * m_copy was not changed
3038+
# * module's global state was not touched
30433039

30443040
@requires_subinterpreters
30453041
def test_basic_multiple_interpreters_reset_each(self):
+4-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
2-
from test.support import load_package_tests
2+
from test.support import load_package_tests, Py_GIL_DISABLED
33

4-
def load_tests(*args):
5-
return load_package_tests(os.path.dirname(__file__), *args)
4+
if not Py_GIL_DISABLED:
5+
def load_tests(*args):
6+
return load_package_tests(os.path.dirname(__file__), *args)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
When a builtin or extension module is imported for the first time, while a
2+
subinterpreter is active, the module's init function is now run by the main
3+
interpreter first before import continues in the subinterpreter.
4+
Consequently, single-phase init modules now fail in an isolated
5+
subinterpreter without the init function running under that interpreter,
6+
whereas before it would run under the subinterpreter *before* failing,
7+
potentially leaving behind global state and callbacks and otherwise leaving
8+
the module in an inconsistent state.

Python/import.c

+197-55
Original file line numberDiff line numberDiff line change
@@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict)
11541154
return -1;
11551155
}
11561156
// XXX We may want to make the copy immortal.
1157-
// XXX This incref shouldn't be necessary. We are decref'ing
1158-
// one to many times somewhere.
1159-
Py_INCREF(copied);
11601157

11611158
value->_m_dict = (struct cached_m_dict){
11621159
.copied=copied,
@@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
15801577
}
15811578
#endif
15821579

1580+
static PyThreadState *
1581+
switch_to_main_interpreter(PyThreadState *tstate)
1582+
{
1583+
if (_Py_IsMainInterpreter(tstate->interp)) {
1584+
return tstate;
1585+
}
1586+
PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main());
1587+
if (main_tstate == NULL) {
1588+
return NULL;
1589+
}
1590+
main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
1591+
#ifndef NDEBUG
1592+
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
1593+
assert(old_tstate == tstate);
1594+
#else
1595+
(void)PyThreadState_Swap(main_tstate);
1596+
#endif
1597+
return main_tstate;
1598+
}
1599+
15831600
static PyObject *
15841601
get_core_module_dict(PyInterpreterState *interp,
15851602
PyObject *name, PyObject *path)
@@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
19361953
struct extensions_cache_value *cached = NULL;
19371954
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
19381955

1956+
/* We cannot know if the module is single-phase init or
1957+
* multi-phase init until after we call its init function. Even
1958+
* in isolated interpreters (that do not support single-phase init),
1959+
* the init function will run without restriction. For multi-phase
1960+
* init modules that isn't a problem because the init function only
1961+
* runs PyModuleDef_Init() on the module's def and then returns it.
1962+
*
1963+
* However, for single-phase init the module's init function will
1964+
* create the module, create other objects (and allocate other
1965+
* memory), populate it and its module state, and initialze static
1966+
* types. Some modules store other objects and data in global C
1967+
* variables and register callbacks with the runtime/stdlib or
1968+
* even external libraries (which is part of why we can't just
1969+
* dlclose() the module in the error case). That's a problem
1970+
* for isolated interpreters since all of the above happens
1971+
* and only then * will the import fail. Memory will leak,
1972+
* callbacks will still get used, and sometimes there
1973+
* will be crashes (memory access violations
1974+
* and use-after-free).
1975+
*
1976+
* To put it another way, if the module is single-phase init
1977+
* then the import will probably break interpreter isolation
1978+
* and should fail ASAP. However, the module's init function
1979+
* will still get run. That means it may still store state
1980+
* in the shared-object/DLL address space (which never gets
1981+
* closed/cleared), including objects (e.g. static types).
1982+
* This is a problem for isolated subinterpreters since each
1983+
* has its own object allocator. If the loaded shared-object
1984+
* still holds a reference to an object after the corresponding
1985+
* interpreter has finalized then either we must let it leak
1986+
* or else any later use of that object by another interpreter
1987+
* (or across multiple init-fini cycles) will crash the process.
1988+
*
1989+
* To avoid all of that, we make sure the module's init function
1990+
* is always run first with the main interpreter active. If it was
1991+
* already the main interpreter then we can continue loading the
1992+
* module like normal. Otherwise, right after the init function,
1993+
* we take care of some import state bookkeeping, switch back
1994+
* to the subinterpreter, check for single-phase init,
1995+
* and then continue loading like normal. */
1996+
1997+
bool switched = false;
1998+
/* We *could* leave in place a legacy interpreter here
1999+
* (one that shares obmalloc/GIL with main interp),
2000+
* but there isn't a big advantage, we anticipate
2001+
* such interpreters will be increasingly uncommon,
2002+
* and the code is a bit simpler if we always switch
2003+
* to the main interpreter. */
2004+
PyThreadState *main_tstate = switch_to_main_interpreter(tstate);
2005+
if (main_tstate == NULL) {
2006+
return NULL;
2007+
}
2008+
else if (main_tstate != tstate) {
2009+
switched = true;
2010+
/* In the switched case, we could play it safe
2011+
* by getting the main interpreter's import lock here.
2012+
* It's unlikely to matter though. */
2013+
}
2014+
19392015
struct _Py_ext_module_loader_result res;
1940-
if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
2016+
int rc = _PyImport_RunModInitFunc(p0, info, &res);
2017+
if (rc < 0) {
19412018
/* We discard res.def. */
19422019
assert(res.module == NULL);
1943-
_Py_ext_module_loader_result_apply_error(&res, name_buf);
1944-
return NULL;
19452020
}
1946-
assert(!PyErr_Occurred());
1947-
assert(res.err == NULL);
2021+
else {
2022+
assert(!PyErr_Occurred());
2023+
assert(res.err == NULL);
2024+
2025+
mod = res.module;
2026+
res.module = NULL;
2027+
def = res.def;
2028+
assert(def != NULL);
2029+
2030+
/* Do anything else that should be done
2031+
* while still using the main interpreter. */
2032+
if (res.kind == _Py_ext_module_kind_SINGLEPHASE) {
2033+
/* Remember the filename as the __file__ attribute */
2034+
if (info->filename != NULL) {
2035+
// XXX There's a refleak somewhere with the filename.
2036+
// Until we can track it down, we intern it.
2037+
PyObject *filename = Py_NewRef(info->filename);
2038+
PyUnicode_InternInPlace(&filename);
2039+
if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) {
2040+
PyErr_Clear(); /* Not important enough to report */
2041+
}
2042+
}
2043+
2044+
/* Update global import state. */
2045+
assert(def->m_base.m_index != 0);
2046+
struct singlephase_global_update singlephase = {
2047+
// XXX Modules that share a def should each get their own index,
2048+
// whereas currently they share (which means the per-interpreter
2049+
// cache is less reliable than it should be).
2050+
.m_index=def->m_base.m_index,
2051+
.origin=info->origin,
2052+
#ifdef Py_GIL_DISABLED
2053+
.md_gil=((PyModuleObject *)mod)->md_gil,
2054+
#endif
2055+
};
2056+
// gh-88216: Extensions and def->m_base.m_copy can be updated
2057+
// when the extension module doesn't support sub-interpreters.
2058+
if (def->m_size == -1) {
2059+
/* We will reload from m_copy. */
2060+
assert(def->m_base.m_init == NULL);
2061+
singlephase.m_dict = PyModule_GetDict(mod);
2062+
assert(singlephase.m_dict != NULL);
2063+
}
2064+
else {
2065+
/* We will reload via the init function. */
2066+
assert(def->m_size >= 0);
2067+
assert(def->m_base.m_copy == NULL);
2068+
singlephase.m_init = p0;
2069+
}
2070+
cached = update_global_state_for_extension(
2071+
tstate, info->path, info->name, def, &singlephase);
2072+
if (cached == NULL) {
2073+
assert(PyErr_Occurred());
2074+
goto main_finally;
2075+
}
2076+
}
2077+
}
19482078

1949-
mod = res.module;
1950-
res.module = NULL;
1951-
def = res.def;
1952-
assert(def != NULL);
2079+
main_finally:
2080+
/* Switch back to the subinterpreter. */
2081+
if (switched) {
2082+
assert(main_tstate != tstate);
2083+
2084+
/* Handle any exceptions, which we cannot propagate directly
2085+
* to the subinterpreter. */
2086+
if (PyErr_Occurred()) {
2087+
if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
2088+
/* We trust it will be caught again soon. */
2089+
PyErr_Clear();
2090+
}
2091+
else {
2092+
/* Printing the exception should be sufficient. */
2093+
PyErr_PrintEx(0);
2094+
}
2095+
}
2096+
2097+
/* Any module we got from the init function will have to be
2098+
* reloaded in the subinterpreter. */
2099+
Py_CLEAR(mod);
2100+
2101+
PyThreadState_Clear(main_tstate);
2102+
(void)PyThreadState_Swap(tstate);
2103+
PyThreadState_Delete(main_tstate);
2104+
}
2105+
2106+
/*****************************************************************/
2107+
/* At this point we are back to the interpreter we started with. */
2108+
/*****************************************************************/
2109+
2110+
/* Finally we handle the error return from _PyImport_RunModInitFunc(). */
2111+
if (rc < 0) {
2112+
_Py_ext_module_loader_result_apply_error(&res, name_buf);
2113+
goto error;
2114+
}
19532115

19542116
if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
19552117
assert_multiphase_def(def);
19562118
assert(mod == NULL);
2119+
/* Note that we cheat a little by not repeating the calls
2120+
* to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
19572121
mod = PyModule_FromDefAndSpec(def, spec);
19582122
if (mod == NULL) {
19592123
goto error;
@@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
19622126
else {
19632127
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
19642128
assert_singlephase_def(def);
1965-
assert(PyModule_Check(mod));
19662129

19672130
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
19682131
goto error;
19692132
}
2133+
assert(!PyErr_Occurred());
19702134

1971-
/* Remember the filename as the __file__ attribute */
1972-
if (info->filename != NULL) {
1973-
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
1974-
PyErr_Clear(); /* Not important enough to report */
2135+
if (switched) {
2136+
/* We switched to the main interpreter to run the init
2137+
* function, so now we will "reload" the module from the
2138+
* cached data using the original subinterpreter. */
2139+
assert(mod == NULL);
2140+
mod = reload_singlephase_extension(tstate, cached, info);
2141+
if (mod == NULL) {
2142+
goto error;
19752143
}
1976-
}
1977-
1978-
/* Update global import state. */
1979-
assert(def->m_base.m_index != 0);
1980-
struct singlephase_global_update singlephase = {
1981-
// XXX Modules that share a def should each get their own index,
1982-
// whereas currently they share (which means the per-interpreter
1983-
// cache is less reliable than it should be).
1984-
.m_index=def->m_base.m_index,
1985-
.origin=info->origin,
1986-
#ifdef Py_GIL_DISABLED
1987-
.md_gil=((PyModuleObject *)mod)->md_gil,
1988-
#endif
1989-
};
1990-
// gh-88216: Extensions and def->m_base.m_copy can be updated
1991-
// when the extension module doesn't support sub-interpreters.
1992-
if (def->m_size == -1) {
1993-
/* We will reload from m_copy. */
1994-
assert(def->m_base.m_init == NULL);
1995-
singlephase.m_dict = PyModule_GetDict(mod);
1996-
assert(singlephase.m_dict != NULL);
2144+
assert(!PyErr_Occurred());
2145+
assert(PyModule_Check(mod));
19972146
}
19982147
else {
1999-
/* We will reload via the init function. */
2000-
assert(def->m_size >= 0);
2001-
assert(def->m_base.m_copy == NULL);
2002-
singlephase.m_init = p0;
2003-
}
2004-
cached = update_global_state_for_extension(
2005-
tstate, info->path, info->name, def, &singlephase);
2006-
if (cached == NULL) {
2007-
goto error;
2008-
}
2009-
2010-
/* Update per-interpreter import state. */
2011-
PyObject *modules = get_modules_dict(tstate, true);
2012-
if (finish_singlephase_extension(
2013-
tstate, mod, cached, info->name, modules) < 0)
2014-
{
2015-
goto error;
2148+
assert(mod != NULL);
2149+
assert(PyModule_Check(mod));
2150+
2151+
/* Update per-interpreter import state. */
2152+
PyObject *modules = get_modules_dict(tstate, true);
2153+
if (finish_singlephase_extension(
2154+
tstate, mod, cached, info->name, modules) < 0)
2155+
{
2156+
goto error;
2157+
}
20162158
}
20172159
}
20182160

0 commit comments

Comments
 (0)