@@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict)
1154
1154
return -1 ;
1155
1155
}
1156
1156
// XXX We may want to make the copy immortal.
1157
- // XXX This incref shouldn't be necessary. We are decref'ing
1158
- // one to many times somewhere.
1159
- Py_INCREF (copied );
1160
1157
1161
1158
value -> _m_dict = (struct cached_m_dict ){
1162
1159
.copied = copied ,
@@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
1580
1577
}
1581
1578
#endif
1582
1579
1580
+ static PyThreadState *
1581
+ switch_to_main_interpreter (PyThreadState * tstate )
1582
+ {
1583
+ if (_Py_IsMainInterpreter (tstate -> interp )) {
1584
+ return tstate ;
1585
+ }
1586
+ PyThreadState * main_tstate = PyThreadState_New (_PyInterpreterState_Main ());
1587
+ if (main_tstate == NULL ) {
1588
+ return NULL ;
1589
+ }
1590
+ main_tstate -> _whence = _PyThreadState_WHENCE_EXEC ;
1591
+ #ifndef NDEBUG
1592
+ PyThreadState * old_tstate = PyThreadState_Swap (main_tstate );
1593
+ assert (old_tstate == tstate );
1594
+ #else
1595
+ (void )PyThreadState_Swap (main_tstate );
1596
+ #endif
1597
+ return main_tstate ;
1598
+ }
1599
+
1583
1600
static PyObject *
1584
1601
get_core_module_dict (PyInterpreterState * interp ,
1585
1602
PyObject * name , PyObject * path )
@@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
1936
1953
struct extensions_cache_value * cached = NULL ;
1937
1954
const char * name_buf = PyBytes_AS_STRING (info -> name_encoded );
1938
1955
1956
+ /* We cannot know if the module is single-phase init or
1957
+ * multi-phase init until after we call its init function. Even
1958
+ * in isolated interpreters (that do not support single-phase init),
1959
+ * the init function will run without restriction. For multi-phase
1960
+ * init modules that isn't a problem because the init function only
1961
+ * runs PyModuleDef_Init() on the module's def and then returns it.
1962
+ *
1963
+ * However, for single-phase init the module's init function will
1964
+ * create the module, create other objects (and allocate other
1965
+ * memory), populate it and its module state, and initialze static
1966
+ * types. Some modules store other objects and data in global C
1967
+ * variables and register callbacks with the runtime/stdlib or
1968
+ * even external libraries (which is part of why we can't just
1969
+ * dlclose() the module in the error case). That's a problem
1970
+ * for isolated interpreters since all of the above happens
1971
+ * and only then * will the import fail. Memory will leak,
1972
+ * callbacks will still get used, and sometimes there
1973
+ * will be crashes (memory access violations
1974
+ * and use-after-free).
1975
+ *
1976
+ * To put it another way, if the module is single-phase init
1977
+ * then the import will probably break interpreter isolation
1978
+ * and should fail ASAP. However, the module's init function
1979
+ * will still get run. That means it may still store state
1980
+ * in the shared-object/DLL address space (which never gets
1981
+ * closed/cleared), including objects (e.g. static types).
1982
+ * This is a problem for isolated subinterpreters since each
1983
+ * has its own object allocator. If the loaded shared-object
1984
+ * still holds a reference to an object after the corresponding
1985
+ * interpreter has finalized then either we must let it leak
1986
+ * or else any later use of that object by another interpreter
1987
+ * (or across multiple init-fini cycles) will crash the process.
1988
+ *
1989
+ * To avoid all of that, we make sure the module's init function
1990
+ * is always run first with the main interpreter active. If it was
1991
+ * already the main interpreter then we can continue loading the
1992
+ * module like normal. Otherwise, right after the init function,
1993
+ * we take care of some import state bookkeeping, switch back
1994
+ * to the subinterpreter, check for single-phase init,
1995
+ * and then continue loading like normal. */
1996
+
1997
+ bool switched = false;
1998
+ /* We *could* leave in place a legacy interpreter here
1999
+ * (one that shares obmalloc/GIL with main interp),
2000
+ * but there isn't a big advantage, we anticipate
2001
+ * such interpreters will be increasingly uncommon,
2002
+ * and the code is a bit simpler if we always switch
2003
+ * to the main interpreter. */
2004
+ PyThreadState * main_tstate = switch_to_main_interpreter (tstate );
2005
+ if (main_tstate == NULL ) {
2006
+ return NULL ;
2007
+ }
2008
+ else if (main_tstate != tstate ) {
2009
+ switched = true;
2010
+ /* In the switched case, we could play it safe
2011
+ * by getting the main interpreter's import lock here.
2012
+ * It's unlikely to matter though. */
2013
+ }
2014
+
1939
2015
struct _Py_ext_module_loader_result res ;
1940
- if (_PyImport_RunModInitFunc (p0 , info , & res ) < 0 ) {
2016
+ int rc = _PyImport_RunModInitFunc (p0 , info , & res );
2017
+ if (rc < 0 ) {
1941
2018
/* We discard res.def. */
1942
2019
assert (res .module == NULL );
1943
- _Py_ext_module_loader_result_apply_error (& res , name_buf );
1944
- return NULL ;
1945
2020
}
1946
- assert (!PyErr_Occurred ());
1947
- assert (res .err == NULL );
2021
+ else {
2022
+ assert (!PyErr_Occurred ());
2023
+ assert (res .err == NULL );
2024
+
2025
+ mod = res .module ;
2026
+ res .module = NULL ;
2027
+ def = res .def ;
2028
+ assert (def != NULL );
2029
+
2030
+ /* Do anything else that should be done
2031
+ * while still using the main interpreter. */
2032
+ if (res .kind == _Py_ext_module_kind_SINGLEPHASE ) {
2033
+ /* Remember the filename as the __file__ attribute */
2034
+ if (info -> filename != NULL ) {
2035
+ // XXX There's a refleak somewhere with the filename.
2036
+ // Until we can track it down, we intern it.
2037
+ PyObject * filename = Py_NewRef (info -> filename );
2038
+ PyUnicode_InternInPlace (& filename );
2039
+ if (PyModule_AddObjectRef (mod , "__file__" , filename ) < 0 ) {
2040
+ PyErr_Clear (); /* Not important enough to report */
2041
+ }
2042
+ }
2043
+
2044
+ /* Update global import state. */
2045
+ assert (def -> m_base .m_index != 0 );
2046
+ struct singlephase_global_update singlephase = {
2047
+ // XXX Modules that share a def should each get their own index,
2048
+ // whereas currently they share (which means the per-interpreter
2049
+ // cache is less reliable than it should be).
2050
+ .m_index = def -> m_base .m_index ,
2051
+ .origin = info -> origin ,
2052
+ #ifdef Py_GIL_DISABLED
2053
+ .md_gil = ((PyModuleObject * )mod )-> md_gil ,
2054
+ #endif
2055
+ };
2056
+ // gh-88216: Extensions and def->m_base.m_copy can be updated
2057
+ // when the extension module doesn't support sub-interpreters.
2058
+ if (def -> m_size == -1 ) {
2059
+ /* We will reload from m_copy. */
2060
+ assert (def -> m_base .m_init == NULL );
2061
+ singlephase .m_dict = PyModule_GetDict (mod );
2062
+ assert (singlephase .m_dict != NULL );
2063
+ }
2064
+ else {
2065
+ /* We will reload via the init function. */
2066
+ assert (def -> m_size >= 0 );
2067
+ assert (def -> m_base .m_copy == NULL );
2068
+ singlephase .m_init = p0 ;
2069
+ }
2070
+ cached = update_global_state_for_extension (
2071
+ tstate , info -> path , info -> name , def , & singlephase );
2072
+ if (cached == NULL ) {
2073
+ assert (PyErr_Occurred ());
2074
+ goto main_finally ;
2075
+ }
2076
+ }
2077
+ }
1948
2078
1949
- mod = res .module ;
1950
- res .module = NULL ;
1951
- def = res .def ;
1952
- assert (def != NULL );
2079
+ main_finally :
2080
+ /* Switch back to the subinterpreter. */
2081
+ if (switched ) {
2082
+ assert (main_tstate != tstate );
2083
+
2084
+ /* Handle any exceptions, which we cannot propagate directly
2085
+ * to the subinterpreter. */
2086
+ if (PyErr_Occurred ()) {
2087
+ if (PyErr_ExceptionMatches (PyExc_MemoryError )) {
2088
+ /* We trust it will be caught again soon. */
2089
+ PyErr_Clear ();
2090
+ }
2091
+ else {
2092
+ /* Printing the exception should be sufficient. */
2093
+ PyErr_PrintEx (0 );
2094
+ }
2095
+ }
2096
+
2097
+ /* Any module we got from the init function will have to be
2098
+ * reloaded in the subinterpreter. */
2099
+ Py_CLEAR (mod );
2100
+
2101
+ PyThreadState_Clear (main_tstate );
2102
+ (void )PyThreadState_Swap (tstate );
2103
+ PyThreadState_Delete (main_tstate );
2104
+ }
2105
+
2106
+ /*****************************************************************/
2107
+ /* At this point we are back to the interpreter we started with. */
2108
+ /*****************************************************************/
2109
+
2110
+ /* Finally we handle the error return from _PyImport_RunModInitFunc(). */
2111
+ if (rc < 0 ) {
2112
+ _Py_ext_module_loader_result_apply_error (& res , name_buf );
2113
+ goto error ;
2114
+ }
1953
2115
1954
2116
if (res .kind == _Py_ext_module_kind_MULTIPHASE ) {
1955
2117
assert_multiphase_def (def );
1956
2118
assert (mod == NULL );
2119
+ /* Note that we cheat a little by not repeating the calls
2120
+ * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
1957
2121
mod = PyModule_FromDefAndSpec (def , spec );
1958
2122
if (mod == NULL ) {
1959
2123
goto error ;
@@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
1962
2126
else {
1963
2127
assert (res .kind == _Py_ext_module_kind_SINGLEPHASE );
1964
2128
assert_singlephase_def (def );
1965
- assert (PyModule_Check (mod ));
1966
2129
1967
2130
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed (name_buf ) < 0 ) {
1968
2131
goto error ;
1969
2132
}
2133
+ assert (!PyErr_Occurred ());
1970
2134
1971
- /* Remember the filename as the __file__ attribute */
1972
- if (info -> filename != NULL ) {
1973
- if (PyModule_AddObjectRef (mod , "__file__" , info -> filename ) < 0 ) {
1974
- PyErr_Clear (); /* Not important enough to report */
2135
+ if (switched ) {
2136
+ /* We switched to the main interpreter to run the init
2137
+ * function, so now we will "reload" the module from the
2138
+ * cached data using the original subinterpreter. */
2139
+ assert (mod == NULL );
2140
+ mod = reload_singlephase_extension (tstate , cached , info );
2141
+ if (mod == NULL ) {
2142
+ goto error ;
1975
2143
}
1976
- }
1977
-
1978
- /* Update global import state. */
1979
- assert (def -> m_base .m_index != 0 );
1980
- struct singlephase_global_update singlephase = {
1981
- // XXX Modules that share a def should each get their own index,
1982
- // whereas currently they share (which means the per-interpreter
1983
- // cache is less reliable than it should be).
1984
- .m_index = def -> m_base .m_index ,
1985
- .origin = info -> origin ,
1986
- #ifdef Py_GIL_DISABLED
1987
- .md_gil = ((PyModuleObject * )mod )-> md_gil ,
1988
- #endif
1989
- };
1990
- // gh-88216: Extensions and def->m_base.m_copy can be updated
1991
- // when the extension module doesn't support sub-interpreters.
1992
- if (def -> m_size == -1 ) {
1993
- /* We will reload from m_copy. */
1994
- assert (def -> m_base .m_init == NULL );
1995
- singlephase .m_dict = PyModule_GetDict (mod );
1996
- assert (singlephase .m_dict != NULL );
2144
+ assert (!PyErr_Occurred ());
2145
+ assert (PyModule_Check (mod ));
1997
2146
}
1998
2147
else {
1999
- /* We will reload via the init function. */
2000
- assert (def -> m_size >= 0 );
2001
- assert (def -> m_base .m_copy == NULL );
2002
- singlephase .m_init = p0 ;
2003
- }
2004
- cached = update_global_state_for_extension (
2005
- tstate , info -> path , info -> name , def , & singlephase );
2006
- if (cached == NULL ) {
2007
- goto error ;
2008
- }
2009
-
2010
- /* Update per-interpreter import state. */
2011
- PyObject * modules = get_modules_dict (tstate , true);
2012
- if (finish_singlephase_extension (
2013
- tstate , mod , cached , info -> name , modules ) < 0 )
2014
- {
2015
- goto error ;
2148
+ assert (mod != NULL );
2149
+ assert (PyModule_Check (mod ));
2150
+
2151
+ /* Update per-interpreter import state. */
2152
+ PyObject * modules = get_modules_dict (tstate , true);
2153
+ if (finish_singlephase_extension (
2154
+ tstate , mod , cached , info -> name , modules ) < 0 )
2155
+ {
2156
+ goto error ;
2157
+ }
2016
2158
}
2017
2159
}
2018
2160
0 commit comments