|
| 1 | +diff --git a/CMakeLists.txt b/CMakeLists.txt |
| 2 | +index 87ec103..f2f1d19 100644 |
| 3 | +--- a/CMakeLists.txt |
| 4 | ++++ b/CMakeLists.txt |
| 5 | +@@ -132,6 +132,7 @@ set(PYBIND11_HEADERS |
| 6 | + include/pybind11/embed.h |
| 7 | + include/pybind11/eval.h |
| 8 | + include/pybind11/gil.h |
| 9 | ++ include/pybind11/gil_safe_call_once.h |
| 10 | + include/pybind11/iostream.h |
| 11 | + include/pybind11/functional.h |
| 12 | + include/pybind11/numpy.h |
| 13 | +diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h |
| 14 | +index 31a54c7..8906c1f 100644 |
| 15 | +--- a/include/pybind11/detail/common.h |
| 16 | ++++ b/include/pybind11/detail/common.h |
| 17 | +@@ -118,6 +118,14 @@ |
| 18 | + # endif |
| 19 | + #endif |
| 20 | + |
| 21 | ++#if defined(PYBIND11_CPP20) |
| 22 | ++# define PYBIND11_CONSTINIT constinit |
| 23 | ++# define PYBIND11_DTOR_CONSTEXPR constexpr |
| 24 | ++#else |
| 25 | ++# define PYBIND11_CONSTINIT |
| 26 | ++# define PYBIND11_DTOR_CONSTEXPR |
| 27 | ++#endif |
| 28 | ++ |
| 29 | + // Compiler version assertions |
| 30 | + #if defined(__INTEL_COMPILER) |
| 31 | + # if __INTEL_COMPILER < 1800 |
| 32 | +diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h |
| 33 | +index 570a558..da22f48 100644 |
| 34 | +--- a/include/pybind11/gil.h |
| 35 | ++++ b/include/pybind11/gil.h |
| 36 | +@@ -11,6 +11,8 @@ |
| 37 | + |
| 38 | + #include "detail/common.h" |
| 39 | + |
| 40 | ++#include <cassert> |
| 41 | ++ |
| 42 | + #if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) |
| 43 | + # include "detail/internals.h" |
| 44 | + #endif |
| 45 | +@@ -137,7 +139,9 @@ private: |
| 46 | + |
| 47 | + class gil_scoped_release { |
| 48 | + public: |
| 49 | ++ // PRECONDITION: The GIL must be held when this constructor is called. |
| 50 | + explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { |
| 51 | ++ assert(PyGILState_Check()); |
| 52 | + // `get_internals()` must be called here unconditionally in order to initialize |
| 53 | + // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an |
| 54 | + // initialization race could occur as multiple threads try `gil_scoped_acquire`. |
| 55 | +@@ -201,7 +205,11 @@ class gil_scoped_release { |
| 56 | + PyThreadState *state; |
| 57 | + |
| 58 | + public: |
| 59 | +- gil_scoped_release() : state{PyEval_SaveThread()} {} |
| 60 | ++ // PRECONDITION: The GIL must be held when this constructor is called. |
| 61 | ++ gil_scoped_release() { |
| 62 | ++ assert(PyGILState_Check()); |
| 63 | ++ state = PyEval_SaveThread(); |
| 64 | ++ } |
| 65 | + gil_scoped_release(const gil_scoped_release &) = delete; |
| 66 | + gil_scoped_release &operator=(const gil_scoped_release &) = delete; |
| 67 | + ~gil_scoped_release() { PyEval_RestoreThread(state); } |
| 68 | +diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h |
| 69 | +new file mode 100644 |
| 70 | +index 0000000..58b90b8 |
| 71 | +--- /dev/null |
| 72 | ++++ b/include/pybind11/gil_safe_call_once.h |
| 73 | +@@ -0,0 +1,90 @@ |
| 74 | ++// Copyright (c) 2023 The pybind Community. |
| 75 | ++ |
| 76 | ++ |
| 77 | ++#include "detail/common.h" |
| 78 | ++#include "gil.h" |
| 79 | ++ |
| 80 | ++#include <cassert> |
| 81 | ++#include <mutex> |
| 82 | ++ |
| 83 | ++PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) |
| 84 | ++ |
| 85 | ++// Use the `gil_safe_call_once_and_store` class below instead of the naive |
| 86 | ++// |
| 87 | ++// static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! |
| 88 | ++// |
| 89 | ++// which has two serious issues: |
| 90 | ++// |
| 91 | ++// 1. Py_DECREF() calls potentially after the Python interpreter was finalized already, and |
| 92 | ++// 2. deadlocks in multi-threaded processes (because of missing lock ordering). |
| 93 | ++// |
| 94 | ++// The following alternative avoids both problems: |
| 95 | ++// |
| 96 | ++// PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store<py::object> storage; |
| 97 | ++// auto &imported_obj = storage // Do NOT make this `static`! |
| 98 | ++// .call_once_and_store_result([]() { |
| 99 | ++// return py::module_::import("module_name"); |
| 100 | ++// }) |
| 101 | ++// .get_stored(); |
| 102 | ++// |
| 103 | ++// The parameter of `call_once_and_store_result()` must be callable. It can make |
| 104 | ++// CPython API calls, and in particular, it can temporarily release the GIL. |
| 105 | ++// |
| 106 | ++// `T` can be any C++ type, it does not have to involve CPython API types. |
| 107 | ++// |
| 108 | ++// The behavior with regard to signals, e.g. `SIGINT` (`KeyboardInterrupt`), |
| 109 | ++// is not ideal. If the main thread is the one to actually run the `Callable`, |
| 110 | ++// then a `KeyboardInterrupt` will interrupt it if it is running normal Python |
| 111 | ++// code. The situation is different if a non-main thread runs the |
| 112 | ++// `Callable`, and then the main thread starts waiting for it to complete: |
| 113 | ++// a `KeyboardInterrupt` will not interrupt the non-main thread, but it will |
| 114 | ++// get processed only when it is the main thread's turn again and it is running |
| 115 | ++// normal Python code. However, this will be unnoticeable for quick call-once |
| 116 | ++// functions, which is usually the case. |
| 117 | ++template <typename T> |
| 118 | ++class gil_safe_call_once_and_store { |
| 119 | ++public: |
| 120 | ++ // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. |
| 121 | ++ template <typename Callable> |
| 122 | ++ gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { |
| 123 | ++ if (!is_initialized_) { // This read is guarded by the GIL. |
| 124 | ++ // Multiple threads may enter here, because the GIL is released in the next line and |
| 125 | ++ // CPython API calls in the `fn()` call below may release and reacquire the GIL. |
| 126 | ++ gil_scoped_release gil_rel; // Needed to establish lock ordering. |
| 127 | ++ std::call_once(once_flag_, [&] { |
| 128 | ++ // Only one thread will ever enter here. |
| 129 | ++ gil_scoped_acquire gil_acq; |
| 130 | ++ ::new (storage_) T(fn()); // fn may release, but will reacquire, the GIL. |
| 131 | ++ is_initialized_ = true; // This write is guarded by the GIL. |
| 132 | ++ }); |
| 133 | ++ // All threads will observe `is_initialized_` as true here. |
| 134 | ++ } |
| 135 | ++ // Intentionally not returning `T &` to ensure the calling code is self-documenting. |
| 136 | ++ return *this; |
| 137 | ++ } |
| 138 | ++ |
| 139 | ++ // This must only be called after `call_once_and_store_result()` was called. |
| 140 | ++ T &get_stored() { |
| 141 | ++ assert(is_initialized_); |
| 142 | ++ PYBIND11_WARNING_PUSH |
| 143 | ++#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 |
| 144 | ++ // Needed for gcc 4.8.5 |
| 145 | ++ PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") |
| 146 | ++#endif |
| 147 | ++ return *reinterpret_cast<T *>(storage_); |
| 148 | ++ PYBIND11_WARNING_POP |
| 149 | ++ } |
| 150 | ++ |
| 151 | ++ constexpr gil_safe_call_once_and_store() = default; |
| 152 | ++ PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; |
| 153 | ++ |
| 154 | ++private: |
| 155 | ++ alignas(T) char storage_[sizeof(T)] = {}; |
| 156 | ++ std::once_flag once_flag_ = {}; |
| 157 | ++ bool is_initialized_ = false; |
| 158 | ++ // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, |
| 159 | ++ // but the latter does not have the triviality properties of former, |
| 160 | ++ // therefore `std::optional` is not a viable alternative here. |
| 161 | ++}; |
| 162 | ++ |
| 163 | ++PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) |
| 164 | +diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h |
| 165 | +index 36077ec..1217c8d 100644 |
| 166 | +--- a/include/pybind11/numpy.h |
| 167 | ++++ b/include/pybind11/numpy.h |
| 168 | +@@ -10,7 +10,10 @@ |
| 169 | + #pragma once |
| 170 | + |
| 171 | + #include "pybind11.h" |
| 172 | ++#include "detail/common.h" |
| 173 | + #include "complex.h" |
| 174 | ++#include "gil_safe_call_once.h" |
| 175 | ++#include "pytypes.h" |
| 176 | + |
| 177 | + #include <algorithm> |
| 178 | + #include <array> |
| 179 | +@@ -120,6 +123,20 @@ inline numpy_internals &get_numpy_internals() { |
| 180 | + return *ptr; |
| 181 | + } |
| 182 | + |
| 183 | ++PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name) { |
| 184 | ++ module_ numpy = module_::import("numpy"); |
| 185 | ++ str version_string = numpy.attr("__version__"); |
| 186 | ++ |
| 187 | ++ module_ numpy_lib = module_::import("numpy.lib"); |
| 188 | ++ object numpy_version = numpy_lib.attr("NumpyVersion")(version_string); |
| 189 | ++ int major_version = numpy_version.attr("major").cast<int>(); |
| 190 | ++ |
| 191 | ++ /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially |
| 192 | ++ became a private module. */ |
| 193 | ++ std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core"; |
| 194 | ++ return module_::import((numpy_core_path + "." + submodule_name).c_str()); |
| 195 | ++} |
| 196 | ++ |
| 197 | + template <typename T> |
| 198 | + struct same_size { |
| 199 | + template <typename U> |
| 200 | +@@ -192,8 +209,8 @@ struct npy_api { |
| 201 | + }; |
| 202 | + |
| 203 | + static npy_api &get() { |
| 204 | +- static npy_api api = lookup(); |
| 205 | +- return api; |
| 206 | ++ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<npy_api> storage; |
| 207 | ++ return storage.call_once_and_store_result(lookup).get_stored(); |
| 208 | + } |
| 209 | + |
| 210 | + bool PyArray_Check_(PyObject *obj) const { |
| 211 | +@@ -263,9 +280,13 @@ private: |
| 212 | + }; |
| 213 | + |
| 214 | + static npy_api lookup() { |
| 215 | +- module_ m = module_::import("numpy.core.multiarray"); |
| 216 | ++ module_ m = detail::import_numpy_core_submodule("multiarray"); |
| 217 | + auto c = m.attr("_ARRAY_API"); |
| 218 | + void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), nullptr); |
| 219 | ++ if (api_ptr == nullptr) { |
| 220 | ++ raise_from(PyExc_SystemError, "FAILURE obtaining numpy _ARRAY_API pointer."); |
| 221 | ++ throw error_already_set(); |
| 222 | ++ } |
| 223 | + npy_api api; |
| 224 | + #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; |
| 225 | + DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); |
| 226 | +@@ -625,13 +646,14 @@ public: |
| 227 | + char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; } |
| 228 | + |
| 229 | + private: |
| 230 | +- static object _dtype_from_pep3118() { |
| 231 | +- static PyObject *obj = module_::import("numpy.core._internal") |
| 232 | +- .attr("_dtype_from_pep3118") |
| 233 | +- .cast<object>() |
| 234 | +- .release() |
| 235 | +- .ptr(); |
| 236 | +- return reinterpret_borrow<object>(obj); |
| 237 | ++ static object &_dtype_from_pep3118() { |
| 238 | ++ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<object> storage; |
| 239 | ++ return storage |
| 240 | ++ .call_once_and_store_result([]() { |
| 241 | ++ return detail::import_numpy_core_submodule("_internal") |
| 242 | ++ .attr("_dtype_from_pep3118"); |
| 243 | ++ }) |
| 244 | ++ .get_stored(); |
| 245 | + } |
| 246 | + |
| 247 | + dtype strip_padding(ssize_t itemsize) { |
0 commit comments