Skip to content

Commit f66de38

Browse files
committed
Apply pybind11 patch to avoid deadlock
This applies the following patches to pybind11: - pybind/pybind11#4857 - pybind/pybind11#4877 to avoid deadlock when using pybind11 without importing numpy in multi-threaded environment.
1 parent 44c39dc commit f66de38

File tree

4 files changed

+496
-0
lines changed

4 files changed

+496
-0
lines changed

cpp/cmake/deps/pybind11.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if (NOT TARGET deps::pybind11)
1919
GIT_REPOSITORY https://github.com/pybind/pybind11.git
2020
GIT_TAG v2.11.1
2121
GIT_SHALLOW TRUE
22+
PATCH_COMMAND git apply "${CMAKE_CURRENT_LIST_DIR}/pybind11_pr4857_4877.patch" || true
2223
)
2324
FetchContent_GetProperties(deps-pybind11)
2425
if (NOT deps-pybind11_POPULATED)
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
diff --git a/CMakeLists.txt b/CMakeLists.txt
2+
index 87ec103..f2f1d19 100644
3+
--- a/CMakeLists.txt
4+
+++ b/CMakeLists.txt
5+
@@ -132,6 +132,7 @@ set(PYBIND11_HEADERS
6+
include/pybind11/embed.h
7+
include/pybind11/eval.h
8+
include/pybind11/gil.h
9+
+ include/pybind11/gil_safe_call_once.h
10+
include/pybind11/iostream.h
11+
include/pybind11/functional.h
12+
include/pybind11/numpy.h
13+
diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h
14+
index 31a54c7..8906c1f 100644
15+
--- a/include/pybind11/detail/common.h
16+
+++ b/include/pybind11/detail/common.h
17+
@@ -118,6 +118,14 @@
18+
# endif
19+
#endif
20+
21+
+#if defined(PYBIND11_CPP20)
22+
+# define PYBIND11_CONSTINIT constinit
23+
+# define PYBIND11_DTOR_CONSTEXPR constexpr
24+
+#else
25+
+# define PYBIND11_CONSTINIT
26+
+# define PYBIND11_DTOR_CONSTEXPR
27+
+#endif
28+
+
29+
// Compiler version assertions
30+
#if defined(__INTEL_COMPILER)
31+
# if __INTEL_COMPILER < 1800
32+
diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h
33+
index 570a558..da22f48 100644
34+
--- a/include/pybind11/gil.h
35+
+++ b/include/pybind11/gil.h
36+
@@ -11,6 +11,8 @@
37+
38+
#include "detail/common.h"
39+
40+
+#include <cassert>
41+
+
42+
#if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
43+
# include "detail/internals.h"
44+
#endif
45+
@@ -137,7 +139,9 @@ private:
46+
47+
class gil_scoped_release {
48+
public:
49+
+ // PRECONDITION: The GIL must be held when this constructor is called.
50+
explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) {
51+
+ assert(PyGILState_Check());
52+
// `get_internals()` must be called here unconditionally in order to initialize
53+
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
54+
// initialization race could occur as multiple threads try `gil_scoped_acquire`.
55+
@@ -201,7 +205,11 @@ class gil_scoped_release {
56+
PyThreadState *state;
57+
58+
public:
59+
- gil_scoped_release() : state{PyEval_SaveThread()} {}
60+
+ // PRECONDITION: The GIL must be held when this constructor is called.
61+
+ gil_scoped_release() {
62+
+ assert(PyGILState_Check());
63+
+ state = PyEval_SaveThread();
64+
+ }
65+
gil_scoped_release(const gil_scoped_release &) = delete;
66+
gil_scoped_release &operator=(const gil_scoped_release &) = delete;
67+
~gil_scoped_release() { PyEval_RestoreThread(state); }
68+
diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h
69+
new file mode 100644
70+
index 0000000..58b90b8
71+
--- /dev/null
72+
+++ b/include/pybind11/gil_safe_call_once.h
73+
@@ -0,0 +1,90 @@
74+
+// Copyright (c) 2023 The pybind Community.
75+
+
76+
+
77+
+#include "detail/common.h"
78+
+#include "gil.h"
79+
+
80+
+#include <cassert>
81+
+#include <mutex>
82+
+
83+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
84+
+
85+
+// Use the `gil_safe_call_once_and_store` class below instead of the naive
86+
+//
87+
+// static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE!
88+
+//
89+
+// which has two serious issues:
90+
+//
91+
+// 1. Py_DECREF() calls potentially after the Python interpreter was finalized already, and
92+
+// 2. deadlocks in multi-threaded processes (because of missing lock ordering).
93+
+//
94+
+// The following alternative avoids both problems:
95+
+//
96+
+// PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store<py::object> storage;
97+
+// auto &imported_obj = storage // Do NOT make this `static`!
98+
+// .call_once_and_store_result([]() {
99+
+// return py::module_::import("module_name");
100+
+// })
101+
+// .get_stored();
102+
+//
103+
+// The parameter of `call_once_and_store_result()` must be callable. It can make
104+
+// CPython API calls, and in particular, it can temporarily release the GIL.
105+
+//
106+
+// `T` can be any C++ type, it does not have to involve CPython API types.
107+
+//
108+
+// The behavior with regard to signals, e.g. `SIGINT` (`KeyboardInterrupt`),
109+
+// is not ideal. If the main thread is the one to actually run the `Callable`,
110+
+// then a `KeyboardInterrupt` will interrupt it if it is running normal Python
111+
+// code. The situation is different if a non-main thread runs the
112+
+// `Callable`, and then the main thread starts waiting for it to complete:
113+
+// a `KeyboardInterrupt` will not interrupt the non-main thread, but it will
114+
+// get processed only when it is the main thread's turn again and it is running
115+
+// normal Python code. However, this will be unnoticeable for quick call-once
116+
+// functions, which is usually the case.
117+
+template <typename T>
118+
+class gil_safe_call_once_and_store {
119+
+public:
120+
+ // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called.
121+
+ template <typename Callable>
122+
+ gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) {
123+
+ if (!is_initialized_) { // This read is guarded by the GIL.
124+
+ // Multiple threads may enter here, because the GIL is released in the next line and
125+
+ // CPython API calls in the `fn()` call below may release and reacquire the GIL.
126+
+ gil_scoped_release gil_rel; // Needed to establish lock ordering.
127+
+ std::call_once(once_flag_, [&] {
128+
+ // Only one thread will ever enter here.
129+
+ gil_scoped_acquire gil_acq;
130+
+ ::new (storage_) T(fn()); // fn may release, but will reacquire, the GIL.
131+
+ is_initialized_ = true; // This write is guarded by the GIL.
132+
+ });
133+
+ // All threads will observe `is_initialized_` as true here.
134+
+ }
135+
+ // Intentionally not returning `T &` to ensure the calling code is self-documenting.
136+
+ return *this;
137+
+ }
138+
+
139+
+ // This must only be called after `call_once_and_store_result()` was called.
140+
+ T &get_stored() {
141+
+ assert(is_initialized_);
142+
+ PYBIND11_WARNING_PUSH
143+
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5
144+
+ // Needed for gcc 4.8.5
145+
+ PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing")
146+
+#endif
147+
+ return *reinterpret_cast<T *>(storage_);
148+
+ PYBIND11_WARNING_POP
149+
+ }
150+
+
151+
+ constexpr gil_safe_call_once_and_store() = default;
152+
+ PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default;
153+
+
154+
+private:
155+
+ alignas(T) char storage_[sizeof(T)] = {};
156+
+ std::once_flag once_flag_ = {};
157+
+ bool is_initialized_ = false;
158+
+ // The `is_initialized_`-`storage_` pair is very similar to `std::optional`,
159+
+ // but the latter does not have the triviality properties of former,
160+
+ // therefore `std::optional` is not a viable alternative here.
161+
+};
162+
+
163+
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
164+
diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h
165+
index 36077ec..1217c8d 100644
166+
--- a/include/pybind11/numpy.h
167+
+++ b/include/pybind11/numpy.h
168+
@@ -10,7 +10,10 @@
169+
#pragma once
170+
171+
#include "pybind11.h"
172+
+#include "detail/common.h"
173+
#include "complex.h"
174+
+#include "gil_safe_call_once.h"
175+
+#include "pytypes.h"
176+
177+
#include <algorithm>
178+
#include <array>
179+
@@ -120,6 +123,20 @@ inline numpy_internals &get_numpy_internals() {
180+
return *ptr;
181+
}
182+
183+
+PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name) {
184+
+ module_ numpy = module_::import("numpy");
185+
+ str version_string = numpy.attr("__version__");
186+
+
187+
+ module_ numpy_lib = module_::import("numpy.lib");
188+
+ object numpy_version = numpy_lib.attr("NumpyVersion")(version_string);
189+
+ int major_version = numpy_version.attr("major").cast<int>();
190+
+
191+
+ /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially
192+
+ became a private module. */
193+
+ std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core";
194+
+ return module_::import((numpy_core_path + "." + submodule_name).c_str());
195+
+}
196+
+
197+
template <typename T>
198+
struct same_size {
199+
template <typename U>
200+
@@ -192,8 +209,8 @@ struct npy_api {
201+
};
202+
203+
static npy_api &get() {
204+
- static npy_api api = lookup();
205+
- return api;
206+
+ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<npy_api> storage;
207+
+ return storage.call_once_and_store_result(lookup).get_stored();
208+
}
209+
210+
bool PyArray_Check_(PyObject *obj) const {
211+
@@ -263,9 +280,13 @@ private:
212+
};
213+
214+
static npy_api lookup() {
215+
- module_ m = module_::import("numpy.core.multiarray");
216+
+ module_ m = detail::import_numpy_core_submodule("multiarray");
217+
auto c = m.attr("_ARRAY_API");
218+
void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), nullptr);
219+
+ if (api_ptr == nullptr) {
220+
+ raise_from(PyExc_SystemError, "FAILURE obtaining numpy _ARRAY_API pointer.");
221+
+ throw error_already_set();
222+
+ }
223+
npy_api api;
224+
#define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func];
225+
DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion);
226+
@@ -625,13 +646,14 @@ public:
227+
char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; }
228+
229+
private:
230+
- static object _dtype_from_pep3118() {
231+
- static PyObject *obj = module_::import("numpy.core._internal")
232+
- .attr("_dtype_from_pep3118")
233+
- .cast<object>()
234+
- .release()
235+
- .ptr();
236+
- return reinterpret_borrow<object>(obj);
237+
+ static object &_dtype_from_pep3118() {
238+
+ PYBIND11_CONSTINIT static gil_safe_call_once_and_store<object> storage;
239+
+ return storage
240+
+ .call_once_and_store_result([]() {
241+
+ return detail::import_numpy_core_submodule("_internal")
242+
+ .attr("_dtype_from_pep3118");
243+
+ })
244+
+ .get_stored();
245+
}
246+
247+
dtype strip_padding(ssize_t itemsize) {

python/cmake/deps/pybind11.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if (NOT TARGET deps::pybind11)
1919
GIT_REPOSITORY https://github.com/pybind/pybind11.git
2020
GIT_TAG v2.11.1
2121
GIT_SHALLOW TRUE
22+
PATCH_COMMAND git apply "${CMAKE_CURRENT_LIST_DIR}/pybind11_pr4857_4877.patch" || true
2223
)
2324
FetchContent_GetProperties(deps-pybind11)
2425
if (NOT deps-pybind11_POPULATED)

0 commit comments

Comments
 (0)