diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 374a138865..01acf6068b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -148,6 +148,7 @@ set(PYBIND11_TEST_FILES test_exceptions test_factory_constructors test_gil_scoped + test_invalid_holder_access test_iostream test_kwargs_and_defaults test_local_bindings @@ -558,7 +559,7 @@ set(PYBIND11_PYTEST_ARGS # A single command to compile and run the tests add_custom_target( pytest - COMMAND ${PYBIND11_TEST_PREFIX_COMMAND} ${PYTHON_EXECUTABLE} -m pytest + COMMAND ${PYBIND11_TEST_PREFIX_COMMAND} ${PYTHON_EXECUTABLE} -X faulthandler -m pytest ${PYBIND11_ABS_PYTEST_FILES} ${PYBIND11_PYTEST_ARGS} DEPENDS ${test_targets} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp index 4317925737..03e6e8a88a 100644 --- a/tests/pybind11_tests.cpp +++ b/tests/pybind11_tests.cpp @@ -75,6 +75,19 @@ const char *cpp_std() { #endif } +int cpp_std_num() { + return +#if defined(PYBIND11_CPP20) + 20; +#elif defined(PYBIND11_CPP17) + 17; +#elif defined(PYBIND11_CPP14) + 14; +#else + 11; +#endif +} + PYBIND11_MODULE(pybind11_tests, m, py::mod_gil_not_used()) { m.doc() = "pybind11 test module"; @@ -88,6 +101,7 @@ PYBIND11_MODULE(pybind11_tests, m, py::mod_gil_not_used()) { m.attr("compiler_info") = py::none(); #endif m.attr("cpp_std") = cpp_std(); + m.attr("cpp_std_num") = cpp_std_num(); m.attr("PYBIND11_INTERNALS_ID") = PYBIND11_INTERNALS_ID; // Free threaded Python uses UINT32_MAX for immortal objects. m.attr("PYBIND11_SIMPLE_GIL_MANAGEMENT") = diff --git a/tests/test_invalid_holder_access.cpp b/tests/test_invalid_holder_access.cpp new file mode 100644 index 0000000000..fedf472e48 --- /dev/null +++ b/tests/test_invalid_holder_access.cpp @@ -0,0 +1,145 @@ +#include "pybind11_tests.h" + +#include +#include +#include + +class VecOwnsObjs { +public: + void append(const py::object &obj) { vec.emplace_back(obj); } + + void set_item(py::ssize_t i, const py::object &obj) { + if (!(i >= 0 && i < size())) { + throw std::out_of_range("Index out of range"); + } + vec[py::size_t(i)] = obj; + } + + py::object get_item(py::ssize_t i) const { + if (!(i >= 0 && i < size())) { + throw std::out_of_range("Index out of range"); + } + return vec[py::size_t(i)]; + } + + py::ssize_t size() const { return py::ssize_t_cast(vec.size()); } + + bool is_empty() const { return vec.empty(); } + + static int tp_traverse(PyObject *self_base, visitproc visit, void *arg) { + // https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_traverse +#if PY_VERSION_HEX >= 0x03090000 + Py_VISIT(Py_TYPE(self_base)); +#endif + + if (should_check_holder_initialization && !py::detail::is_holder_constructed(self_base)) { + // The holder has not been constructed yet. Skip the traversal to avoid + // segmentation faults. + return 0; + } + + // The actual logic of the tp_traverse function goes here. + auto &self = py::cast(py::handle{self_base}); + for (const auto &obj : self.vec) { + Py_VISIT(obj.ptr()); + } + return 0; + } + + static int tp_clear(PyObject *self_base) { + if (should_check_holder_initialization && !py::detail::is_holder_constructed(self_base)) { + // The holder has not been constructed yet. Skip the traversal to avoid + // segmentation faults. + return 0; + } + + // The actual logic of the tp_clear function goes here. + auto &self = py::cast(py::handle{self_base}); + for (auto &obj : self.vec) { + Py_CLEAR(obj.ptr()); + } + self.vec.clear(); + return 0; + } + + py::object get_state() const { + py::list state{}; + for (const auto &item : vec) { + state.append(item); + } + return py::tuple(state); + } + + static bool get_should_check_holder_initialization() { + return should_check_holder_initialization; + } + + static void set_should_check_holder_initialization(bool value) { + should_check_holder_initialization = value; + } + + static bool get_should_raise_error_on_set_state() { return should_raise_error_on_set_state; } + + static void set_should_raise_error_on_set_state(bool value) { + should_raise_error_on_set_state = value; + } + + static bool should_check_holder_initialization; + static bool should_raise_error_on_set_state; + +private: + std::vector vec{}; +}; + +bool VecOwnsObjs::should_check_holder_initialization = false; +bool VecOwnsObjs::should_raise_error_on_set_state = false; + +TEST_SUBMODULE(invalid_holder_access, m) { + m.doc() = "Test invalid holder access"; + +#if defined(PYBIND11_CPP14) + m.def("create_vector", [](const py::iterable &iterable) -> std::unique_ptr { + auto vec = std::make_unique(); + for (const auto &item : iterable) { + vec->append(py::reinterpret_borrow(item)); + } + return vec; + }); +#endif + + py::class_( + m, "VecOwnsObjs", py::custom_type_setup([](PyHeapTypeObject *heap_type) -> void { + auto *const type = &heap_type->ht_type; + type->tp_flags |= Py_TPFLAGS_HAVE_GC; + type->tp_traverse = &VecOwnsObjs::tp_traverse; + type->tp_clear = &VecOwnsObjs::tp_clear; + })) + .def_static("set_should_check_holder_initialization", + &VecOwnsObjs::set_should_check_holder_initialization, + py::arg("value")) + .def_static("set_should_raise_error_on_set_state", + &VecOwnsObjs::set_should_raise_error_on_set_state, + py::arg("value")) +#if defined(PYBIND11_CPP14) + .def(py::pickle([](const VecOwnsObjs &self) -> py::object { return self.get_state(); }, + [](const py::object &state) -> std::unique_ptr { + if (!py::isinstance(state)) { + throw std::runtime_error("Invalid state"); + } + auto vec = std::make_unique(); + if (VecOwnsObjs::get_should_raise_error_on_set_state()) { + throw std::runtime_error("raise error on set_state for testing"); + } + for (const auto &item : state) { + vec->append(py::reinterpret_borrow(item)); + } + return vec; + }), + py::arg("state")) +#endif + .def("append", &VecOwnsObjs::append, py::arg("obj")) + .def("is_empty", &VecOwnsObjs::is_empty) + .def("__setitem__", &VecOwnsObjs::set_item, py::arg("i"), py::arg("obj")) + .def("__getitem__", &VecOwnsObjs::get_item, py::arg("i")) + .def("__len__", &VecOwnsObjs::size); +} diff --git a/tests/test_invalid_holder_access.py b/tests/test_invalid_holder_access.py new file mode 100644 index 0000000000..63d489b92c --- /dev/null +++ b/tests/test_invalid_holder_access.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import gc +import multiprocessing +import pickle +import signal +import sys +import weakref + +import pytest + +import env # noqa: F401 +import pybind11_tests +from pybind11_tests import invalid_holder_access as m + +XFAIL_REASON = "Known issues: https://github.com/pybind/pybind11/pull/5654" + + +try: + import multiprocessing + + spawn_context = multiprocessing.get_context("spawn") +except (ImportError, ValueError): + spawn_context = None + + +def check_segfault(target): + """Check if the target function causes a segmentation fault. + + The check should be done in a separate process to avoid crashing the main + process with the segfault. + """ + process = spawn_context.Process(target=target) + process.start() + process.join() + rc = abs(process.exitcode) + if 128 < rc < 256: + rc -= 128 + assert rc in ( + 0, + signal.SIGSEGV, + signal.SIGABRT, + 0xC0000005, # STATUS_ACCESS_VIOLATION on Windows + ) + if rc != 0: + raise SystemError( + "Segmentation Fault: The C++ compiler initializes container incorrectly." + ) + + +def test_no_init(): + with pytest.raises(TypeError, match=r"No constructor defined"): + m.VecOwnsObjs() + vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs) + with pytest.raises(TypeError, match=r"No constructor defined"): + vec.__init__() + + +def manual_new_target(): + # Repeatedly trigger allocation without initialization (raw malloc'ed) to + # detect uninitialized memory bugs. + for _ in range(32): + # The holder is a pointer variable while the C++ ctor is not called. + vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs) + if vec.is_empty(): # <= this line could cause a segfault + # The C++ compiler initializes container correctly. + assert len(vec) == 0 + else: + # The program is not supposed to reach here. It will abort with + # SIGSEGV on `vec.is_empty()`. + sys.exit(signal.SIGSEGV) # manually trigger SIGSEGV if not raised + + +# This test might succeed on some platforms with some compilers, but it is not +# guaranteed to work everywhere. It is marked as non-strict xfail. +@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False) +@pytest.mark.skipif(spawn_context is None, reason="spawn context not available") +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), + reason="Requires multiprocessing", +) +def test_manual_new(): + """ + Manually calling the constructor (__new__) of a class that has C++ STL + container will allocate memory without initializing it can cause a + segmentation fault. + """ + check_segfault(manual_new_target) + + +@pytest.mark.skipif( + pybind11_tests.cpp_std_num < 14, + reason="std::{unique_ptr,make_unique} not available in C++11", +) +def test_set_state_with_error_no_segfault_if_gc_checks_holder_has_initialized(): + """ + The ``tp_traverse`` and ``tp_clear`` functions should check if the holder + has been initialized before traversing or clearing the C++ STL container. + """ + m.VecOwnsObjs.set_should_check_holder_initialization(True) + + vec = m.create_vector((1, 2, 3, 4)) + + m.VecOwnsObjs.set_should_raise_error_on_set_state(False) + pickle.loads(pickle.dumps(vec)) + + # During the unpickling process, Python firstly allocates the object with + # the `__new__` method and then calls the `__setstate__` method to set the + # state of the object. + # + # obj = cls.__new__(cls) + # obj.__setstate__(state) + # + # The `__init__` method is not called during unpickling. + # So, if the `__setstate__` method raises an error, the object will be in + # an uninitialized state. + # If `tp_traverse` and `tp_clear` functions check if the holder has been + # initialized, the functions can exit early and do not cause segfault on GC. + m.VecOwnsObjs.set_should_raise_error_on_set_state(True) + serialized = pickle.dumps(vec) + with pytest.raises( + RuntimeError, + match="raise error on set_state for testing", + ): + pickle.loads(serialized) + + +def unpicklable_with_error_target(): + m.VecOwnsObjs.set_should_check_holder_initialization(False) + m.VecOwnsObjs.set_should_raise_error_on_set_state(True) + + vec = m.create_vector((1, 2, 3, 4)) + serialized = pickle.dumps(vec) + + # Repeatedly trigger allocation without initialization (raw malloc'ed) to + # detect uninitialized memory bugs. + for _ in range(32): + # During the unpickling process, Python firstly allocates the object with + # the `__new__` method and then calls the `__setstate__` method to set the + # state of the object. + # + # obj = cls.__new__(cls) + # obj.__setstate__(state) + # + # The `__init__` method is not called during unpickling. + # So, if the `__setstate__` method raises an error, the object will be in + # an uninitialized state. The GC will traverse the uninitialized C++ STL + # container and cause a segmentation fault. + try: # noqa: SIM105 + pickle.loads(serialized) + except RuntimeError: + pass + + +# This test might succeed on some platforms with some compilers, but it is not +# guaranteed to work everywhere. It is marked as non-strict xfail. +@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False) +@pytest.mark.skipif(spawn_context is None, reason="spawn context not available") +@pytest.mark.skipif( + pybind11_tests.cpp_std_num < 14, + reason="std::{unique_ptr,make_unique} not available in C++11", +) +def test_set_state_with_error_will_segfault_if_gc_does_not_check_holder_has_initialized(): + m.VecOwnsObjs.set_should_check_holder_initialization(False) + + vec = m.create_vector((1, 2, 3, 4)) + + m.VecOwnsObjs.set_should_raise_error_on_set_state(False) + pickle.loads(pickle.dumps(vec)) + + # See comments above. + check_segfault(unpicklable_with_error_target) + + +@pytest.mark.skipif("env.PYPY or env.GRAALPY", reason="Cannot reliably trigger GC") +@pytest.mark.skipif( + pybind11_tests.cpp_std_num < 14, + reason="std::{unique_ptr,make_unique} not available in C++11", +) +def test_gc(): + vec = m.create_vector(()) + vec.append((vec, vec)) + + wr = weakref.ref(vec) + assert wr() is vec + del vec + for _ in range(10): + gc.collect() + assert wr() is None