Skip to content

Add tests for accessing uninitialized holders #5654

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ set(PYBIND11_TEST_FILES
test_exceptions
test_factory_constructors
test_gil_scoped
test_invalid_holder_access
test_iostream
test_kwargs_and_defaults
test_local_bindings
Expand Down Expand Up @@ -558,7 +559,7 @@ set(PYBIND11_PYTEST_ARGS
# A single command to compile and run the tests
add_custom_target(
pytest
COMMAND ${PYBIND11_TEST_PREFIX_COMMAND} ${PYTHON_EXECUTABLE} -m pytest
COMMAND ${PYBIND11_TEST_PREFIX_COMMAND} ${PYTHON_EXECUTABLE} -X faulthandler -m pytest
${PYBIND11_ABS_PYTEST_FILES} ${PYBIND11_PYTEST_ARGS}
DEPENDS ${test_targets}
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
Expand Down
14 changes: 14 additions & 0 deletions tests/pybind11_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ const char *cpp_std() {
#endif
}

int cpp_std_num() {
return
#if defined(PYBIND11_CPP20)
20;
#elif defined(PYBIND11_CPP17)
17;
#elif defined(PYBIND11_CPP14)
14;
#else
11;
#endif
}

PYBIND11_MODULE(pybind11_tests, m, py::mod_gil_not_used()) {
m.doc() = "pybind11 test module";

Expand All @@ -88,6 +101,7 @@ PYBIND11_MODULE(pybind11_tests, m, py::mod_gil_not_used()) {
m.attr("compiler_info") = py::none();
#endif
m.attr("cpp_std") = cpp_std();
m.attr("cpp_std_num") = cpp_std_num();
m.attr("PYBIND11_INTERNALS_ID") = PYBIND11_INTERNALS_ID;
// Free threaded Python uses UINT32_MAX for immortal objects.
m.attr("PYBIND11_SIMPLE_GIL_MANAGEMENT") =
Expand Down
145 changes: 145 additions & 0 deletions tests/test_invalid_holder_access.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#include "pybind11_tests.h"

#include <Python.h>
#include <memory>
#include <vector>

class VecOwnsObjs {
public:
void append(const py::object &obj) { vec.emplace_back(obj); }

void set_item(py::ssize_t i, const py::object &obj) {
if (!(i >= 0 && i < size())) {
throw std::out_of_range("Index out of range");
}
vec[py::size_t(i)] = obj;
}

py::object get_item(py::ssize_t i) const {
if (!(i >= 0 && i < size())) {
throw std::out_of_range("Index out of range");
}
return vec[py::size_t(i)];
}

py::ssize_t size() const { return py::ssize_t_cast(vec.size()); }

bool is_empty() const { return vec.empty(); }

static int tp_traverse(PyObject *self_base, visitproc visit, void *arg) {
// https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_traverse
#if PY_VERSION_HEX >= 0x03090000
Py_VISIT(Py_TYPE(self_base));
#endif

if (should_check_holder_initialization && !py::detail::is_holder_constructed(self_base)) {
// The holder has not been constructed yet. Skip the traversal to avoid
// segmentation faults.
return 0;
}

// The actual logic of the tp_traverse function goes here.
auto &self = py::cast<VecOwnsObjs &>(py::handle{self_base});
for (const auto &obj : self.vec) {
Py_VISIT(obj.ptr());
}
return 0;
}

static int tp_clear(PyObject *self_base) {
if (should_check_holder_initialization && !py::detail::is_holder_constructed(self_base)) {
// The holder has not been constructed yet. Skip the traversal to avoid
// segmentation faults.
return 0;
}

// The actual logic of the tp_clear function goes here.
auto &self = py::cast<VecOwnsObjs &>(py::handle{self_base});
for (auto &obj : self.vec) {
Py_CLEAR(obj.ptr());
}
self.vec.clear();
return 0;
}

py::object get_state() const {
py::list state{};
for (const auto &item : vec) {
state.append(item);
}
return py::tuple(state);
}

static bool get_should_check_holder_initialization() {
return should_check_holder_initialization;
}

static void set_should_check_holder_initialization(bool value) {
should_check_holder_initialization = value;
}

static bool get_should_raise_error_on_set_state() { return should_raise_error_on_set_state; }

static void set_should_raise_error_on_set_state(bool value) {
should_raise_error_on_set_state = value;
}

static bool should_check_holder_initialization;
static bool should_raise_error_on_set_state;

private:
std::vector<py::object> vec{};
};

bool VecOwnsObjs::should_check_holder_initialization = false;
bool VecOwnsObjs::should_raise_error_on_set_state = false;

TEST_SUBMODULE(invalid_holder_access, m) {
m.doc() = "Test invalid holder access";

#if defined(PYBIND11_CPP14)
m.def("create_vector", [](const py::iterable &iterable) -> std::unique_ptr<VecOwnsObjs> {
auto vec = std::make_unique<VecOwnsObjs>();
for (const auto &item : iterable) {
vec->append(py::reinterpret_borrow<py::object>(item));
}
return vec;
});
#endif

py::class_<VecOwnsObjs>(
m, "VecOwnsObjs", py::custom_type_setup([](PyHeapTypeObject *heap_type) -> void {
auto *const type = &heap_type->ht_type;
type->tp_flags |= Py_TPFLAGS_HAVE_GC;
type->tp_traverse = &VecOwnsObjs::tp_traverse;
type->tp_clear = &VecOwnsObjs::tp_clear;
}))
.def_static("set_should_check_holder_initialization",
&VecOwnsObjs::set_should_check_holder_initialization,
py::arg("value"))
.def_static("set_should_raise_error_on_set_state",
&VecOwnsObjs::set_should_raise_error_on_set_state,
py::arg("value"))
#if defined(PYBIND11_CPP14)
.def(py::pickle([](const VecOwnsObjs &self) -> py::object { return self.get_state(); },
[](const py::object &state) -> std::unique_ptr<VecOwnsObjs> {
if (!py::isinstance<py::tuple>(state)) {
throw std::runtime_error("Invalid state");
}
auto vec = std::make_unique<VecOwnsObjs>();
if (VecOwnsObjs::get_should_raise_error_on_set_state()) {
throw std::runtime_error("raise error on set_state for testing");
}
for (const auto &item : state) {
vec->append(py::reinterpret_borrow<py::object>(item));
}
return vec;
}),
py::arg("state"))
#endif
.def("append", &VecOwnsObjs::append, py::arg("obj"))
.def("is_empty", &VecOwnsObjs::is_empty)
.def("__setitem__", &VecOwnsObjs::set_item, py::arg("i"), py::arg("obj"))
.def("__getitem__", &VecOwnsObjs::get_item, py::arg("i"))
.def("__len__", &VecOwnsObjs::size);
}
189 changes: 189 additions & 0 deletions tests/test_invalid_holder_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from __future__ import annotations

import gc
import multiprocessing
import pickle
import signal
import sys
import weakref

import pytest

import env # noqa: F401
import pybind11_tests
from pybind11_tests import invalid_holder_access as m

XFAIL_REASON = "Known issues: https://github.com/pybind/pybind11/pull/5654"


try:
import multiprocessing

spawn_context = multiprocessing.get_context("spawn")
except (ImportError, ValueError):
spawn_context = None


def check_segfault(target):
"""Check if the target function causes a segmentation fault.

The check should be done in a separate process to avoid crashing the main
process with the segfault.
"""
process = spawn_context.Process(target=target)
process.start()
process.join()
rc = abs(process.exitcode)
if 128 < rc < 256:
rc -= 128
assert rc in (
0,
signal.SIGSEGV,
signal.SIGABRT,
0xC0000005, # STATUS_ACCESS_VIOLATION on Windows
)
if rc != 0:
raise SystemError(
"Segmentation Fault: The C++ compiler initializes container incorrectly."
)


def test_no_init():
with pytest.raises(TypeError, match=r"No constructor defined"):
m.VecOwnsObjs()
vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs)
with pytest.raises(TypeError, match=r"No constructor defined"):
vec.__init__()


def manual_new_target():
# Repeatedly trigger allocation without initialization (raw malloc'ed) to
# detect uninitialized memory bugs.
for _ in range(32):
# The holder is a pointer variable while the C++ ctor is not called.
vec = m.VecOwnsObjs.__new__(m.VecOwnsObjs)
if vec.is_empty(): # <= this line could cause a segfault
# The C++ compiler initializes container correctly.
assert len(vec) == 0
else:
# The program is not supposed to reach here. It will abort with
# SIGSEGV on `vec.is_empty()`.
sys.exit(signal.SIGSEGV) # manually trigger SIGSEGV if not raised


# This test might succeed on some platforms with some compilers, but it is not
# guaranteed to work everywhere. It is marked as non-strict xfail.
@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False)
@pytest.mark.skipif(spawn_context is None, reason="spawn context not available")
@pytest.mark.skipif(
sys.platform.startswith("emscripten"),
reason="Requires multiprocessing",
)
def test_manual_new():
"""
Manually calling the constructor (__new__) of a class that has C++ STL
container will allocate memory without initializing it can cause a
segmentation fault.
"""
check_segfault(manual_new_target)


@pytest.mark.skipif(
pybind11_tests.cpp_std_num < 14,
reason="std::{unique_ptr,make_unique} not available in C++11",
)
def test_set_state_with_error_no_segfault_if_gc_checks_holder_has_initialized():
"""
The ``tp_traverse`` and ``tp_clear`` functions should check if the holder
has been initialized before traversing or clearing the C++ STL container.
"""
m.VecOwnsObjs.set_should_check_holder_initialization(True)

vec = m.create_vector((1, 2, 3, 4))

m.VecOwnsObjs.set_should_raise_error_on_set_state(False)
pickle.loads(pickle.dumps(vec))

# During the unpickling process, Python firstly allocates the object with
# the `__new__` method and then calls the `__setstate__` method to set the
# state of the object.
#
# obj = cls.__new__(cls)
# obj.__setstate__(state)
#
# The `__init__` method is not called during unpickling.
# So, if the `__setstate__` method raises an error, the object will be in
# an uninitialized state.
# If `tp_traverse` and `tp_clear` functions check if the holder has been
# initialized, the functions can exit early and do not cause segfault on GC.
m.VecOwnsObjs.set_should_raise_error_on_set_state(True)
serialized = pickle.dumps(vec)
with pytest.raises(
RuntimeError,
match="raise error on set_state for testing",
):
pickle.loads(serialized)


def unpicklable_with_error_target():
m.VecOwnsObjs.set_should_check_holder_initialization(False)
m.VecOwnsObjs.set_should_raise_error_on_set_state(True)

vec = m.create_vector((1, 2, 3, 4))
serialized = pickle.dumps(vec)

# Repeatedly trigger allocation without initialization (raw malloc'ed) to
# detect uninitialized memory bugs.
for _ in range(32):
# During the unpickling process, Python firstly allocates the object with
# the `__new__` method and then calls the `__setstate__` method to set the
# state of the object.
#
# obj = cls.__new__(cls)
# obj.__setstate__(state)
#
# The `__init__` method is not called during unpickling.
# So, if the `__setstate__` method raises an error, the object will be in
# an uninitialized state. The GC will traverse the uninitialized C++ STL
# container and cause a segmentation fault.
try: # noqa: SIM105
pickle.loads(serialized)
except RuntimeError:
pass


# This test might succeed on some platforms with some compilers, but it is not
# guaranteed to work everywhere. It is marked as non-strict xfail.
@pytest.mark.xfail(reason=XFAIL_REASON, raises=SystemError, strict=False)
@pytest.mark.skipif(spawn_context is None, reason="spawn context not available")
@pytest.mark.skipif(
pybind11_tests.cpp_std_num < 14,
reason="std::{unique_ptr,make_unique} not available in C++11",
)
def test_set_state_with_error_will_segfault_if_gc_does_not_check_holder_has_initialized():
m.VecOwnsObjs.set_should_check_holder_initialization(False)

vec = m.create_vector((1, 2, 3, 4))

m.VecOwnsObjs.set_should_raise_error_on_set_state(False)
pickle.loads(pickle.dumps(vec))

# See comments above.
check_segfault(unpicklable_with_error_target)


@pytest.mark.skipif("env.PYPY or env.GRAALPY", reason="Cannot reliably trigger GC")
@pytest.mark.skipif(
pybind11_tests.cpp_std_num < 14,
reason="std::{unique_ptr,make_unique} not available in C++11",
)
def test_gc():
vec = m.create_vector(())
vec.append((vec, vec))

wr = weakref.ref(vec)
assert wr() is vec
del vec
for _ in range(10):
gc.collect()
assert wr() is None
Loading