Skip to content

Commit b07223f

Browse files
authored
fix: improve bytes to str decoding error handling (pybind#4294)
* (bugfix): Improve bytes to str decoding error handling
* regroup test
* Further broaden tests
* Add another decode error test
* Fix bug in tests
* Reviewer suggestions
1 parent fcb5554 commit b07223f

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

include/pybind11/pytypes.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,6 +1432,9 @@ class str : public object {
14321432
str(const char *c, const SzType &n)
14331433
: object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
14341434
if (!m_ptr) {
1435+
if (PyErr_Occurred()) {
1436+
throw error_already_set();
1437+
}
14351438
pybind11_fail("Could not allocate string object!");
14361439
}
14371440
}
@@ -1441,6 +1444,9 @@ class str : public object {
14411444
// NOLINTNEXTLINE(google-explicit-constructor)
14421445
str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
14431446
if (!m_ptr) {
1447+
if (PyErr_Occurred()) {
1448+
throw error_already_set();
1449+
}
14441450
pybind11_fail("Could not allocate string object!");
14451451
}
14461452
}
@@ -1598,6 +1604,9 @@ inline str::str(const bytes &b) {
15981604
}
15991605
auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
16001606
if (!obj) {
1607+
if (PyErr_Occurred()) {
1608+
throw error_already_set();
1609+
}
16011610
pybind11_fail("Could not allocate string object!");
16021611
}
16031612
m_ptr = obj.release().ptr();

tests/test_pytypes.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,12 @@ TEST_SUBMODULE(pytypes, m) {
206206
m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
207207
m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
208208
m.def("str_from_string", []() { return py::str(std::string("baz")); });
209+
m.def("str_from_std_string_input", [](const std::string &stri) { return py::str(stri); });
210+
m.def("str_from_cstr_input", [](const char *c_str) { return py::str(c_str); });
209211
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
212+
m.def("str_from_bytes_input",
213+
[](const py::bytes &encoded_str) { return py::str(encoded_str); });
214+
210215
m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
211216
m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
212217
m.def("str_from_handle", [](py::handle h) { return py::str(h); });

tests/test_pytypes.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,20 @@ def __repr__(self):
244244
m.str_from_string_from_str(ucs_surrogates_str)
245245

246246

247+
@pytest.mark.parametrize(
248+
"func",
249+
[
250+
m.str_from_bytes_input,
251+
m.str_from_cstr_input,
252+
m.str_from_std_string_input,
253+
],
254+
)
255+
def test_surrogate_pairs_unicode_error(func):
256+
input_str = "\ud83d\ude4f".encode("utf-8", "surrogatepass")
257+
with pytest.raises(UnicodeDecodeError):
258+
func(input_str)
259+
260+
247261
def test_bytes(doc):
248262
assert m.bytes_from_char_ssize_t().decode() == "green"
249263
assert m.bytes_from_char_size_t().decode() == "purple"

0 commit comments

Comments
 (0)