Skip to content

Commit 7064d43

Browse files
wangxf123456pre-commit-ci[bot]rwgk
authored
[smart_holder] Add a new return value policy return_as_bytes (#3838)
* Add return_as_bytes policy * Fix format * Fix test failures * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix std::variant * Resolve comments * Note this policy experimental * Add tests for `return_as_bytes` with `def_property`. * Change comment for the new return_as_bytes enum to note that the policy is not available on master. * Applying pr3838_sh.patch (exactly as used Google-internally since 2022-03-31). * Add `case return_as_bytes` to `switch`es in detail/type_caster_base.h and eigen.h Based on systematic review under #3838 (comment) * Add missing break (clang-tidy). * More clang-tidy fixes (this time around clang-tidy was run interactively to pre-empt repeat trips through the CI). * Underscore prefix: _return_as_bytes Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ralf W. Grosse-Kunstleve <[email protected]>
1 parent 8532780 commit 7064d43

9 files changed

+107
-4
lines changed

include/pybind11/cast.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -447,11 +447,15 @@ struct string_caster {
447447
return true;
448448
}
449449

450-
static handle
451-
cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
450+
static handle cast(const StringType &src, return_value_policy policy, handle /* parent */) {
452451
const char *buffer = reinterpret_cast<const char *>(src.data());
453452
auto nbytes = ssize_t(src.size() * sizeof(CharT));
454-
handle s = decode_utfN(buffer, nbytes);
453+
handle s;
454+
if (policy == return_value_policy::_return_as_bytes) {
455+
s = PyBytes_FromStringAndSize(buffer, nbytes);
456+
} else {
457+
s = decode_utfN(buffer, nbytes);
458+
}
455459
if (!s) {
456460
throw error_already_set();
457461
}

include/pybind11/detail/common.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,19 @@ enum class return_value_policy : uint8_t {
459459
collected while Python is still using the child. More advanced
460460
variations of this scheme are also possible using combinations of
461461
return_value_policy::reference and the keep_alive call policy */
462-
reference_internal
462+
reference_internal,
463+
464+
/** With this policy, C++ string types are converted to Python bytes,
465+
instead of str. This is most useful when a C++ function returns a
466+
container-like type with nested C++ string types, and `py::bytes` cannot
467+
be applied easily. Dictionary like types might not work, for example,
468+
`Dict[str, bytes]`, because this policy forces all string return values
469+
to be converted to bytes. Note that this return_value_policy is not
470+
concerned with lifetime/ownership semantics, like the other policies,
471+
but the purpose of _return_as_bytes is certain to be orthogonal, because
472+
C++ strings are always copied to Python `bytes` or `str`.
473+
NOTE: This policy is NOT available on master. */
474+
_return_as_bytes
463475
};
464476

465477
PYBIND11_NAMESPACE_BEGIN(detail)

include/pybind11/detail/smart_holder_type_casters.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,12 +783,15 @@ struct smart_holder_type_caster<std::shared_ptr<T>> : smart_holder_type_caster_l
783783
break;
784784
case return_value_policy::take_ownership:
785785
throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership).");
786+
break;
786787
case return_value_policy::copy:
787788
case return_value_policy::move:
788789
break;
789790
case return_value_policy::reference:
790791
throw cast_error("Invalid return_value_policy for shared_ptr (reference).");
792+
break;
791793
case return_value_policy::reference_internal:
794+
case return_value_policy::_return_as_bytes:
792795
break;
793796
}
794797
if (!src) {

include/pybind11/detail/type_caster_base.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,10 @@ class type_caster_generic {
651651
keep_alive_impl(inst, parent);
652652
break;
653653

654+
case return_value_policy::_return_as_bytes:
655+
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
656+
break;
657+
654658
default:
655659
throw cast_error("unhandled return_value_policy: should not happen!");
656660
}

include/pybind11/eigen.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,9 @@ struct type_caster<Type, enable_if_t<is_eigen_dense_plain<Type>::value>> {
349349
return eigen_ref_array<props>(*src);
350350
case return_value_policy::reference_internal:
351351
return eigen_ref_array<props>(*src, parent);
352+
case return_value_policy::_return_as_bytes:
353+
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
354+
break;
352355
default:
353356
throw cast_error("unhandled return_value_policy: should not happen!");
354357
};

tests/test_builtin_casters.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,17 @@
1111

1212
#include "pybind11_tests.h"
1313

14+
#include <utility>
15+
1416
struct ConstRefCasted {
1517
int tag;
1618
};
1719

20+
struct StringAttr {
21+
explicit StringAttr(std::string v) : value(std::move(v)) {}
22+
std::string value;
23+
};
24+
1825
PYBIND11_NAMESPACE_BEGIN(pybind11)
1926
PYBIND11_NAMESPACE_BEGIN(detail)
2027
template <>
@@ -378,4 +385,28 @@ TEST_SUBMODULE(builtin_casters, m) {
378385
m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; });
379386
m.def("takes_const_ref_wrap",
380387
[](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
388+
389+
// test return_value_policy::_return_as_bytes
390+
m.def(
391+
"invalid_utf8_string_as_bytes",
392+
[]() { return std::string("\xba\xd0\xba\xd0"); },
393+
py::return_value_policy::_return_as_bytes);
394+
m.def("invalid_utf8_string_as_str", []() { return std::string("\xba\xd0\xba\xd0"); });
395+
m.def(
396+
"invalid_utf8_char_array_as_bytes",
397+
[]() { return "\xba\xd0\xba\xd0"; },
398+
py::return_value_policy::_return_as_bytes);
399+
py::class_<StringAttr>(m, "StringAttr")
400+
.def(py::init<std::string>())
401+
.def_property(
402+
"value",
403+
py::cpp_function([](StringAttr &self) { return self.value; },
404+
py::return_value_policy::_return_as_bytes),
405+
py::cpp_function([](StringAttr &self, std::string v) { self.value = std::move(v); }));
406+
#ifdef PYBIND11_HAS_STRING_VIEW
407+
m.def(
408+
"invalid_utf8_string_view_as_bytes",
409+
[]() { return std::string_view("\xba\xd0\xba\xd0"); },
410+
py::return_value_policy::_return_as_bytes);
411+
#endif
381412
}

tests/test_builtin_casters.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,3 +524,17 @@ def test_const_ref_caster():
524524
assert m.takes_const_ptr(x) == 5
525525
assert m.takes_const_ref(x) == 4
526526
assert m.takes_const_ref_wrap(x) == 4
527+
528+
529+
def test_return_as_bytes_policy():
530+
expected_return_value = b"\xba\xd0\xba\xd0"
531+
assert m.invalid_utf8_string_as_bytes() == expected_return_value
532+
with pytest.raises(UnicodeDecodeError):
533+
m.invalid_utf8_string_as_str()
534+
assert m.invalid_utf8_char_array_as_bytes() == expected_return_value
535+
obj = m.StringAttr(expected_return_value)
536+
assert obj.value == expected_return_value
537+
obj.value = "123"
538+
assert obj.value == b"123"
539+
if hasattr(m, "has_string_view"):
540+
assert m.invalid_utf8_string_view_as_bytes() == expected_return_value

tests/test_stl.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,4 +541,25 @@ TEST_SUBMODULE(stl, m) {
541541
[]() { return new std::vector<bool>(4513); },
542542
// Without explicitly specifying `take_ownership`, this function leaks.
543543
py::return_value_policy::take_ownership);
544+
545+
// test return_value_policy::_return_as_bytes
546+
m.def(
547+
"invalid_utf8_string_array_as_bytes",
548+
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; },
549+
py::return_value_policy::_return_as_bytes);
550+
m.def("invalid_utf8_string_array_as_str",
551+
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; });
552+
#ifdef PYBIND11_HAS_OPTIONAL
553+
m.def(
554+
"invalid_utf8_optional_string_as_bytes",
555+
[]() { return std::optional<std::string>{"\xba\xd0\xba\xd0"}; },
556+
py::return_value_policy::_return_as_bytes);
557+
#endif
558+
559+
#ifdef PYBIND11_TEST_VARIANT
560+
m.def(
561+
"invalid_utf8_variant_string_as_bytes",
562+
[]() { return variant<std::string, int>{"\xba\xd0\xba\xd0"}; },
563+
py::return_value_policy::_return_as_bytes);
564+
#endif
544565
}

tests/test_stl.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,14 @@ def test_return_vector_bool_raw_ptr():
371371
v = m.return_vector_bool_raw_ptr()
372372
assert isinstance(v, list)
373373
assert len(v) == 4513
374+
375+
376+
def test_return_as_bytes_policy():
377+
expected_return_value = b"\xba\xd0\xba\xd0"
378+
assert m.invalid_utf8_string_array_as_bytes() == [expected_return_value]
379+
with pytest.raises(UnicodeDecodeError):
380+
m.invalid_utf8_string_array_as_str()
381+
if hasattr(m, "has_optional"):
382+
assert m.invalid_utf8_optional_string_as_bytes() == expected_return_value
383+
if hasattr(m, "load_variant"):
384+
assert m.invalid_utf8_variant_string_as_bytes() == expected_return_value

0 commit comments

Comments
 (0)