Skip to content

Fix undefined memoryview format #2223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jul 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ ALIASES += "endrst=\endverbatim"
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = NO
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS \
PY_MAJOR_VERSION=3
42 changes: 42 additions & 0 deletions docs/advanced/pycpp/numpy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -384,3 +384,45 @@ operation on the C++ side:

py::array a = /* A NumPy array */;
py::array b = a[py::make_tuple(0, py::ellipsis(), 0)];

Memory view
===========

For a case when we simply want to provide a direct accessor to C/C++ buffer
without a concrete class object, we can return a ``memoryview`` object. Suppose
we wish to expose a ``memoryview`` for 2x4 uint8_t array, we can do the
following:

.. code-block:: cpp

const uint8_t buffer[] = {
0, 1, 2, 3,
4, 5, 6, 7
};
m.def("get_memoryview2d", []() {
return py::memoryview::from_buffer(
buffer, // buffer pointer
{ 2, 4 }, // shape (rows, cols)
{ sizeof(uint8_t) * 4, sizeof(uint8_t) } // strides in bytes
);
})

This approach is meant for providing a ``memoryview`` for a C/C++ buffer not
managed by Python. The user is responsible for managing the lifetime of the
buffer. Using a ``memoryview`` created in this way after deleting the buffer in
C++ side results in undefined behavior.

We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer:

.. code-block:: cpp

m.def("get_memoryview1d", []() {
return py::memoryview::from_memory(
buffer, // buffer pointer
sizeof(uint8_t) * 8 // buffer size
);
})

.. note::

``memoryview::from_memory`` is not available in Python 2.
10 changes: 6 additions & 4 deletions include/pybind11/buffer_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct buffer_info {
explicit buffer_info(Py_buffer *view, bool ownview = true)
: buffer_info(view->buf, view->itemsize, view->format, view->ndim,
{view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}, view->readonly) {
this->view = view;
this->m_view = view;
this->ownview = ownview;
}

Expand All @@ -73,24 +73,26 @@ struct buffer_info {
ndim = rhs.ndim;
shape = std::move(rhs.shape);
strides = std::move(rhs.strides);
std::swap(view, rhs.view);
std::swap(m_view, rhs.m_view);
std::swap(ownview, rhs.ownview);
readonly = rhs.readonly;
return *this;
}

~buffer_info() {
if (view && ownview) { PyBuffer_Release(view); delete view; }
if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; }
}

Py_buffer *view() const { return m_view; }
Py_buffer *&view() { return m_view; }
private:
struct private_ctr_tag { };

buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
detail::any_container<ssize_t> &&shape_in, detail::any_container<ssize_t> &&strides_in, bool readonly)
: buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { }

Py_buffer *view = nullptr;
Py_buffer *m_view = nullptr;
bool ownview = false;
};

Expand Down
150 changes: 127 additions & 23 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1331,35 +1331,139 @@ class buffer : public object {

class memoryview : public object {
public:
explicit memoryview(const buffer_info& info) {
static Py_buffer buf { };
// Py_buffer uses signed sizes, strides and shape!..
static std::vector<Py_ssize_t> py_strides { };
static std::vector<Py_ssize_t> py_shape { };
buf.buf = info.ptr;
buf.itemsize = info.itemsize;
buf.format = const_cast<char *>(info.format.c_str());
buf.ndim = (int) info.ndim;
buf.len = info.size;
py_strides.clear();
py_shape.clear();
for (size_t i = 0; i < (size_t) info.ndim; ++i) {
py_strides.push_back(info.strides[i]);
py_shape.push_back(info.shape[i]);
}
buf.strides = py_strides.data();
buf.shape = py_shape.data();
buf.suboffsets = nullptr;
buf.readonly = info.readonly;
buf.internal = nullptr;
PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)

m_ptr = PyMemoryView_FromBuffer(&buf);
/** \rst
Creates ``memoryview`` from ``buffer_info``.

``buffer_info`` must be created from ``buffer::request()``. Otherwise
throws an exception.

For creating a ``memoryview`` from objects that support buffer protocol,
use ``memoryview(const object& obj)`` instead of this constructor.
\endrst */
explicit memoryview(const buffer_info& info) {
if (!info.view())
pybind11_fail("Prohibited to create memoryview without Py_buffer");
// Note: PyMemoryView_FromBuffer never increments obj reference.
m_ptr = (info.view()->obj) ?
PyMemoryView_FromObject(info.view()->obj) :
PyMemoryView_FromBuffer(info.view());
if (!m_ptr)
pybind11_fail("Unable to create memoryview from buffer descriptor");
}

PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
/** \rst
Creates ``memoryview`` from static buffer.

This method is meant for providing a ``memoryview`` for C/C++ buffer not
managed by Python. The caller is responsible for managing the lifetime
of ``ptr`` and ``format``, which MUST outlive the memoryview constructed
here.

See also: Python C API documentation for `PyMemoryView_FromBuffer`_.

.. _PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer

:param ptr: Pointer to the buffer.
:param itemsize: Byte size of an element.
:param format: Pointer to the null-terminated format string. For
homogeneous Buffers, this should be set to
``format_descriptor<T>::value``.
:param shape: Shape of the tensor (1 entry per dimension).
:param strides: Number of bytes between adjacent entries (for each
per dimension).
:param readonly: Flag to indicate if the underlying storage may be
written to.
\endrst */
static memoryview from_buffer(
void *ptr, ssize_t itemsize, const char *format,
detail::any_container<ssize_t> shape,
detail::any_container<ssize_t> strides, bool readonly = false);

static memoryview from_buffer(
const void *ptr, ssize_t itemsize, const char *format,
detail::any_container<ssize_t> shape,
detail::any_container<ssize_t> strides) {
return memoryview::from_buffer(
const_cast<void*>(ptr), itemsize, format, shape, strides, true);
}

template<typename T>
static memoryview from_buffer(
T *ptr, detail::any_container<ssize_t> shape,
detail::any_container<ssize_t> strides, bool readonly = false) {
return memoryview::from_buffer(
reinterpret_cast<void*>(ptr), sizeof(T),
format_descriptor<T>::value, shape, strides, readonly);
}

template<typename T>
static memoryview from_buffer(
const T *ptr, detail::any_container<ssize_t> shape,
detail::any_container<ssize_t> strides) {
return memoryview::from_buffer(
const_cast<T*>(ptr), shape, strides, true);
}

#if PY_MAJOR_VERSION >= 3
/** \rst
Creates ``memoryview`` from static memory.

This method is meant for providing a ``memoryview`` for C/C++ buffer not
managed by Python. The caller is responsible for managing the lifetime
of ``mem``, which MUST outlive the memoryview constructed here.

This method is not available in Python 2.

See also: Python C API documentation for `PyMemoryView_FromBuffer`_.

.. _PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory
\endrst */
static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) {
PyObject* ptr = PyMemoryView_FromMemory(
reinterpret_cast<char*>(mem), size,
(readonly) ? PyBUF_READ : PyBUF_WRITE);
if (!ptr)
pybind11_fail("Could not allocate memoryview object!");
return memoryview(object(ptr, stolen_t{}));
}

static memoryview from_memory(const void *mem, ssize_t size) {
return memoryview::from_memory(const_cast<void*>(mem), size, true);
}
#endif
};

#ifndef DOXYGEN_SHOULD_SKIP_THIS
inline memoryview memoryview::from_buffer(
void *ptr, ssize_t itemsize, const char* format,
detail::any_container<ssize_t> shape,
detail::any_container<ssize_t> strides, bool readonly) {
size_t ndim = shape->size();
if (ndim != strides->size())
pybind11_fail("memoryview: shape length doesn't match strides length");
ssize_t size = ndim ? 1 : 0;
for (size_t i = 0; i < ndim; ++i)
size *= (*shape)[i];
Py_buffer view;
view.buf = ptr;
view.obj = nullptr;
view.len = size * itemsize;
view.readonly = static_cast<int>(readonly);
view.itemsize = itemsize;
view.format = const_cast<char*>(format);
view.ndim = static_cast<int>(ndim);
view.shape = shape->data();
view.strides = strides->data();
view.suboffsets = nullptr;
view.internal = nullptr;
PyObject* obj = PyMemoryView_FromBuffer(&view);
if (!obj)
throw error_already_set();
return memoryview(object(obj, stolen_t{}));
}
#endif // DOXYGEN_SHOULD_SKIP_THIS
/// @} pytypes

/// \addtogroup python_builtins
Expand Down
49 changes: 49 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,53 @@ TEST_SUBMODULE(pytypes, m) {
m.def("test_list_slicing", [](py::list a) {
return a[py::slice(0, -1, 2)];
});

m.def("test_memoryview_object", [](py::buffer b) {
return py::memoryview(b);
});

m.def("test_memoryview_buffer_info", [](py::buffer b) {
return py::memoryview(b.request());
});

m.def("test_memoryview_from_buffer", [](bool is_unsigned) {
static const int16_t si16[] = { 3, 1, 4, 1, 5 };
static const uint16_t ui16[] = { 2, 7, 1, 8 };
if (is_unsigned)
return py::memoryview::from_buffer(
ui16, { 4 }, { sizeof(uint16_t) });
else
return py::memoryview::from_buffer(
si16, { 5 }, { sizeof(int16_t) });
});

m.def("test_memoryview_from_buffer_nativeformat", []() {
static const char* format = "@i";
static const int32_t arr[] = { 4, 7, 5 };
return py::memoryview::from_buffer(
arr, sizeof(int32_t), format, { 3 }, { sizeof(int32_t) });
});

m.def("test_memoryview_from_buffer_empty_shape", []() {
static const char* buf = "";
return py::memoryview::from_buffer(buf, 1, "B", { }, { });
});

m.def("test_memoryview_from_buffer_invalid_strides", []() {
static const char* buf = "\x02\x03\x04";
return py::memoryview::from_buffer(buf, 1, "B", { 3 }, { });
});

m.def("test_memoryview_from_buffer_nullptr", []() {
return py::memoryview::from_buffer(
static_cast<void*>(nullptr), 1, "B", { }, { });
});

#if PY_MAJOR_VERSION >= 3
m.def("test_memoryview_from_memory", []() {
const char* buf = "\xff\xe1\xab\x37";
return py::memoryview::from_memory(
buf, static_cast<ssize_t>(strlen(buf)));
});
#endif
}
67 changes: 67 additions & 0 deletions tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,70 @@ def test_number_protocol():
def test_list_slicing():
li = list(range(100))
assert li[::2] == m.test_list_slicing(li)


@pytest.mark.parametrize('method, args, fmt, expected_view', [
(m.test_memoryview_object, (b'red',), 'B', b'red'),
(m.test_memoryview_buffer_info, (b'green',), 'B', b'green'),
(m.test_memoryview_from_buffer, (False,), 'h', [3, 1, 4, 1, 5]),
(m.test_memoryview_from_buffer, (True,), 'H', [2, 7, 1, 8]),
(m.test_memoryview_from_buffer_nativeformat, (), '@i', [4, 7, 5]),
])
def test_memoryview(method, args, fmt, expected_view):
view = method(*args)
assert isinstance(view, memoryview)
assert view.format == fmt
if isinstance(expected_view, bytes) or sys.version_info[0] >= 3:
view_as_list = list(view)
else:
# Using max to pick non-zero byte (big-endian vs little-endian).
view_as_list = [max([ord(c) for c in s]) for s in view]
assert view_as_list == list(expected_view)


@pytest.mark.skipif(
not hasattr(sys, 'getrefcount'),
reason='getrefcount is not available')
@pytest.mark.parametrize('method', [
m.test_memoryview_object,
m.test_memoryview_buffer_info,
])
def test_memoryview_refcount(method):
buf = b'\x0a\x0b\x0c\x0d'
ref_before = sys.getrefcount(buf)
view = method(buf)
ref_after = sys.getrefcount(buf)
assert ref_before < ref_after
assert list(view) == list(buf)


def test_memoryview_from_buffer_empty_shape():
view = m.test_memoryview_from_buffer_empty_shape()
assert isinstance(view, memoryview)
assert view.format == 'B'
if sys.version_info.major < 3:
# Python 2 behavior is weird, but Python 3 (the future) is fine.
assert bytes(view).startswith(b'<memory at ')
else:
assert bytes(view) == b''


def test_test_memoryview_from_buffer_invalid_strides():
with pytest.raises(RuntimeError):
m.test_memoryview_from_buffer_invalid_strides()


def test_test_memoryview_from_buffer_nullptr():
if sys.version_info.major < 3:
m.test_memoryview_from_buffer_nullptr()
else:
with pytest.raises(ValueError):
m.test_memoryview_from_buffer_nullptr()


@pytest.mark.skipif(sys.version_info.major < 3, reason='API not available')
def test_memoryview_from_memory():
view = m.test_memoryview_from_memory()
assert isinstance(view, memoryview)
assert view.format == 'B'
assert bytes(view) == b'\xff\xe1\xab\x37'