Skip to content

[WIP] gh-129813: Add PyBytesWriter C API #129814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions Doc/c-api/bytes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,80 @@ called with a non-bytes parameter.
reallocation fails, the original bytes object at *\*bytes* is deallocated,
*\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is
returned.

PyBytesWriter
^^^^^^^^^^^^^

.. versionadded:: next

.. c:type:: PyBytesWriter

A Python :class:`bytes` writer instance created by
:c:func:`PyBytesWriter_Create`.

The instance must be destroyed by :c:func:`PyBytesWriter_Finish` or
:c:func:`PyBytesWriter_Discard`.

.. c:function:: void* PyBytesWriter_Create(PyBytesWriter **writer, Py_ssize_t alloc)

Create a :c:type:`PyBytesWriter` to write *alloc* bytes.

If *alloc* is greater than zero, allocate *alloc* bytes for the returned
buffer.

On success, return a non-``NULL`` buffer where bytes can be written.
On error, set an exception and return ``NULL``.

*alloc* must be positive or zero.

.. c:function:: void PyBytesWriter_Discard(PyBytesWriter *writer)

Discard a :c:type:`PyBytesWriter` created by :c:func:`PyBytesWriter_Create`.

The writer instance is invalid after the call.

.. c:function:: PyObject* PyBytesWriter_Finish(PyBytesWriter *writer, void *buf)

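Finish a :c:type:`PyBytesWriter` created by :c:func:`PyBytesWriter_Create`.

*buf* is the current position in the buffer (the end of the bytes written
so far); the returned object contains the bytes written before *buf*.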

On success, return a Python :class:`bytes` object.
On error, set an exception and return ``NULL``.

The writer instance is invalid after the call.

.. c:function:: void* PyBytesWriter_Extend(PyBytesWriter *writer, void *buf, Py_ssize_t extend)

Add *extend* bytes to the buffer: allocate *extend* bytes in addition to
bytes already allocated by previous :c:func:`PyBytesWriter_Create` and
:c:func:`PyBytesWriter_Extend` calls.

On success, return a non-``NULL`` buffer where bytes can be written.
On error, set an exception and return ``NULL``.

*extend* must be positive or zero.

.. c:function:: void* PyBytesWriter_WriteBytes(PyBytesWriter *writer, void *buf, const void *bytes, Py_ssize_t size)

Extend the buffer by *size* bytes and write *bytes* into the writer.

If *size* is equal to ``-1``, call ``strlen(bytes)`` to get the
string length.

On success, return a non-``NULL`` buffer.
On error, set an exception and return ``NULL``.

.. c:function:: void* PyBytesWriter_Format(PyBytesWriter *writer, void *buf, const char *format, ...)

Similar to :c:func:`PyBytes_FromFormat`, but write the output directly
into the writer.

On success, return a non-``NULL`` buffer.
On error, set an exception and return ``NULL``.

.. c:function:: Py_ssize_t PyBytesWriter_GetRemaining(PyBytesWriter *writer, void *buf)

Get the number of remaining bytes to write.

It is the difference between total allocated bytes (bytes allocated by
:c:func:`PyBytesWriter_Create` and :c:func:`PyBytesWriter_Extend`) and the
current position in the buffer.
32 changes: 32 additions & 0 deletions Include/cpython/bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,35 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable)
{
return PyBytes_Join(sep, iterable);
}


// --- PyBytesWriter API -----------------------------------------------------

// Bytes writer type. Only the struct tag is declared in this header.
typedef struct PyBytesWriter PyBytesWriter;

// Create a writer, store it in *writer and allocate 'alloc' bytes
// ('alloc' must be positive or zero).
// Return a non-NULL buffer where bytes can be written.
// On error, set an exception and return NULL.
PyAPI_FUNC(void*) PyBytesWriter_Create(
PyBytesWriter **writer,
Py_ssize_t alloc);
// Discard a writer created by PyBytesWriter_Create().
// The writer is invalid after the call.
PyAPI_FUNC(void) PyBytesWriter_Discard(
PyBytesWriter *writer);
// Finish a writer created by PyBytesWriter_Create() and return a bytes
// object. On error, set an exception and return NULL.
// The writer is invalid after the call.
// NOTE(review): 'buf' looks like the current write position in the
// buffer (callers pass the advanced pointer) -- confirm.
PyAPI_FUNC(PyObject*) PyBytesWriter_Finish(
PyBytesWriter *writer,
void *buf);

// Return the number of remaining bytes to write: the difference between the
// total bytes allocated by PyBytesWriter_Create() and PyBytesWriter_Extend()
// and the current position 'buf' in the buffer.
PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetRemaining(
PyBytesWriter *writer,
void *buf);
// Allocate 'extend' bytes ('extend' must be positive or zero) in addition to
// the bytes already allocated by previous Create/Extend calls.
// Return a non-NULL buffer where bytes can be written.
// On error, set an exception and return NULL.
PyAPI_FUNC(void*) PyBytesWriter_Extend(
PyBytesWriter *writer,
void *buf,
Py_ssize_t extend);
// Extend the buffer by 'size' bytes and write 'bytes' into the writer.
// If 'size' is -1, strlen(bytes) is used as the length.
// Return a non-NULL buffer; on error, set an exception and return NULL.
PyAPI_FUNC(void*) PyBytesWriter_WriteBytes(
PyBytesWriter *writer,
void *buf,
const void *bytes,
Py_ssize_t size);
// Similar to PyBytes_FromFormat(), but write the output directly into the
// writer.
// Return a non-NULL buffer; on error, set an exception and return NULL.
PyAPI_FUNC(void*) PyBytesWriter_Format(
PyBytesWriter *writer,
void *buf,
const char *format,
...);
83 changes: 1 addition & 82 deletions Include/internal/pycore_bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,88 +59,7 @@ PyAPI_FUNC(void)
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src);

/* --- _PyBytesWriter ----------------------------------------------------- */

/* State of a bytes writer.

The _PyBytesWriter structure is big: it contains an embedded "stack buffer"
(the small_buffer member). A _PyBytesWriter variable must be declared at the
end of variables in a function to optimize the memory allocation on the
stack. */
typedef struct {
/* bytes, bytearray or NULL (when the small buffer is used) */
PyObject *buffer;

/* Number of bytes allocated for the buffer. */
Py_ssize_t allocated;

/* Minimum number of allocated bytes,
incremented by _PyBytesWriter_Prepare() */
Py_ssize_t min_size;

/* If non-zero, use a bytearray instead of a bytes object for buffer. */
int use_bytearray;

/* If non-zero, overallocate the buffer (default: 0).
This flag must be zero if use_bytearray is non-zero. */
int overallocate;

/* Embedded stack buffer.
NOTE(review): use_small_buffer is presumably non-zero while small_buffer
holds the data (buffer is NULL in that case) -- confirm. */
int use_small_buffer;
char small_buffer[512];
} _PyBytesWriter;

/* Initialize a bytes writer

By default, the overallocation is disabled. Set the overallocate attribute
to control the allocation of the buffer.

Export _PyBytesWriter API for '_pickle' shared extension. */
PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);

/* Get the buffer content and reset the writer.
Return a bytes object, or a bytearray object if use_bytearray is non-zero.
Raise an exception and return NULL on error. */
PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
void *str);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer);

/* Allocate the buffer to write size bytes.
Return the pointer to the beginning of buffer data.
Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer,
Py_ssize_t size);

/* Ensure that the buffer is large enough to write *size* bytes.
Add size to the writer minimum size (min_size attribute).

str is the current pointer inside the buffer.
Return the updated current pointer inside the buffer.
Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);

/* Resize the buffer to make it larger.
The new buffer may be larger than size bytes because of overallocation.
Return the updated current pointer inside the buffer.
Raise an exception and return NULL on error.

Note: size must be greater than the number of allocated bytes in the writer.

This function doesn't use the writer minimum size (min_size attribute).

See also _PyBytesWriter_Prepare().
*/
PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);

/* Write bytes.
Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
extern char* _PyBytesWriter_Start(PyBytesWriter *writer);

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_freelist_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ extern "C" {
# define Py_futureiters_MAXFREELIST 255
# define Py_object_stack_chunks_MAXFREELIST 4
# define Py_unicode_writers_MAXFREELIST 1
# define Py_bytes_writers_MAXFREELIST 1
# define Py_pymethodobjects_MAXFREELIST 20

// A generic freelist of either PyObjects or other data structures.
Expand Down Expand Up @@ -53,6 +54,7 @@ struct _Py_freelists {
struct _Py_freelist futureiters;
struct _Py_freelist object_stack_chunks;
struct _Py_freelist unicode_writers;
struct _Py_freelist bytes_writers;
struct _Py_freelist pymethodobjects;
};

Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ extern int _PyLong_FormatWriter(
int alternate);

extern char* _PyLong_FormatBytesWriter(
_PyBytesWriter *writer,
PyBytesWriter *writer,
char *str,
PyObject *obj,
int base,
Expand Down
82 changes: 82 additions & 0 deletions Lib/test/test_capi/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,5 +290,87 @@ def test_join(self):
bytes_join(b'', NULL)


class PyBytesWriterTest(unittest.TestCase):
    """Tests for the PyBytesWriter C API, driven through _testcapi."""

    def create_writer(self, alloc, string=b''):
        # Thin wrapper around the _testcapi.PyBytesWriter constructor.
        writer = _testcapi.PyBytesWriter(alloc, string)
        return writer

    def test_empty(self):
        # Test PyBytesWriter_Create()
        empty = self.create_writer(0)
        self.assertEqual(empty.get_remaining(), 0)
        self.assertEqual(empty.finish(), b'')

    def test_abc(self):
        # Test PyBytesWriter_Create()
        # Each entry: (bytes allocated, bytes expected to remain).
        for alloc, remaining in ((3, 0), (10, 7)):
            w = self.create_writer(alloc, b'abc')
            self.assertEqual(w.get_remaining(), remaining)
            self.assertEqual(w.finish(), b'abc')

    def test_write_bytes(self):
        # Test PyBytesWriter_WriteBytes()
        expected = b'Hello World!'

        # A size of -1 writes the whole string.
        single = self.create_writer(0)
        single.write_bytes(b'Hello World!', -1)
        self.assertEqual(single.finish(), expected)

        # An explicit size writes only the first 6 bytes of the second chunk.
        split = self.create_writer(0)
        split.write_bytes(b'Hello ', -1)
        split.write_bytes(b'World! <truncated>', 6)
        self.assertEqual(split.finish(), expected)

    def test_extend(self):
        # Test PyBytesWriter_Extend()
        # Each scenario: (sequence of extend() calls, expected remaining).
        scenarios = (
            ([(20, b'number=123456'), (0, b'')], 7),
            ([(0, b''), (20, b'number=123456')], 7),
            ([(10, b'number='), (10, b'123456')], 7),
            ([(10, b'number='), (0, b''), (10, b'123456')], 7),
            ([(10, b'number'), (10, b'='), (10, b'123'), (10, b'456')], 27),
        )
        for steps, remaining in scenarios:
            w = self.create_writer(0)
            for size, chunk in steps:
                w.extend(size, chunk)
            self.assertEqual(w.get_remaining(), remaining)
            self.assertEqual(w.finish(), b'number=123456')

    def test_format(self):
        # Test PyBytesWriter_Format()
        w = self.create_writer(0)
        w.format_i(123456)
        self.assertEqual(w.get_remaining(), 0)
        self.assertEqual(w.finish(), b'123456')

    def test_example_center(self):
        # Each entry: (first argument to byteswriter_center, expected result).
        for spaces, expected in ((0, b'writer'), (3, b' writer ')):
            result = _testcapi.byteswriter_center(spaces, b'writer')
            self.assertEqual(result, expected)


if __name__ == "__main__":
unittest.main()
33 changes: 13 additions & 20 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -2601,29 +2601,22 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj)
static PyObject *
raw_unicode_escape(PyObject *obj)
{
char *p;
Py_ssize_t i, size;
const void *data;
int kind;
_PyBytesWriter writer;
Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
const void *data = PyUnicode_DATA(obj);
int kind = PyUnicode_KIND(obj);

_PyBytesWriter_Init(&writer);

size = PyUnicode_GET_LENGTH(obj);
data = PyUnicode_DATA(obj);
kind = PyUnicode_KIND(obj);

p = _PyBytesWriter_Alloc(&writer, size);
if (p == NULL)
goto error;
writer.overallocate = 1;
PyBytesWriter *writer;
char *p = PyBytesWriter_Create(&writer, size);
if (p == NULL) {
return NULL;
}

for (i=0; i < size; i++) {
for (Py_ssize_t i=0; i < size; i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
/* Map 32-bit characters to '\Uxxxxxxxx' */
if (ch >= 0x10000) {
/* -1: subtract 1 preallocated byte */
p = _PyBytesWriter_Prepare(&writer, p, 10-1);
p = PyBytesWriter_Extend(writer, p, 10-1);
if (p == NULL)
goto error;

Expand All @@ -2644,7 +2637,7 @@ raw_unicode_escape(PyObject *obj)
ch == 0x1a)
{
/* -1: subtract 1 preallocated byte */
p = _PyBytesWriter_Prepare(&writer, p, 6-1);
p = PyBytesWriter_Extend(writer, p, 6-1);
if (p == NULL)
goto error;

Expand All @@ -2660,10 +2653,10 @@ raw_unicode_escape(PyObject *obj)
*p++ = (char) ch;
}

return _PyBytesWriter_Finish(&writer, p);
return PyBytesWriter_Finish(writer, p);

error:
_PyBytesWriter_Dealloc(&writer);
PyBytesWriter_Discard(writer);
return NULL;
}

Expand Down
Loading
Loading