diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index d47beee68eaa33..26bc4e7d6e5e47 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -219,3 +219,80 @@ called with a non-bytes parameter. reallocation fails, the original bytes object at *\*bytes* is deallocated, *\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is returned. + +PyBytesWriter +^^^^^^^^^^^^^ + +.. versionadded:: next + +.. c:type:: PyBytesWriter + + A Python :class:`bytes` writer instance created by + :c:func:`PyBytesWriter_Create`. + + The instance must be destroyed by :c:func:`PyBytesWriter_Finish` or + :c:func:`PyBytesWriter_Discard`. + +.. c:function:: void* PyBytesWriter_Create(PyBytesWriter **writer, Py_ssize_t alloc) + + Create a :c:type:`PyBytesWriter` to write *alloc* bytes. + + If *alloc* is greater than zero, allocate *alloc* bytes for the returned + buffer. + + On success, return a non-``NULL`` buffer where bytes can be written. + On error, set an exception and return ``NULL``. + + *alloc* must be positive or zero. + +.. c:function:: void PyBytesWriter_Discard(PyBytesWriter *writer) + + Discard a :c:type:`PyBytesWriter` created by :c:func:`PyBytesWriter_Create`. + + The writer instance is invalid after the call. + +.. c:function:: PyObject* PyBytesWriter_Finish(PyBytesWriter *writer, void *buf) + + Finish a :c:type:`PyBytesWriter` created by :c:func:`PyBytesWriter_Create`. + + On success, return a Python :class:`bytes` object. + On error, set an exception and return ``NULL``. + + The writer instance is invalid after the call. + +.. c:function:: void* PyBytesWriter_Extend(PyBytesWriter *writer, void *buf, Py_ssize_t extend) + + Add *extend* bytes to the buffer: allocate *extend* bytes in addition to + bytes already allocated by previous :c:func:`PyBytesWriter_Create` and + :c:func:`PyBytesWriter_Extend` calls. + + On success, return a non-``NULL`` buffer where bytes can be written. + On error, set an exception and return ``NULL``. 
+ + *extend* must be positive or zero. + +.. c:function:: void* PyBytesWriter_WriteBytes(PyBytesWriter *writer, void *buf, const void *bytes, Py_ssize_t size) + + Extend the buffer by *size* bytes and write *bytes* into the writer. + + If *size* is equal to ``-1``, call ``strlen(bytes)`` to get the + string length. + + On success, return a non-``NULL`` buffer. + On error, set an exception and return ``NULL``. + +.. c:function:: void* PyBytesWriter_Format(PyBytesWriter *writer, void *buf, const char *format, ...) + + Similar to ``PyBytes_FromFormat()``, but write the output directly + into the writer. + + On success, return a non-``NULL`` buffer. + On error, set an exception and return ``NULL``. + +.. c:function:: Py_ssize_t PyBytesWriter_GetRemaining(PyBytesWriter *writer, void *buf) + + Get the number of remaining bytes to write. + + It is the difference between the total allocated bytes (bytes allocated by + :c:func:`PyBytesWriter_Create` and :c:func:`PyBytesWriter_Extend`) and the + current position in the buffer. 
diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 71c133f173f157..985d22c7ff3338 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -40,3 +40,35 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) { return PyBytes_Join(sep, iterable); } + + +// --- PyBytesWriter API ----------------------------------------------------- + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(void*) PyBytesWriter_Create( + PyBytesWriter **writer, + Py_ssize_t alloc); +PyAPI_FUNC(void) PyBytesWriter_Discard( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( + PyBytesWriter *writer, + void *buf); + +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetRemaining( + PyBytesWriter *writer, + void *buf); +PyAPI_FUNC(void*) PyBytesWriter_Extend( + PyBytesWriter *writer, + void *buf, + Py_ssize_t extend); +PyAPI_FUNC(void*) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + void *buf, + const void *bytes, + Py_ssize_t size); +PyAPI_FUNC(void*) PyBytesWriter_Format( + PyBytesWriter *writer, + void *buf, + const char *format, + ...); diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..7e0317c6a2ed21 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -59,88 +59,7 @@ PyAPI_FUNC(void) _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src); -/* --- _PyBytesWriter ----------------------------------------------------- */ - -/* The _PyBytesWriter structure is big: it contains an embedded "stack buffer". - A _PyBytesWriter variable must be declared at the end of variables in a - function to optimize the memory allocation on the stack. */ -typedef struct { - /* bytes, bytearray or NULL (when the small buffer is used) */ - PyObject *buffer; - - /* Number of allocated size. 
*/ - Py_ssize_t allocated; - - /* Minimum number of allocated bytes, - incremented by _PyBytesWriter_Prepare() */ - Py_ssize_t min_size; - - /* If non-zero, use a bytearray instead of a bytes object for buffer. */ - int use_bytearray; - - /* If non-zero, overallocate the buffer (default: 0). - This flag must be zero if use_bytearray is non-zero. */ - int overallocate; - - /* Stack buffer */ - int use_small_buffer; - char small_buffer[512]; -} _PyBytesWriter; - -/* Initialize a bytes writer - - By default, the overallocation is disabled. Set the overallocate attribute - to control the allocation of the buffer. - - Export _PyBytesWriter API for '_pickle' shared extension. */ -PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); - -/* Get the buffer content and reset the writer. - Return a bytes object, or a bytearray object if use_bytearray is non-zero. - Raise an exception and return NULL on error. */ -PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, - void *str); - -/* Deallocate memory of a writer (clear its internal buffer). */ -PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer); - -/* Allocate the buffer to write size bytes. - Return the pointer to the beginning of buffer data. - Raise an exception and return NULL on error. */ -PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, - Py_ssize_t size); - -/* Ensure that the buffer is large enough to write *size* bytes. - Add size to the writer minimum size (min_size attribute). - - str is the current pointer inside the buffer. - Return the updated current pointer inside the buffer. - Raise an exception and return NULL on error. */ -PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, - void *str, - Py_ssize_t size); - -/* Resize the buffer to make it larger. - The new buffer may be larger than size bytes because of overallocation. - Return the updated current pointer inside the buffer. - Raise an exception and return NULL on error. 
- - Note: size must be greater than the number of allocated bytes in the writer. - - This function doesn't use the writer minimum size (min_size attribute). - - See also _PyBytesWriter_Prepare(). - */ -PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer, - void *str, - Py_ssize_t size); - -/* Write bytes. - Raise an exception and return NULL on error. */ -PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, - void *str, - const void *bytes, - Py_ssize_t size); +extern char* _PyBytesWriter_Start(PyBytesWriter *writer); #ifdef __cplusplus } diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 7c252f5b570c13..50c8e04c761e03 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -24,6 +24,7 @@ extern "C" { # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 # define Py_unicode_writers_MAXFREELIST 1 +# define Py_bytes_writers_MAXFREELIST 1 # define Py_pymethodobjects_MAXFREELIST 20 // A generic freelist of either PyObjects or other data structures. 
@@ -53,6 +54,7 @@ struct _Py_freelists { struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; struct _Py_freelist unicode_writers; + struct _Py_freelist bytes_writers; struct _Py_freelist pymethodobjects; }; diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index df0656a7cb8f0c..34e8e4478eb0d4 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -136,7 +136,7 @@ extern int _PyLong_FormatWriter( int alternate); extern char* _PyLong_FormatBytesWriter( - _PyBytesWriter *writer, + PyBytesWriter *writer, char *str, PyObject *obj, int base, diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 5908d79e14029b..4302dd69853831 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -290,5 +290,87 @@ def test_join(self): bytes_join(b'', NULL) +class PyBytesWriterTest(unittest.TestCase): + def create_writer(self, alloc, string=b''): + return _testcapi.PyBytesWriter(alloc, string) + + def test_empty(self): + # Test PyBytesWriter_Create() + writer = self.create_writer(0) + self.assertEqual(writer.get_remaining(), 0) + self.assertEqual(writer.finish(), b'') + + def test_abc(self): + # Test PyBytesWriter_Create() + writer = self.create_writer(3, b'abc') + self.assertEqual(writer.get_remaining(), 0) + self.assertEqual(writer.finish(), b'abc') + + writer = self.create_writer(10, b'abc') + self.assertEqual(writer.get_remaining(), 7) + self.assertEqual(writer.finish(), b'abc') + + def test_write_bytes(self): + # Test PyBytesWriter_WriteBytes() + + writer = self.create_writer(0) + writer.write_bytes(b'Hello World!', -1) + self.assertEqual(writer.finish(), b'Hello World!') + + writer = self.create_writer(0) + writer.write_bytes(b'Hello ', -1) + writer.write_bytes(b'World! 
', 6) + self.assertEqual(writer.finish(), b'Hello World!') + + def test_extend(self): + # Test PyBytesWriter_Extend() + + writer = self.create_writer(0) + writer.extend(20, b'number=123456') + writer.extend(0, b'') + self.assertEqual(writer.get_remaining(), 7) + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(0, b'') + writer.extend(20, b'number=123456') + self.assertEqual(writer.get_remaining(), 7) + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(10, b'number=') + writer.extend(10, b'123456') + self.assertEqual(writer.get_remaining(), 7) + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(10, b'number=') + writer.extend(0, b'') + writer.extend(10, b'123456') + self.assertEqual(writer.get_remaining(), 7) + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(10, b'number') + writer.extend(10, b'=') + writer.extend(10, b'123') + writer.extend(10, b'456') + self.assertEqual(writer.get_remaining(), 27) + self.assertEqual(writer.finish(), b'number=123456') + + def test_format(self): + # Test PyBytesWriter_Format() + writer = self.create_writer(0) + writer.format_i(123456) + self.assertEqual(writer.get_remaining(), 0) + self.assertEqual(writer.finish(), b'123456') + + def test_example_center(self): + self.assertEqual(_testcapi.byteswriter_center(0, b'writer'), + b'writer') + self.assertEqual(_testcapi.byteswriter_center(3, b'writer'), + b' writer ') + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 5641f93391c551..8d166717d659ef 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2601,29 +2601,22 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj) static PyObject * raw_unicode_escape(PyObject *obj) { - char *p; - Py_ssize_t i, size; - const void *data; - int kind; - 
_PyBytesWriter writer; + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + const void *data = PyUnicode_DATA(obj); + int kind = PyUnicode_KIND(obj); - _PyBytesWriter_Init(&writer); - - size = PyUnicode_GET_LENGTH(obj); - data = PyUnicode_DATA(obj); - kind = PyUnicode_KIND(obj); - - p = _PyBytesWriter_Alloc(&writer, size); - if (p == NULL) - goto error; - writer.overallocate = 1; + PyBytesWriter *writer; + char *p = PyBytesWriter_Create(&writer, size); + if (p == NULL) { + return NULL; + } - for (i=0; i < size; i++) { + for (Py_ssize_t i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 10-1); + p = PyBytesWriter_Extend(writer, p, 10-1); if (p == NULL) goto error; @@ -2644,7 +2637,7 @@ raw_unicode_escape(PyObject *obj) ch == 0x1a) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 6-1); + p = PyBytesWriter_Extend(writer, p, 6-1); if (p == NULL) goto error; @@ -2660,10 +2653,10 @@ raw_unicode_escape(PyObject *obj) *p++ = (char) ch; } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_Finish(writer, p); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/_struct.c b/Modules/_struct.c index 21582b945be23d..7e2bd0fb7e62b0 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2259,7 +2259,6 @@ strings."); static PyObject * s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { - char *buf; PyStructObject *soself; _structmodulestate *state = get_struct_state_structinst(self); @@ -2275,21 +2274,23 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } /* Allocate a new string */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - buf = _PyBytesWriter_Alloc(&writer, soself->s_size); + PyBytesWriter *writer; + char *buf = PyBytesWriter_Create(&writer, soself->s_size); if (buf == 
NULL) { - _PyBytesWriter_Dealloc(&writer); - return NULL; + goto error; } /* Call the guts */ - if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) { - _PyBytesWriter_Dealloc(&writer); - return NULL; + if (s_pack_internal(soself, args, 0, buf, state) != 0) { + goto error; } + buf += soself->s_size; - return _PyBytesWriter_Finish(&writer, buf + soself->s_size); + return PyBytesWriter_Finish(writer, buf); + +error: + PyBytesWriter_Discard(writer); + return NULL; } PyDoc_STRVAR(s_pack_into__doc__, diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 33903de14ba68d..d69597f1727c5c 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -51,9 +51,268 @@ bytes_join(PyObject *Py_UNUSED(module), PyObject *args) } +// --- PyBytesWriter type --------------------------------------------------- + +typedef struct { + PyObject_HEAD + PyBytesWriter *writer; + char *buf; +} WriterObject; + + +static PyObject * +writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); + if (!self) { + return NULL; + } + self->writer = NULL; + self->buf = NULL; + return (PyObject*)self; +} + + +static int +writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)self_raw; + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + + if (kwargs && PyDict_GET_SIZE(kwargs)) { + PyErr_Format(PyExc_TypeError, + "PyBytesWriter() takes exactly no keyword arguments"); + return -1; + } + + Py_ssize_t alloc; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, "ny#", &alloc, &str, &str_size)) { + return -1; + } + + self->buf = PyBytesWriter_Create(&self->writer, alloc); + if (self->buf == NULL) { + return -1; + } + + memcpy(self->buf, str, str_size); + self->buf += str_size; + + return 0; +} + + +static void +writer_dealloc(PyObject *self_raw) +{ + WriterObject *self = (WriterObject *)self_raw; + PyTypeObject *tp = 
Py_TYPE(self); + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + tp->tp_free(self); + Py_DECREF(tp); +} + + +static inline int +writer_check(WriterObject *self) +{ + if (self->writer == NULL) { + PyErr_SetString(PyExc_ValueError, "operation on finished writer"); + return -1; + } + return 0; +} + + +static PyObject* +writer_write_bytes(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + self->buf = PyBytesWriter_WriteBytes(self->writer, self->buf, str, size); + if (self->buf == NULL) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_format_i(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + int number; + if (!PyArg_ParseTuple(args, "i", &number)) { + return NULL; + } + + self->buf = PyBytesWriter_Format(self->writer, self->buf, "%i", number); + if (self->buf == NULL) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_extend(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t extend; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, + "ny#", + &extend, &str, &str_size)) { + return NULL; + } + assert(extend >= str_size); + + self->buf = PyBytesWriter_Extend(self->writer, self->buf, extend); + if (self->buf == NULL) { + return NULL; + } + memcpy(self->buf, str, str_size); + self->buf += str_size; + + Py_RETURN_NONE; +} + + +static PyObject* +writer_get_remaining(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size = PyBytesWriter_GetRemaining(self->writer, self->buf); + return 
PyLong_FromSsize_t(size); +} + + +static PyObject* +writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + PyObject *str = PyBytesWriter_Finish(self->writer, self->buf); + self->writer = NULL; + return str; +} + + +static PyMethodDef writer_methods[] = { + {"write_bytes", _PyCFunction_CAST(writer_write_bytes), METH_VARARGS}, + {"format_i", _PyCFunction_CAST(writer_format_i), METH_VARARGS}, + {"extend", _PyCFunction_CAST(writer_extend), METH_VARARGS}, + {"get_remaining", _PyCFunction_CAST(writer_get_remaining), METH_NOARGS}, + {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_new, writer_new}, + {Py_tp_init, writer_init}, + {Py_tp_dealloc, writer_dealloc}, + {Py_tp_methods, writer_methods}, + {0, 0}, /* sentinel */ +}; + +static PyType_Spec Writer_spec = { + .name = "_testcapi.PyBytesWriter", + .basicsize = sizeof(WriterObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = Writer_Type_slots, +}; + + +// Similar to bytes.center() with a different API: spaces are number of +// whitespaces added to the left and to the right. 
+static PyObject * +byteswriter_center_example(Py_ssize_t spaces, char *str, Py_ssize_t str_size) +{ + PyBytesWriter *writer; + char *buf = PyBytesWriter_Create(&writer, spaces * 2); + if (buf == NULL) { + goto error; + } + assert(PyBytesWriter_GetRemaining(writer, buf) == spaces * 2); + + // Add left spaces + memset(buf, ' ', spaces); + buf += spaces; + assert(PyBytesWriter_GetRemaining(writer, buf) == spaces); + + // Copy string + buf = PyBytesWriter_Extend(writer, buf, str_size); + if (buf == NULL) { + goto error; + } + assert(PyBytesWriter_GetRemaining(writer, buf) == spaces + str_size); + + memcpy(buf, str, str_size); + buf += str_size; + assert(PyBytesWriter_GetRemaining(writer, buf) == spaces); + + // Add right spaces + memset(buf, ' ', spaces); + buf += spaces; + assert(PyBytesWriter_GetRemaining(writer, buf) == 0); + + return PyBytesWriter_Finish(writer, buf); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +static PyObject * +byteswriter_center(PyObject *Py_UNUSED(module), PyObject *args) +{ + Py_ssize_t spaces; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, "ny#", &spaces, &str, &str_size)) { + return NULL; + } + + return byteswriter_center_example(spaces, str, str_size); +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, + {"byteswriter_center", byteswriter_center, METH_VARARGS}, {NULL}, }; @@ -64,5 +323,15 @@ _PyTestCapi_Init_Bytes(PyObject *m) return -1; } + PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); + if (writer_type == NULL) { + return -1; + } + if (PyModule_AddType(m, writer_type) < 0) { + Py_DECREF(writer_type); + return -1; + } + Py_DECREF(writer_type); + return 0; } diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..c4708060b80a9e 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -302,21 +302,11 @@ static PyObject * binascii_b2a_uu_impl(PyObject *module, Py_buffer 
*data, int backtick) /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/ { - unsigned char *ascii_data; - const unsigned char *bin_data; - int leftbits = 0; - unsigned char this_ch; - unsigned int leftchar = 0; - binascii_state *state; - Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - bin_data = data->buf; - bin_len = data->len; + const unsigned char *bin_data = data->buf; + Py_ssize_t bin_len = data->len; if ( bin_len > 45 ) { /* The 45 is a limit that appears in all uuencode's */ - state = get_binascii_state(module); + binascii_state *state = get_binascii_state(module); if (state == NULL) { return NULL; } @@ -325,10 +315,13 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } /* We're lazy and allocate to much (fixed up later) */ - out_len = 2 + (bin_len + 2) / 3 * 4; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + Py_ssize_t out_len = 2 + (bin_len + 2) / 3 * 4; + + PyBytesWriter *writer; + unsigned char *ascii_data = PyBytesWriter_Create(&writer, out_len); + if (ascii_data == NULL) { return NULL; + } /* Store the length */ if (backtick && !bin_len) @@ -336,6 +329,8 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) else *ascii_data++ = ' ' + (unsigned char)bin_len; + int leftbits = 0; + unsigned int leftchar = 0; for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) { /* Shift the data (or padding) into our buffer */ if ( bin_len > 0 ) /* Data */ @@ -346,7 +341,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) /* See if there are 6-bit groups ready */ while ( leftbits >= 6 ) { - this_ch = (leftchar >> (leftbits-6)) & 0x3f; + unsigned char this_ch = (leftchar >> (leftbits-6)) & 0x3f; leftbits -= 6; if (backtick && !this_ch) *ascii_data++ = '`'; @@ -356,7 +351,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } *ascii_data++ = '\n'; /* Append a courtesy newline 
*/ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_Finish(writer, ascii_data); } /*[clinic input] @@ -387,12 +382,12 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len); - if (bin_data == NULL) + PyBytesWriter *writer; + unsigned char *bin_data_start = PyBytesWriter_Create(&writer, bin_len); + if (bin_data_start == NULL) { return NULL; - unsigned char *bin_data_start = bin_data; + } + unsigned char *bin_data = bin_data_start; if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { state = get_binascii_state(module); @@ -502,13 +497,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) } else { PyErr_SetString(state->Error, "Incorrect padding"); } - error_end: - _PyBytesWriter_Dealloc(&writer); - return NULL; + goto error_end; } done: - return _PyBytesWriter_Finish(&writer, bin_data); + return PyBytesWriter_Finish(writer, bin_data); + +error_end: + PyBytesWriter_Discard(writer); + return NULL; } @@ -527,18 +524,15 @@ static PyObject * binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; binascii_state *state; bin_data = data->buf; bin_len = data->len; - _PyBytesWriter_Init(&writer); assert(bin_len >= 0); @@ -557,9 +551,12 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) out_len = bin_len*2 + 2; if (newline) out_len++; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + + PyBytesWriter *writer; + unsigned char *ascii_data 
= PyBytesWriter_Create(&writer, out_len); + if (ascii_data == NULL) { return NULL; + } for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -584,7 +581,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_Finish(writer, ascii_data); } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b3d1c425ad18b7..715dabd12d394f 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue @@ -32,8 +33,8 @@ class bytes "PyBytesObject *" "&PyBytes_Type" #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) /* Forward declaration */ -Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, - char *str); +static void* PyBytesWriter_CreateByteArray(PyBytesWriter **writer, + Py_ssize_t alloc); #define CHARACTERS _Py_SINGLETON(bytes_characters) @@ -194,10 +195,10 @@ PyBytes_FromString(const char *str) return (PyObject *) op; } -PyObject * -PyBytes_FromFormatV(const char *format, va_list vargs) +static char* +bytes_fromformat(PyBytesWriter *writer, char *s, + const char *format, va_list vargs) { - char *s; const char *f; const char *p; Py_ssize_t prec; @@ -211,20 +212,18 @@ PyBytes_FromFormatV(const char *format, va_list vargs) Longest 64-bit pointer representation: "0xffffffffffffffff\0" (19 bytes). 
*/ char buffer[21]; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - - s = _PyBytesWriter_Alloc(&writer, strlen(format)); - if (s == NULL) - return NULL; - writer.overallocate = 1; #define WRITE_BYTES(str) \ do { \ - s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ - if (s == NULL) \ - goto error; \ + Py_ssize_t len = strlen(str); \ + if (len > 2) { \ + s = PyBytesWriter_Extend(writer, s, len - 2); \ + if (s == NULL) { \ + goto error; \ + } \ + } \ + memcpy(s, (str), len); \ + s += len; \ } while (0) for (f = format; *f; f++) { @@ -266,10 +265,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) ++f; } - /* subtract bytes preallocated for the format string - (ex: 2 for "%s") */ - writer.min_size -= (f - p + 1); - switch (*f) { case 'c': { @@ -280,7 +275,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "expects an integer in range [0; 255]"); goto error; } - writer.min_size++; *s++ = (unsigned char)c; break; } @@ -339,9 +333,10 @@ PyBytes_FromFormatV(const char *format, va_list vargs) i++; } } - s = _PyBytesWriter_WriteBytes(&writer, s, p, i); - if (s == NULL) + s = PyBytesWriter_WriteBytes(writer, s, p, i); + if (s == NULL) { goto error; + } break; } @@ -360,31 +355,46 @@ PyBytes_FromFormatV(const char *format, va_list vargs) break; case '%': - writer.min_size++; *s++ = '%'; break; default: - if (*f == 0) { - /* fix min_size if we reached the end of the format string */ - writer.min_size++; - } - /* invalid format string: copy unformatted string and exit */ - WRITE_BYTES(p); - return _PyBytesWriter_Finish(&writer, s); + s = PyBytesWriter_WriteBytes(writer, s, p, strlen(p)); + if (s == NULL) { + goto error; + } + goto done; } } #undef WRITE_BYTES - return _PyBytesWriter_Finish(&writer, s); +done: + return s; - error: - _PyBytesWriter_Dealloc(&writer); +error: return NULL; } +PyObject * +PyBytes_FromFormatV(const char *format, va_list vargs) +{ + PyBytesWriter *writer; + char *s = PyBytesWriter_Create(&writer, 
strlen(format)); + if (s == NULL) { + return NULL; + } + + s = bytes_fromformat(writer, s, format, vargs); + if (s == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + return PyBytesWriter_Finish(writer, s); +} + PyObject * PyBytes_FromFormat(const char *format, ...) { @@ -419,7 +429,7 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) static char* formatfloat(PyObject *v, int flags, int prec, int type, - PyObject **p_result, _PyBytesWriter *writer, char *str) + PyObject **p_result, PyBytesWriter *writer, char *str) { char *p; PyObject *result; @@ -447,14 +457,8 @@ formatfloat(PyObject *v, int flags, int prec, int type, len = strlen(p); if (writer != NULL) { - str = _PyBytesWriter_Prepare(writer, str, len); - if (str == NULL) { - PyMem_Free(p); - return NULL; - } - memcpy(str, p, len); + str = PyBytesWriter_WriteBytes(writer, str, p, len); PyMem_Free(p); - str += len; return str; } @@ -605,7 +609,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; - _PyBytesWriter writer; if (args == NULL) { PyErr_BadInternalCall(); @@ -614,14 +617,16 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, fmt = format; fmtcnt = format_len; - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; - - res = _PyBytesWriter_Alloc(&writer, fmtcnt); - if (res == NULL) + PyBytesWriter *writer; + if (use_bytearray) { + res = PyBytesWriter_CreateByteArray(&writer, fmtcnt); + } + else { + res = PyBytesWriter_Create(&writer, fmtcnt); + } + if (res == NULL) { return NULL; - if (!use_bytearray) - writer.overallocate = 1; + } if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); @@ -824,10 +829,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (v == NULL) goto error; - if (fmtcnt == 0) { - /* last write: disable writer overallocation */ - writer.overallocate = 0; - } sign = 0; fill = ' '; @@ -889,8 +890,7 @@ _PyBytes_FormatEx(const char 
*format, Py_ssize_t format_len, } /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%d" */ - res = _PyLong_FormatBytesWriter(&writer, res, + res = _PyLong_FormatBytesWriter(writer, res, v, base, alternate); if (res == NULL) goto error; @@ -918,8 +918,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%f" */ - res = formatfloat(v, flags, prec, c, NULL, &writer, res); + res = formatfloat(v, flags, prec, c, NULL, writer, res); if (res == NULL) goto error; continue; @@ -973,11 +972,12 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, alloc = width; if (sign != 0 && len == width) alloc++; - /* 2: size preallocated for %s */ + /* 2: size preallocated for "%s" string */ if (alloc > 2) { - res = _PyBytesWriter_Prepare(&writer, res, alloc - 2); - if (res == NULL) + res = PyBytesWriter_Extend(writer, res, alloc - 2); + if (res == NULL) { goto error; + } } #ifndef NDEBUG char *before = res; @@ -1050,10 +1050,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, assert((res - before) == alloc); #endif } /* '%' */ - - /* If overallocation was disabled, ensure that it was the last - write. 
Otherwise, we missed an optimization */ - assert(writer.overallocate || fmtcnt == 0 || use_bytearray); } /* until end */ if (argidx < arglen && !dict) { @@ -1065,10 +1061,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (args_owned) { Py_DECREF(args); } - return _PyBytesWriter_Finish(&writer, res); + return PyBytesWriter_Finish(writer, res); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); if (args_owned) { Py_DECREF(args); } @@ -1082,16 +1078,13 @@ PyObject *_PyBytes_DecodeEscape(const char *s, const char **first_invalid_escape) { int c; - char *p; const char *end; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - p = _PyBytesWriter_Alloc(&writer, len); - if (p == NULL) + PyBytesWriter *writer; + char *p = PyBytesWriter_Create(&writer, len); + if (p == NULL) { return NULL; - writer.overallocate = 1; + } *first_invalid_escape = NULL; @@ -1183,10 +1176,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_Finish(writer, p); failed: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -2507,14 +2500,10 @@ bytes_fromhex_impl(PyTypeObject *type, PyObject *string) PyObject* _PyBytes_FromHex(PyObject *string, int use_bytearray) { - char *buf; Py_ssize_t hexlen, invalid_char; unsigned int top, bot; const Py_UCS1 *str, *end; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; + PyBytesWriter *writer = NULL; assert(PyUnicode_Check(string)); hexlen = PyUnicode_GET_LENGTH(string); @@ -2537,9 +2526,16 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) str = PyUnicode_1BYTE_DATA(string); /* This overestimates if there are spaces */ - buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); - if (buf == NULL) + char *buf; + if (use_bytearray) { + buf = PyBytesWriter_CreateByteArray(&writer, hexlen / 2); + } + else { + buf = PyBytesWriter_Create(&writer, hexlen / 2); + } + if 
(buf == NULL) { return NULL; + } end = str + hexlen; while (str < end) { @@ -2574,7 +2570,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) *buf++ = (unsigned char)((top << 4) + bot); } - return _PyBytesWriter_Finish(&writer, buf); + return PyBytesWriter_Finish(writer, buf); error: if (invalid_char == -1) { @@ -2585,7 +2581,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) "non-hexadecimal number found in " "fromhex() arg at position %zd", invalid_char); } - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -2830,23 +2826,18 @@ _PyBytes_FromBuffer(PyObject *x) static PyObject* _PyBytes_FromList(PyObject *x) { - Py_ssize_t i, size = PyList_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + Py_ssize_t size = PyList_GET_SIZE(x); + PyBytesWriter *writer; + char *str = PyBytesWriter_Create(&writer, size); + if (str == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } - for (i = 0; i < PyList_GET_SIZE(x); i++) { - item = PyList_GET_ITEM(x, i); + Py_ssize_t extend = 1; + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) { + PyObject *item = PyList_GET_ITEM(x, i); Py_INCREF(item); - value = PyNumber_AsSsize_t(item, NULL); + Py_ssize_t value = PyNumber_AsSsize_t(item, NULL); Py_DECREF(item); if (value == -1 && PyErr_Occurred()) goto error; @@ -2858,37 +2849,39 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; + // The list was extended by a previous PyNumber_AsSsize_t() call + str = PyBytesWriter_Extend(writer, str, extend); + if (str == NULL) { + goto error; + } + size += extend; + + if (extend <= PY_SSIZE_T_MAX / 2) { + extend *= 2; + } } *str++ = (char) value; } - return _PyBytesWriter_Finish(&writer, str); + return 
PyBytesWriter_Finish(writer, str); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } static PyObject* _PyBytes_FromTuple(PyObject *x) { - PyObject *bytes; - Py_ssize_t i, size = PyTuple_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + Py_ssize_t size = PyTuple_GET_SIZE(x); + PyBytesWriter *writer; + char *str = PyBytesWriter_Create(&writer, size); + if (str == NULL) { return NULL; - str = ((PyBytesObject *)bytes)->ob_sval; + } - for (i = 0; i < size; i++) { - item = PyTuple_GET_ITEM(x, i); - value = PyNumber_AsSsize_t(item, NULL); + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyTuple_GET_ITEM(x, i); + Py_ssize_t value = PyNumber_AsSsize_t(item, NULL); if (value == -1 && PyErr_Occurred()) goto error; @@ -2899,34 +2892,30 @@ _PyBytes_FromTuple(PyObject *x) } *str++ = (char) value; } - return bytes; + return PyBytesWriter_Finish(writer, str); error: - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } static PyObject * _PyBytes_FromIterator(PyObject *it, PyObject *x) { - char *str; - Py_ssize_t i, size; - _PyBytesWriter writer; - /* For iterator version, create a bytes object and resize as needed */ - size = PyObject_LengthHint(x, 64); + Py_ssize_t size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + PyBytesWriter *writer; + char *str = PyBytesWriter_Create(&writer, size); + if (str == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } /* Run the iterator to exhaustion */ - for (i = 0; ; i++) { + Py_ssize_t extend = 1; + for (Py_ssize_t i = 0; ; i++) { PyObject *item; Py_ssize_t value; @@ -2953,18 +2942,24 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) /* Append the byte */ if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str 
== NULL) - return NULL; - size = writer.allocated; + // The iterator yielded more items than the length hint: grow the buffer + str = PyBytesWriter_Extend(writer, str, extend); + if (str == NULL) { + goto error; + } + size += extend; + + if (extend <= PY_SSIZE_T_MAX / 2) { + extend *= 2; + } } *str++ = (char) value; } - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_Finish(writer, str); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -3396,308 +3391,319 @@ bytes_iter(PyObject *seq) } -/* _PyBytesWriter API */ - -#ifdef MS_WINDOWS - /* On Windows, overallocate by 50% is the best factor */ -# define OVERALLOCATE_FACTOR 2 -#else - /* On Linux, overallocate by 25% is the best factor */ -# define OVERALLOCATE_FACTOR 4 -#endif - void -_PyBytesWriter_Init(_PyBytesWriter *writer) +_PyBytes_Repeat(char* dest, Py_ssize_t len_dest, + const char* src, Py_ssize_t len_src) { - /* Set all attributes before small_buffer to 0 */ - memset(writer, 0, offsetof(_PyBytesWriter, small_buffer)); -#ifndef NDEBUG - memset(writer->small_buffer, PYMEM_CLEANBYTE, - sizeof(writer->small_buffer)); -#endif + if (len_dest == 0) { + return; + } + if (len_src == 1) { + memset(dest, src[0], len_dest); + } + else { + if (src != dest) { + memcpy(dest, src, len_src); + } + Py_ssize_t copied = len_src; + while (copied < len_dest) { + Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied); + memcpy(dest + copied, dest, bytes_to_copy); + copied += bytes_to_copy; + } + } } -void -_PyBytesWriter_Dealloc(_PyBytesWriter *writer) -{ - Py_CLEAR(writer->buffer); -} -Py_LOCAL_INLINE(char*) -_PyBytesWriter_AsString(_PyBytesWriter *writer) +// --- PyBytesWriter API ----------------------------------------------------- + +struct PyBytesWriter { + char small_buffer[256 - sizeof(PyObject*) - sizeof(Py_ssize_t) - sizeof(int)]; + PyObject *obj; + Py_ssize_t size; + int use_bytearray; +}; + + +static inline char* +byteswriter_start(PyBytesWriter *writer) { - 
if (writer->use_small_buffer) { - assert(writer->buffer == NULL); + if (writer->obj == NULL) { return writer->small_buffer; } else if (writer->use_bytearray) { - assert(writer->buffer != NULL); - return PyByteArray_AS_STRING(writer->buffer); + return PyByteArray_AS_STRING(writer->obj); } else { - assert(writer->buffer != NULL); - return PyBytes_AS_STRING(writer->buffer); + return PyBytes_AS_STRING(writer->obj); } } -Py_LOCAL_INLINE(Py_ssize_t) -_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str) -{ - const char *start = _PyBytesWriter_AsString(writer); - assert(str != NULL); - assert(str >= start); - assert(str - start <= writer->allocated); - return str - start; -} -#ifndef NDEBUG -Py_LOCAL_INLINE(int) -_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) +char* +_PyBytesWriter_Start(PyBytesWriter *writer) { - const char *start, *end; - - if (writer->use_small_buffer) { - assert(writer->buffer == NULL); - } - else { - assert(writer->buffer != NULL); - if (writer->use_bytearray) - assert(PyByteArray_CheckExact(writer->buffer)); - else - assert(PyBytes_CheckExact(writer->buffer)); - assert(Py_REFCNT(writer->buffer) == 1); - } - - if (writer->use_bytearray) { - /* bytearray has its own overallocation algorithm, - writer overallocation must be disabled */ - assert(!writer->overallocate); - } + return byteswriter_start(writer); +} - assert(0 <= writer->allocated); - assert(0 <= writer->min_size && writer->min_size <= writer->allocated); - /* the last byte must always be null */ - start = _PyBytesWriter_AsString(writer); - assert(start[writer->allocated] == 0); - end = start + writer->allocated; - assert(str != NULL); - assert(start <= str && str <= end); - return 1; -} +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 #endif -void* -_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, 
Py_ssize_t size) -{ - Py_ssize_t allocated, pos; - assert(_PyBytesWriter_CheckConsistency(writer, str)); - assert(writer->allocated < size); +static inline char* +byteswriter_alloc(PyBytesWriter *writer, Py_ssize_t size, int overallocate) +{ + if (writer->obj == NULL) { + if ((size_t)size <= sizeof(writer->small_buffer)) { + return writer->small_buffer; + } + } + else if (writer->use_bytearray) { + if (size <= PyByteArray_GET_SIZE(writer->obj)) { + return PyByteArray_AS_STRING(writer->obj); + } + } + else { + if (size <= PyBytes_GET_SIZE(writer->obj)) { + return PyBytes_AS_STRING(writer->obj); + } + } - allocated = size; - if (writer->overallocate - && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) { - /* overallocate to limit the number of realloc() */ - allocated += allocated / OVERALLOCATE_FACTOR; + if (overallocate) { + if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) { + size += size / OVERALLOCATE_FACTOR; + } } - pos = _PyBytesWriter_GetSize(writer, str); - if (!writer->use_small_buffer) { + if (writer->obj != NULL) { if (writer->use_bytearray) { - if (PyByteArray_Resize(writer->buffer, allocated)) - goto error; - /* writer->allocated can be smaller than writer->buffer->ob_alloc, - but we cannot use ob_alloc because bytes may need to be moved - to use the whole buffer. bytearray uses an internal optimization - to avoid moving or copying bytes when bytes are removed at the - beginning (ex: del bytearray[:1]). 
*/ + if (PyByteArray_Resize(writer->obj, size)) { + return NULL; + } } else { - if (_PyBytes_Resize(&writer->buffer, allocated)) - goto error; + if (_PyBytes_Resize(&writer->obj, size)) { + return NULL; + } + } + assert(writer->obj != NULL); + } + else if (writer->use_bytearray) { + writer->obj = PyByteArray_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return NULL; + } + if (writer->size) { + memcpy(PyByteArray_AS_STRING(writer->obj), + writer->small_buffer, + writer->size); } } else { - /* convert from stack buffer to bytes object buffer */ - assert(writer->buffer == NULL); + writer->obj = PyBytes_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return NULL; + } + if (writer->size) { + memcpy(PyBytes_AS_STRING(writer->obj), + writer->small_buffer, + writer->size); + } + } + return byteswriter_start(writer); +} - if (writer->use_bytearray) - writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated); - else - writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); - if (writer->buffer == NULL) - goto error; - if (pos != 0) { - char *dest; - if (writer->use_bytearray) - dest = PyByteArray_AS_STRING(writer->buffer); - else - dest = PyBytes_AS_STRING(writer->buffer); - memcpy(dest, - writer->small_buffer, - pos); +static void* +byteswriter_create(PyBytesWriter **writer_p, Py_ssize_t alloc, int bytearray) +{ + if (alloc < 0) { + PyErr_SetString(PyExc_ValueError, "alloc must be >= 0"); + goto error; + } + + PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers); + if (writer == NULL) { + writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter)); + if (writer == NULL) { + PyErr_NoMemory(); + goto error; } + } + writer->obj = NULL; + writer->size = 0; + writer->use_bytearray = bytearray; - writer->use_small_buffer = 0; -#ifndef NDEBUG - memset(writer->small_buffer, PYMEM_CLEANBYTE, - sizeof(writer->small_buffer)); -#endif + char *start; + if (alloc >= 1) { + start = byteswriter_alloc(writer, alloc, 0); + if (start == NULL) 
{ + PyBytesWriter_Discard(writer); + goto error; + } + writer->size = alloc; + } + else { + start = writer->small_buffer; } - writer->allocated = allocated; - str = _PyBytesWriter_AsString(writer) + pos; - assert(_PyBytesWriter_CheckConsistency(writer, str)); - return str; + *writer_p = writer; + return start; error: - _PyBytesWriter_Dealloc(writer); + *writer_p = NULL; return NULL; } + void* -_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) +PyBytesWriter_Create(PyBytesWriter **writer, Py_ssize_t alloc) { - Py_ssize_t new_min_size; - - assert(_PyBytesWriter_CheckConsistency(writer, str)); - assert(size >= 0); - - if (size == 0) { - /* nothing to do */ - return str; - } - - if (writer->min_size > PY_SSIZE_T_MAX - size) { - PyErr_NoMemory(); - _PyBytesWriter_Dealloc(writer); - return NULL; - } - new_min_size = writer->min_size + size; + return byteswriter_create(writer, alloc, 0); +} - if (new_min_size > writer->allocated) - str = _PyBytesWriter_Resize(writer, str, new_min_size); - writer->min_size = new_min_size; - return str; +static void* +PyBytesWriter_CreateByteArray(PyBytesWriter **writer, Py_ssize_t alloc) +{ + return byteswriter_create(writer, alloc, 1); } -/* Allocate the buffer to write size bytes. - Return the pointer to the beginning of buffer data. - Raise an exception and return NULL on error. */ -void* -_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size) + +void +PyBytesWriter_Discard(PyBytesWriter *writer) { - /* ensure that _PyBytesWriter_Alloc() is only called once */ - assert(writer->min_size == 0 && writer->buffer == NULL); - assert(size >= 0); + if (writer == NULL) { + return; + } - writer->use_small_buffer = 1; -#ifndef NDEBUG - writer->allocated = sizeof(writer->small_buffer) - 1; - /* In debug mode, don't use the full small buffer because it is less - efficient than bytes and bytearray objects to detect buffer underflow - and buffer overflow. 
Use 10 bytes of the small buffer to test also - code using the smaller buffer in debug mode. - - Don't modify the _PyBytesWriter structure (use a shorter small buffer) - in debug mode to also be able to detect stack overflow when running - tests in debug mode. The _PyBytesWriter is large (more than 512 bytes), - if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a - stack overflow. */ - writer->allocated = Py_MIN(writer->allocated, 10); - /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0, - to detect buffer overflow */ - writer->small_buffer[writer->allocated] = 0; -#else - writer->allocated = sizeof(writer->small_buffer); -#endif - return _PyBytesWriter_Prepare(writer, writer->small_buffer, size); + Py_XDECREF(writer->obj); + _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free); } -PyObject * -_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) -{ - Py_ssize_t size; - PyObject *result; - - assert(_PyBytesWriter_CheckConsistency(writer, str)); - size = _PyBytesWriter_GetSize(writer, str); - if (size == 0 && !writer->use_bytearray) { - Py_CLEAR(writer->buffer); - /* Get the empty byte string singleton */ - result = PyBytes_FromStringAndSize(NULL, 0); +PyObject* +PyBytesWriter_Finish(PyBytesWriter *writer, void *buf) +{ + char *start = byteswriter_start(writer); + if ((char*)buf < start) { + PyErr_SetString(PyExc_ValueError, "invalid buf pointer"); + goto error; } - else if (writer->use_small_buffer) { + Py_ssize_t final_size = ((char*)buf - start); + writer->size = -1; + + PyObject *result; + if (final_size == 0) { if (writer->use_bytearray) { - result = PyByteArray_FromStringAndSize(writer->small_buffer, size); + result = PyByteArray_FromStringAndSize("", 0); } else { - result = PyBytes_FromStringAndSize(writer->small_buffer, size); + result = bytes_get_empty(); } } - else { - result = writer->buffer; - writer->buffer = NULL; - - if (size != writer->allocated) { - if (writer->use_bytearray) { - if 
(PyByteArray_Resize(result, size)) { - Py_DECREF(result); - return NULL; + else if (writer->obj != NULL) { + if (writer->use_bytearray) { + if (final_size != PyByteArray_GET_SIZE(writer->obj)) { + if (PyByteArray_Resize(writer->obj, final_size)) { + goto error; } } - else { - if (_PyBytes_Resize(&result, size)) { - assert(result == NULL); - return NULL; + } + else { + if (final_size != PyBytes_GET_SIZE(writer->obj)) { + if (_PyBytes_Resize(&writer->obj, final_size)) { + goto error; } } } + result = writer->obj; + writer->obj = NULL; + } + else { + if (writer->use_bytearray) { + result = PyByteArray_FromStringAndSize(writer->small_buffer, final_size); + } + else { + result = PyBytes_FromStringAndSize(writer->small_buffer, final_size); + } } + PyBytesWriter_Discard(writer); return result; + +error: + PyBytesWriter_Discard(writer); + return NULL; } -void* -_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, - const void *bytes, Py_ssize_t size) + +Py_ssize_t +PyBytesWriter_GetRemaining(PyBytesWriter *writer, void *buf) { - char *str = (char *)ptr; + Py_ssize_t pos = (char*)buf - byteswriter_start(writer); + return writer->size - pos; +} - str = _PyBytesWriter_Prepare(writer, str, size); - if (str == NULL) + +void* +PyBytesWriter_Extend(PyBytesWriter *writer, void *buf, Py_ssize_t extend) +{ + Py_ssize_t alloc_size = writer->size; + if (extend > PY_SSIZE_T_MAX - alloc_size) { + PyErr_NoMemory(); return NULL; + } + alloc_size += extend; - memcpy(str, bytes, size); - str += size; + Py_ssize_t pos = (char*)buf - byteswriter_start(writer); + int overallocate = !writer->use_bytearray; + char *start = byteswriter_alloc(writer, alloc_size, overallocate); + if (start == NULL) { + return NULL; + } + writer->size = alloc_size; - return str; + return start + pos; } -void -_PyBytes_Repeat(char* dest, Py_ssize_t len_dest, - const char* src, Py_ssize_t len_src) +void* +PyBytesWriter_WriteBytes(PyBytesWriter *writer, void *buf, + const void *bytes, Py_ssize_t size) { - if 
(len_dest == 0) { - return; - } - if (len_src == 1) { - memset(dest, src[0], len_dest); + if (size < 0) { + size = strlen(bytes); } - else { - if (src != dest) { - memcpy(dest, src, len_src); - } - Py_ssize_t copied = len_src; - while (copied < len_dest) { - Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied); - memcpy(dest + copied, dest, bytes_to_copy); - copied += bytes_to_copy; - } + + buf = PyBytesWriter_Extend(writer, buf, size); + if (buf == NULL) { + return NULL; } + + memcpy(buf, bytes, size); + buf = (char*)buf + size; + return buf; } + +void* +PyBytesWriter_Format(PyBytesWriter *writer, void *buf, + const char *format, ...) +{ + buf = PyBytesWriter_Extend(writer, buf, strlen(format)); + if (buf == NULL) { + return NULL; + } + + va_list vargs; + va_start(vargs, format); + buf = bytes_fromformat(writer, buf, format, vargs); + va_end(vargs); + return buf; +} diff --git a/Objects/longobject.c b/Objects/longobject.c index 370328dcfe8c9a..6eede8ac5efb78 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2080,7 +2080,7 @@ static int pylong_int_to_decimal_string(PyObject *aa, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, + PyBytesWriter *bytes_writer, char **bytes_str) { PyObject *s = NULL; @@ -2111,7 +2111,7 @@ pylong_int_to_decimal_string(PyObject *aa, Py_ssize_t size = PyUnicode_GET_LENGTH(s); const void *data = PyUnicode_DATA(s); int kind = PyUnicode_KIND(s); - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, size); + *bytes_str = PyBytesWriter_Extend(bytes_writer, *bytes_str, size); if (*bytes_str == NULL) { goto error; } @@ -2148,7 +2148,7 @@ static int long_to_decimal_string_internal(PyObject *aa, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, + PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *scratch, *a; @@ -2190,10 +2190,10 @@ long_to_decimal_string_internal(PyObject *aa, if (size_a > 1000) { /* Switch to _pylong.int_to_decimal_string(). 
*/ return pylong_int_to_decimal_string(aa, - p_output, - writer, - bytes_writer, - bytes_str); + p_output, + writer, + bytes_writer, + bytes_str); } #endif @@ -2274,7 +2274,7 @@ long_to_decimal_string_internal(PyObject *aa, } } else if (bytes_writer) { - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen); + *bytes_str = PyBytesWriter_Extend(bytes_writer, *bytes_str, strlen); if (*bytes_str == NULL) { Py_DECREF(scratch); return -1; @@ -2384,7 +2384,7 @@ long_to_decimal_string(PyObject *aa) static int long_format_binary(PyObject *aa, int base, int alternate, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, char **bytes_str) + PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *a = (PyLongObject *)aa; PyObject *v = NULL; @@ -2445,7 +2445,7 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } else if (bytes_writer) { - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz); + *bytes_str = PyBytesWriter_Extend(bytes_writer, *bytes_str, sz); if (*bytes_str == NULL) return -1; } @@ -2574,7 +2574,7 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer, } char* -_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str, +_PyLong_FormatBytesWriter(PyBytesWriter *writer, char *str, PyObject *obj, int base, int alternate) { diff --git a/Objects/object.c b/Objects/object.c index f3c7fa6d906ad6..768682d3d921ed 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -938,6 +938,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->bytes_writers, is_finalization, PyMem_Free); clear_freelist(&freelists->ints, is_finalization, free_object); clear_freelist(&freelists->pymethodobjects, is_finalization, free_object); } diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 
440410d0aef17d..e950c464cb9b33 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -258,7 +258,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, PyUnicode_READ() macro. Delete some parts of the code depending on the kind: UCS-1 strings don't need to handle surrogates for example. */ Py_LOCAL_INLINE(char *) -STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, +STRINGLIB(utf8_encoder)(PyBytesWriter **writer_p, PyObject *unicode, const STRINGLIB_CHAR *data, Py_ssize_t size, @@ -287,8 +287,9 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, return NULL; } - _PyBytesWriter_Init(writer); - p = _PyBytesWriter_Alloc(writer, size * max_char_size); + PyBytesWriter *writer; + p = PyBytesWriter_Create(&writer, size * max_char_size); + *writer_p = writer; if (p == NULL) return NULL; @@ -323,9 +324,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])) endpos++; - /* Only overallocate the buffer if it's not the last write */ - writer->overallocate = (endpos < size); - switch (error_handler) { case _Py_ERROR_REPLACE: @@ -347,8 +345,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, break; case _Py_ERROR_BACKSLASHREPLACE: - /* subtract preallocated bytes */ - writer->min_size -= max_char_size * (endpos - startpos); p = backslashreplace(writer, p, unicode, startpos, endpos); if (p == NULL) @@ -357,8 +353,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, break; case _Py_ERROR_XMLCHARREFREPLACE: - /* subtract preallocated bytes */ - writer->min_size -= max_char_size * (endpos - startpos); p = xmlcharrefreplace(writer, p, unicode, startpos, endpos); if (p == NULL) @@ -388,23 +382,16 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, goto error; if (newpos < startpos) { - writer->overallocate = 1; - p = _PyBytesWriter_Prepare(writer, p, - max_char_size * (startpos - newpos)); + p = PyBytesWriter_Extend(writer, p, + max_char_size * (startpos - newpos)); if (p == NULL) goto error; } - 
else { - /* subtract preallocated bytes */ - writer->min_size -= max_char_size * (newpos - startpos); - /* Only overallocate the buffer if it's not the last write */ - writer->overallocate = (newpos < size); - } if (PyBytes_Check(rep)) { - p = _PyBytesWriter_WriteBytes(writer, p, - PyBytes_AS_STRING(rep), - PyBytes_GET_SIZE(rep)); + p = PyBytesWriter_WriteBytes(writer, p, + PyBytes_AS_STRING(rep), + PyBytes_GET_SIZE(rep)); } else { /* rep is unicode */ @@ -415,9 +402,9 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, goto error; } - p = _PyBytesWriter_WriteBytes(writer, p, - PyUnicode_DATA(rep), - PyUnicode_GET_LENGTH(rep)); + p = PyBytesWriter_WriteBytes(writer, p, + PyUnicode_DATA(rep), + PyUnicode_GET_LENGTH(rep)); } if (p == NULL) @@ -426,10 +413,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, i = newpos; } - - /* If overallocation was disabled, ensure that it was the last - write. Otherwise, we missed an optimization */ - assert(writer->overallocate || i == size); } else #if STRINGLIB_SIZEOF_CHAR > 2 diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 75967d69ed374d..ce0040766b5909 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -830,7 +830,7 @@ unicode_result_unchanged(PyObject *unicode) /* Implementation of the "backslashreplace" error handler for 8-bit encodings: ASCII, Latin1, UTF-8, etc. */ static char* -backslashreplace(_PyBytesWriter *writer, char *str, +backslashreplace(PyBytesWriter *writer, char *str, PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend) { Py_ssize_t size, i; @@ -863,7 +863,7 @@ backslashreplace(_PyBytesWriter *writer, char *str, size += incr; } - str = _PyBytesWriter_Prepare(writer, str, size); + str = PyBytesWriter_Extend(writer, str, size); if (str == NULL) return NULL; @@ -896,7 +896,7 @@ backslashreplace(_PyBytesWriter *writer, char *str, /* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings: ASCII, Latin1, UTF-8, etc. 
*/ static char* -xmlcharrefreplace(_PyBytesWriter *writer, char *str, +xmlcharrefreplace(PyBytesWriter *writer, char *str, PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend) { Py_ssize_t size, i; @@ -937,7 +937,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, size += incr; } - str = _PyBytesWriter_Prepare(writer, str, size); + str = PyBytesWriter_Extend(writer, str, size); if (str == NULL) return NULL; @@ -5844,7 +5844,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); - _PyBytesWriter writer; + PyBytesWriter *writer = NULL; char *end; switch (kind) { @@ -5864,10 +5864,10 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, } if (end == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } - return _PyBytesWriter_Finish(&writer, end); + return PyBytesWriter_Finish(writer, end); } static int @@ -5881,7 +5881,7 @@ unicode_fill_utf8(PyObject *unicode) const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); - _PyBytesWriter writer; + PyBytesWriter *writer = NULL; char *end; switch (kind) { @@ -5901,17 +5901,16 @@ unicode_fill_utf8(PyObject *unicode) break; } if (end == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return -1; } - const char *start = writer.use_small_buffer ? 
writer.small_buffer : - PyBytes_AS_STRING(writer.buffer); + const char *start = _PyBytesWriter_Start(writer); Py_ssize_t len = end - start; char *cache = PyMem_Malloc(len + 1); if (cache == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); PyErr_NoMemory(); return -1; } @@ -5919,7 +5918,7 @@ unicode_fill_utf8(PyObject *unicode) cache[len] = '\0'; PyUnicode_SET_UTF8_LENGTH(unicode, len); PyUnicode_SET_UTF8(unicode, cache); - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return 0; } @@ -7354,7 +7353,6 @@ unicode_encode_ucs1(PyObject *unicode, _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; PyObject *rep = NULL; /* output object */ - _PyBytesWriter writer; size = PyUnicode_GET_LENGTH(unicode); kind = PyUnicode_KIND(unicode); @@ -7364,10 +7362,11 @@ unicode_encode_ucs1(PyObject *unicode, if (size == 0) return PyBytes_FromStringAndSize(NULL, 0); - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + PyBytesWriter *writer; + str = PyBytesWriter_Create(&writer, size); + if (str == NULL) { return NULL; + } while (pos < size) { Py_UCS4 ch = PyUnicode_READ(kind, data, pos); @@ -7388,9 +7387,6 @@ unicode_encode_ucs1(PyObject *unicode, while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; - /* Only overallocate the buffer if it's not the last write */ - writer.overallocate = (collend < size); - /* cache callback name lookup (if not done yet, i.e. 
it's the first error) */ if (error_handler == _Py_ERROR_UNKNOWN) error_handler = _Py_GetErrorHandler(errors); @@ -7409,9 +7405,7 @@ unicode_encode_ucs1(PyObject *unicode, break; case _Py_ERROR_BACKSLASHREPLACE: - /* subtract preallocated bytes */ - writer.min_size -= (collend - collstart); - str = backslashreplace(&writer, str, + str = backslashreplace(writer, str, unicode, collstart, collend); if (str == NULL) goto onError; @@ -7419,9 +7413,7 @@ unicode_encode_ucs1(PyObject *unicode, break; case _Py_ERROR_XMLCHARREFREPLACE: - /* subtract preallocated bytes */ - writer.min_size -= (collend - collstart); - str = xmlcharrefreplace(&writer, str, + str = xmlcharrefreplace(writer, str, unicode, collstart, collend); if (str == NULL) goto onError; @@ -7452,24 +7444,17 @@ unicode_encode_ucs1(PyObject *unicode, goto onError; if (newpos < collstart) { - writer.overallocate = 1; - str = _PyBytesWriter_Prepare(&writer, str, - collstart - newpos); + str = PyBytesWriter_Extend(writer, str, + collstart - newpos); if (str == NULL) goto onError; } - else { - /* subtract preallocated bytes */ - writer.min_size -= newpos - collstart; - /* Only overallocate the buffer if it's not the last write */ - writer.overallocate = (newpos < size); - } if (PyBytes_Check(rep)) { /* Directly copy bytes result to output. 
*/ - str = _PyBytesWriter_WriteBytes(&writer, str, - PyBytes_AS_STRING(rep), - PyBytes_GET_SIZE(rep)); + str = PyBytesWriter_WriteBytes(writer, str, + PyBytes_AS_STRING(rep), + PyBytes_GET_SIZE(rep)); } else { assert(PyUnicode_Check(rep)); @@ -7484,9 +7469,9 @@ unicode_encode_ucs1(PyObject *unicode, goto onError; } assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); - str = _PyBytesWriter_WriteBytes(&writer, str, - PyUnicode_DATA(rep), - PyUnicode_GET_LENGTH(rep)); + str = PyBytesWriter_WriteBytes(writer, str, + PyUnicode_DATA(rep), + PyUnicode_GET_LENGTH(rep)); } if (str == NULL) goto onError; @@ -7494,20 +7479,16 @@ unicode_encode_ucs1(PyObject *unicode, pos = newpos; Py_CLEAR(rep); } - - /* If overallocation was disabled, ensure that it was the last - write. Otherwise, we missed an optimization */ - assert(writer.overallocate || pos == size); } } Py_XDECREF(error_handler_obj); Py_XDECREF(exc); - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_Finish(writer, str); onError: Py_XDECREF(rep); - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); Py_XDECREF(error_handler_obj); Py_XDECREF(exc); return NULL;