Skip to content

Commit 7c4cc95

Browse files
committed
Add PyUnicodeWriter_DecodeUTF8Stateful()
1 parent d1019a0 commit 7c4cc95

File tree

3 files changed

+62
-0
lines changed

3 files changed

+62
-0
lines changed

Include/cpython/unicodeobject.h

+6
Original file line number | Diff line number | Diff line change
@@ -459,6 +459,12 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
459459
PyUnicodeWriter *writer,
460460
const char *str,
461461
Py_ssize_t size);
462+
/* Decode the UTF-8 encoded bytes in `string` and append the decoded text to
   `writer`.

   If `length` is negative, the implementation computes it with
   strlen(string), so `string` must then be NUL-terminated.
   `errors` names the error handler; the accompanying test uses "ignore" and
   "replace".
   `consumed` is forwarded to the internal decoder; the accompanying test
   passes NULL for it.

   Returns a negative value on failure, in which case the writer position is
   restored to what it was before the call. */
PyAPI_FUNC(int) PyUnicodeWriter_DecodeUTF8Stateful(
463+
PyUnicodeWriter *writer,
464+
const char *string, /* UTF-8 encoded string */
465+
Py_ssize_t length, /* size of string in bytes, or negative to use strlen() */
466+
const char *errors, /* error handling */
467+
Py_ssize_t *consumed); /* bytes consumed */
462468
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
463469
PyUnicodeWriter *writer,
464470
wchar_t *str,

Modules/_testcapi/unicode.c

+34
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,39 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
374374
}
375375

376376

377+
/* Exercise PyUnicodeWriter_DecodeUTF8Stateful() with the "ignore" and
   "replace" error handlers, separated by a '-' character, and check the
   concatenated result. Returns None on success, NULL on failure. */
static PyObject *
test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // The three writes run strictly in order; short-circuit evaluation stops
    // at the first failing call, after which the writer must be discarded.
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "ign\xFFore", -1, "ignore", NULL) < 0
        || PyUnicodeWriter_WriteChar(writer, '-') < 0
        || PyUnicodeWriter_DecodeUTF8Stateful(writer, "replace\xFF", -1, "replace", NULL) < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // No Discard call follows Finish, even when it fails.
    PyObject *decoded = PyUnicodeWriter_Finish(writer);
    if (decoded == NULL) {
        return NULL;
    }

    // "\xFF" is dropped by "ignore" and becomes U+FFFD (EF BF BD) with "replace".
    assert(PyUnicode_EqualToUTF8(decoded, "ignore-replace\xef\xbf\xbd"));
    Py_DECREF(decoded);

    Py_RETURN_NONE;
}
408+
409+
377410
static PyObject *
378411
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
379412
{
@@ -484,6 +517,7 @@ static PyMethodDef TestMethods[] = {
484517
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
485518
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
486519
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
520+
{"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
487521
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
488522
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
489523
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},

Objects/unicodeobject.c

+22
Original file line number | Diff line number | Diff line change
@@ -13501,6 +13501,28 @@ PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
1350113501
}
1350213502

1350313503

13504+
int
13505+
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
13506+
const char *string,
13507+
Py_ssize_t length,
13508+
const char *errors,
13509+
Py_ssize_t *consumed)
13510+
{
13511+
if (length < 0) {
13512+
length = strlen(string);
13513+
}
13514+
13515+
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
13516+
Py_ssize_t old_pos = _writer->pos;
13517+
int res = unicode_decode_utf8_writer(_writer, string, length,
13518+
_Py_ERROR_UNKNOWN, errors, consumed);
13519+
if (res < 0) {
13520+
_writer->pos = old_pos;
13521+
}
13522+
return res;
13523+
}
13524+
13525+
1350413526
int
1350513527
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
1350613528
wchar_t *str,

0 commit comments

Comments
 (0)