Skip to content

Commit 3146a25

Browse files
authored
gh-129173: refactor PyCodec_BackslashReplaceErrors into separate functions (#129895)
The logic of `PyCodec_BackslashReplaceErrors` is now split into separate functions, each of which handles a specific exception type.
1 parent 04091c0 commit 3146a25

File tree

1 file changed

+70
-41
lines changed

1 file changed

+70
-41
lines changed

Python/codecs.c

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -956,49 +956,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
956956
return restuple;
957957
}
958958

959-
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
959+
960+
// --- handler: 'backslashreplace' --------------------------------------------
961+
962+
static PyObject *
963+
_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc)
960964
{
961965
PyObject *obj;
962966
Py_ssize_t objlen, start, end, slen;
963-
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
964-
if (_PyUnicodeError_GetParams(exc,
965-
&obj, &objlen,
966-
&start, &end, &slen, true) < 0)
967-
{
968-
return NULL;
969-
}
970-
PyObject *res = PyUnicode_New(4 * slen, 127);
971-
if (res == NULL) {
972-
Py_DECREF(obj);
973-
return NULL;
974-
}
975-
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
976-
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
977-
for (Py_ssize_t i = start; i < end; i++, outp += 4) {
978-
const unsigned char ch = p[i];
979-
outp[0] = '\\';
980-
outp[1] = 'x';
981-
outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
982-
outp[3] = Py_hexdigits[ch & 0xf];
983-
}
984-
assert(_PyUnicode_CheckConsistency(res, 1));
985-
Py_DECREF(obj);
986-
return Py_BuildValue("(Nn)", res, end);
987-
}
988-
989-
if (
990-
PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)
991-
|| PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)
992-
) {
993-
if (_PyUnicodeError_GetParams(exc,
994-
&obj, &objlen,
995-
&start, &end, &slen, false) < 0)
996-
{
997-
return NULL;
998-
}
999-
}
1000-
else {
1001-
wrong_exception_type(exc);
967+
if (_PyUnicodeError_GetParams(exc,
968+
&obj, &objlen,
969+
&start, &end, &slen, false) < 0)
970+
{
1002971
return NULL;
1003972
}
1004973

@@ -1035,6 +1004,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
10351004
}
10361005

10371006

1007+
// 'backslashreplace' handling for the decode case: every byte in the
// [start, end) range of the exception's object is rendered as a
// 4-character "\xNN" escape, where the two N's are looked up in
// Py_hexdigits from the high and low nibble of the byte.
//
// Returns the value of Py_BuildValue("(Nn)", res, end), i.e. the
// replacement string paired with 'end', or NULL if
// _PyUnicodeError_GetParams() or PyUnicode_New() fails.
static PyObject *
1008+
_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc)
1009+
{
1010+
PyObject *obj;
1011+
Py_ssize_t objlen, start, end, slen;
1012+
// Pull the object and the affected range out of 'exc'. The last
// argument is 'true' here (presumably "object is bytes" -- confirm
// against the definition of _PyUnicodeError_GetParams).
if (_PyUnicodeError_GetParams(exc,
1013+
&obj, &objlen,
1014+
&start, &end, &slen, true) < 0)
1015+
{
1016+
return NULL;
1017+
}
1018+
1019+
// Four output characters per input byte; maxchar is 127 because only
// ASCII characters are stored below.
// Assumes slen == end - start so the loop fills the buffer exactly
// -- TODO confirm.
// NOTE(review): 4 * slen is not checked for Py_ssize_t overflow in
// this function -- confirm slen is sufficiently bounded.
PyObject *res = PyUnicode_New(4 * slen, 127);
1020+
if (res == NULL) {
1021+
// 'obj' is released on every exit path once GetParams has succeeded
// (presumably it hands back a strong reference -- verify).
Py_DECREF(obj);
1022+
return NULL;
1023+
}
1024+
1025+
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
1026+
// 'obj' is accessed as a bytes object from here on.
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
1027+
// Emit one "\xNN" group per byte, advancing the output by 4 each time.
for (Py_ssize_t i = start; i < end; i++, outp += 4) {
1028+
const unsigned char ch = p[i];
1029+
outp[0] = '\\';
1030+
outp[1] = 'x';
1031+
outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
1032+
outp[3] = Py_hexdigits[ch & 0xf];
1033+
}
1034+
assert(_PyUnicode_CheckConsistency(res, 1));
1035+
Py_DECREF(obj);
1036+
// "N" passes 'res' into the tuple without incrementing its reference
// count, so no separate Py_DECREF(res) is needed here.
return Py_BuildValue("(Nn)", res, end);
1037+
}
1038+
1039+
1040+
// 'backslashreplace' handling for UnicodeTranslateError objects.
// Forwards 'exc' unchanged to the UnicodeEncodeError handler and returns
// whatever that handler returns.
static inline PyObject *
1041+
_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc)
1042+
{
1043+
// Same implementation as for UnicodeEncodeError objects.
1044+
return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
1045+
}
1046+
1047+
1048+
// Entry point of the 'backslashreplace' error handler.
//
// Dispatches 'exc' to the handler matching its exception type; the
// checks are tried in the order encode, decode, translate. If none of
// the checks match, wrong_exception_type(exc) is called and NULL is
// returned (presumably with an exception set by that helper -- verify).
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
1049+
{
1050+
if (_PyIsUnicodeEncodeError(exc)) {
1051+
return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
1052+
}
1053+
else if (_PyIsUnicodeDecodeError(exc)) {
1054+
return _PyCodec_BackslashReplaceUnicodeDecodeError(exc);
1055+
}
1056+
else if (_PyIsUnicodeTranslateError(exc)) {
1057+
return _PyCodec_BackslashReplaceUnicodeTranslateError(exc);
1058+
}
1059+
else {
1060+
// Not one of the three supported exception types.
wrong_exception_type(exc);
1061+
return NULL;
1062+
}
1063+
}
1064+
1065+
10381066
// --- handler: 'namereplace' -------------------------------------------------
10391067

10401068
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
@@ -1502,7 +1530,8 @@ xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
15021530
}
15031531

15041532

1505-
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
1533+
// Thin wrapper that forwards 'exc' to PyCodec_BackslashReplaceErrors()
// and returns its result. The first argument is marked Py_UNUSED and is
// never read. (Presumably this is the callable registered for the
// "backslashreplace" handler name elsewhere in the file -- verify.)
static inline PyObject *
1534+
backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
15061535
{
15071536
return PyCodec_BackslashReplaceErrors(exc);
15081537
}

0 commit comments

Comments
 (0)