Skip to content

Commit c00964e

Browse files
authored
gh-124665: Add _PyCodec_UnregisterError and _codecs._unregister_error (#124677)
1 parent 04c837d commit c00964e

File tree

6 files changed

+136
-2
lines changed

6 files changed

+136
-2
lines changed

Include/internal/pycore_codecs.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp);
2121

2222
extern PyObject* _PyCodec_Lookup(const char *encoding);
2323

24+
/*
25+
* Un-register the error handling callback function registered under
26+
* the given 'name'. Only custom error handlers can be un-registered.
27+
*
28+
* - Return -1 and set an exception if 'name' refers to a built-in
29+
* error handling name (e.g., 'strict'), or if an error occurred.
30+
* - Return 0 if no custom error handler can be found for 'name'.
31+
* - Return 1 if the custom error handler was successfully removed.
32+
*/
33+
extern int _PyCodec_UnregisterError(const char *name);
34+
2435
/* Text codec specific encoding and decoding API.
2536
2637
Checks the encoding against a list of codecs which do not

Lib/test/test_codeccallbacks.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from _codecs import _unregister_error as _codecs_unregister_error
12
import codecs
23
import html.entities
34
import itertools
@@ -1210,7 +1211,6 @@ def replace_with_long(exc):
12101211
'\ufffd\x00\x00'
12111212
)
12121213

1213-
12141214
def test_fake_error_class(self):
12151215
handlers = [
12161216
codecs.strict_errors,
@@ -1235,6 +1235,31 @@ class FakeUnicodeError(Exception):
12351235
with self.assertRaises((TypeError, FakeUnicodeError)):
12361236
handler(FakeUnicodeError())
12371237

1238+
def test_reject_unregister_builtin_error_handler(self):
# Each built-in error handler name below must be refused:
# _codecs._unregister_error() is expected to raise ValueError for it.
1239+
for name in [
1240+
'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace',
1241+
'xmlcharrefreplace', 'surrogateescape', 'surrogatepass',
1242+
]:
1243+
with self.subTest(name):
1244+
self.assertRaises(ValueError, _codecs_unregister_error, name)
1245+
1246+
def test_unregister_custom_error_handler(self):
# Round trip for a custom handler: register it, check that it can be
# looked up, un-register it, then check that the lookup fails again.
1247+
def custom_handler(exc):
1248+
raise exc
1249+
1250+
custom_name = 'test.test_unregister_custom_error_handler'
1251+
# Precondition: nothing is registered under this name yet.
self.assertRaises(LookupError, codecs.lookup_error, custom_name)
1252+
codecs.register_error(custom_name, custom_handler)
1253+
self.assertIs(codecs.lookup_error(custom_name), custom_handler)
1254+
# A truthy result means an existing custom handler was removed.
self.assertTrue(_codecs_unregister_error(custom_name))
1255+
self.assertRaises(LookupError, codecs.lookup_error, custom_name)
1256+
1257+
def test_unregister_custom_unknown_error_handler(self):
# Un-registering a name that was never registered must not raise; it
# returns a falsy value and leaves the name unknown to lookup_error().
1258+
unknown_name = 'test.test_unregister_custom_unknown_error_handler'
1259+
self.assertRaises(LookupError, codecs.lookup_error, unknown_name)
1260+
self.assertFalse(_codecs_unregister_error(unknown_name))
1261+
self.assertRaises(LookupError, codecs.lookup_error, unknown_name)
1262+
12381263

12391264
if __name__ == "__main__":
12401265
unittest.main()

Modules/_codecsmodule.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,30 @@ _codecs_register_error_impl(PyObject *module, const char *errors,
979979
Py_RETURN_NONE;
980980
}
981981

982+
/*[clinic input]
983+
_codecs._unregister_error -> bool
984+
errors: str
985+
/
986+
987+
Un-register the specified error handler for the error handling `errors'.
988+
989+
Only custom error handlers can be un-registered. An exception is raised
990+
if the error handling is a built-in one (e.g., 'strict'), or if an error
991+
occurs.
992+
993+
Otherwise, this returns True if a custom handler has been successfully
994+
un-registered, and False if no custom handler for the specified error
995+
handling exists.
996+
997+
[clinic start generated code]*/
998+
999+
static int
1000+
_codecs__unregister_error_impl(PyObject *module, const char *errors)
1001+
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
1002+
{
1003+
/* Thin wrapper: all the work happens in _PyCodec_UnregisterError(), and
 * its int result is returned unchanged.  'module' is not used here. */
return _PyCodec_UnregisterError(errors);
1004+
}
1005+
9821006
/*[clinic input]
9831007
_codecs.lookup_error
9841008
name: str
@@ -1044,6 +1068,7 @@ static PyMethodDef _codecs_functions[] = {
10441068
_CODECS_CODE_PAGE_ENCODE_METHODDEF
10451069
_CODECS_CODE_PAGE_DECODE_METHODDEF
10461070
_CODECS_REGISTER_ERROR_METHODDEF
1071+
_CODECS__UNREGISTER_ERROR_METHODDEF
10471072
_CODECS_LOOKUP_ERROR_METHODDEF
10481073
{NULL, NULL} /* sentinel */
10491074
};

Modules/clinic/_codecsmodule.c.h

Lines changed: 51 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/codecs.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ Copyright (c) Corporation for National Research Initiatives.
1616
#include "pycore_pystate.h" // _PyInterpreterState_GET()
1717
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
1818

19+
/* Names of the built-in error handlers.
 *
 * _PyCodec_UnregisterError() compares its argument against every entry and
 * refuses to un-register a match.  _PyCodec_InitRegistry() asserts that this
 * array has as many entries as its 'methods' table, so the two must be
 * updated together. */
static const char *codecs_builtin_error_handlers[] = {
20+
"strict", "ignore", "replace",
21+
"xmlcharrefreplace", "backslashreplace", "namereplace",
22+
"surrogatepass", "surrogateescape",
23+
};
24+
1925
const char *Py_hexdigits = "0123456789abcdef";
2026

2127
/* --- Codec Registry ----------------------------------------------------- */
@@ -618,6 +624,20 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
618624
name, error);
619625
}
620626

627+
/* Un-register the error handler stored under 'name' in the current
 * interpreter's error registry.
 *
 * Returns -1 with ValueError set if 'name' is listed in
 * codecs_builtin_error_handlers.  Otherwise returns the result of
 * PyDict_PopString() on the registry: 1 if an entry was removed, 0 if there
 * was no entry for 'name', -1 with an exception set on error.
 *
 * 'name' is passed straight to strcmp(), so it must not be NULL. */
int _PyCodec_UnregisterError(const char *name)
628+
{
629+
// Built-in handlers are rejected before the registry is touched.
for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_handlers); ++i) {
630+
if (strcmp(name, codecs_builtin_error_handlers[i]) == 0) {
631+
PyErr_Format(PyExc_ValueError,
632+
"cannot un-register built-in error handler '%s'", name);
633+
return -1;
634+
}
635+
}
636+
PyInterpreterState *interp = _PyInterpreterState_GET();
637+
assert(interp->codecs.initialized);
638+
// NULL result pointer: the removed handler is not handed back to the caller.
return PyDict_PopString(interp->codecs.error_registry, name, NULL);
639+
}
640+
621641
/* Lookup the error handling callback function registered under the
622642
name error. As a special case NULL can be passed, in which case
623643
the error handling callback for strict encoding will be returned. */
@@ -1470,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp)
14701490
}
14711491
}
14721492
};
1493+
// ensure that the built-in error handlers' names are kept in sync
1494+
assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers));
14731495

14741496
assert(interp->codecs.initialized == 0);
14751497
interp->codecs.search_path = PyList_New(0);

Tools/c-analyzer/cpython/ignored.tsv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ Python/ast_opt.c fold_unaryop ops -
345345
Python/ceval.c - _PyEval_BinaryOps -
346346
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
347347
Python/codecs.c - Py_hexdigits -
348+
Python/codecs.c - codecs_builtin_error_handlers -
348349
Python/codecs.c - ucnhash_capi -
349350
Python/codecs.c _PyCodec_InitRegistry methods -
350351
Python/compile.c - NO_LOCATION -

0 commit comments

Comments
 (0)