Skip to content

gh-124665: add _PyCodec_UnregisterError and _codecs._unregister_error #124677

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
24df416
Specify constants for native error policies.
picnixz Sep 27, 2024
a65c1b7
Add `PyCodec_UnregisterError` C API function
picnixz Sep 27, 2024
c1ff9e6
Expose `PyCodec_UnregisterError` as `codecs.unregister_error`
picnixz Sep 27, 2024
5837019
Update stable ABI files.
picnixz Sep 27, 2024
80d1ceb
update ignored.tsv
picnixz Sep 27, 2024
a134f23
add tests
picnixz Sep 27, 2024
47971e9
add docs
picnixz Sep 27, 2024
c00ad8d
add What's New entry
picnixz Sep 27, 2024
cf3bc2f
blurb
picnixz Sep 27, 2024
2108308
docs fix
picnixz Sep 27, 2024
1036516
simplify logic
picnixz Sep 27, 2024
ae601fe
nit: error message standardization
picnixz Sep 27, 2024
864c1ab
Make it a private utility for now.
picnixz Sep 28, 2024
3d9ff15
Revert "docs fix"
picnixz Sep 28, 2024
dbf0d5b
Revert "blurb"
picnixz Sep 28, 2024
5ba676c
Revert "add What's New entry"
picnixz Sep 28, 2024
8a0a0a9
Revert "add docs"
picnixz Sep 28, 2024
7915fa1
Revert "Update stable ABI files."
picnixz Sep 28, 2024
8eaa147
simplify array logic
picnixz Sep 28, 2024
7f41aec
update tests
picnixz Sep 28, 2024
988b34d
fix tests
picnixz Sep 28, 2024
c6e1d98
Duplicate standard error policies to avoid indirection.
picnixz Sep 28, 2024
dd6d210
standardize names
picnixz Sep 28, 2024
b68e54e
PEP 7
picnixz Sep 28, 2024
2c528fe
standardize names
picnixz Sep 28, 2024
c95cd65
revert renaming as well
picnixz Sep 28, 2024
101eb76
use 'built-in' instead of 'standard' terminology
picnixz Sep 28, 2024
903238a
use 'random.getrandbits' instead of 'os.urandom' to generate random IDs
picnixz Sep 28, 2024
a6c692d
address Victor's review
picnixz Sep 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Include/internal/pycore_codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp);

extern PyObject* _PyCodec_Lookup(const char *encoding);

/*
 * Un-register the error handling callback function registered under
 * the given 'name'. Only custom error handlers can be un-registered;
 * built-in error handling names (e.g., 'strict') are always rejected.
 *
 * 'name' must be a non-NULL, NUL-terminated C string: it is compared
 * against the built-in names before the registry is consulted.
 *
 * - Return -1 and set an exception if 'name' refers to a built-in
 *   error handling name (ValueError), or if an error occurred.
 * - Return 0 if no custom error handler can be found for 'name'.
 * - Return 1 if the custom error handler was successfully removed.
 */
extern int _PyCodec_UnregisterError(const char *name);

/* Text codec specific encoding and decoding API.

Checks the encoding against a list of codecs which do not
Expand Down
27 changes: 26 additions & 1 deletion Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from _codecs import _unregister_error as _codecs_unregister_error
import codecs
import html.entities
import itertools
Expand Down Expand Up @@ -1210,7 +1211,6 @@ def replace_with_long(exc):
'\ufffd\x00\x00'
)


def test_fake_error_class(self):
handlers = [
codecs.strict_errors,
Expand All @@ -1235,6 +1235,31 @@ class FakeUnicodeError(Exception):
with self.assertRaises((TypeError, FakeUnicodeError)):
handler(FakeUnicodeError())

def test_reject_unregister_builtin_error_handler(self):
    # Each built-in error policy name must be refused with ValueError.
    builtin_names = (
        'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace',
        'xmlcharrefreplace', 'surrogateescape', 'surrogatepass',
    )
    for builtin_name in builtin_names:
        with self.subTest(builtin_name):
            with self.assertRaises(ValueError):
                _codecs_unregister_error(builtin_name)

def test_unregister_custom_error_handler(self):
    # Round trip: unknown -> registered -> un-registered -> unknown again.
    def custom_handler(exc):
        raise exc

    custom_name = 'test.test_unregister_custom_error_handler'

    # The name must not be known before the handler is registered.
    with self.assertRaises(LookupError):
        codecs.lookup_error(custom_name)

    codecs.register_error(custom_name, custom_handler)
    registered = codecs.lookup_error(custom_name)
    self.assertIs(registered, custom_handler)

    # Removing an existing custom handler reports a true value ...
    removed = _codecs_unregister_error(custom_name)
    self.assertTrue(removed)

    # ... and the name can no longer be looked up afterwards.
    with self.assertRaises(LookupError):
        codecs.lookup_error(custom_name)

def test_unregister_custom_unknown_error_handler(self):
    # Un-registering a name that was never registered is not an error:
    # it reports a false value and leaves the name unknown.
    unknown_name = 'test.test_unregister_custom_unknown_error_handler'

    with self.assertRaises(LookupError):
        codecs.lookup_error(unknown_name)

    removed = _codecs_unregister_error(unknown_name)
    self.assertFalse(removed)

    with self.assertRaises(LookupError):
        codecs.lookup_error(unknown_name)


if __name__ == "__main__":
unittest.main()
25 changes: 25 additions & 0 deletions Modules/_codecsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,30 @@ _codecs_register_error_impl(PyObject *module, const char *errors,
Py_RETURN_NONE;
}

/*[clinic input]
_codecs._unregister_error -> bool
errors: str
/

Un-register the specified error handler for the error handling `errors'.

Only custom error handlers can be un-registered. An exception is raised
if the error handling is a built-in one (e.g., 'strict'), or if an error
occurs.

Otherwise, this returns True if a custom handler has been successfully
un-registered, and False if no custom handler for the specified error
handling exists.

[clinic start generated code]*/

static int
_codecs__unregister_error_impl(PyObject *module, const char *errors)
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
{
/* Thin wrapper: the int result of _PyCodec_UnregisterError() is passed
 * through unchanged ('module' is not used here).
 * NOTE(review): the '-> bool' return converter declared in the clinic
 * input is presumably what maps that result to an exception / False /
 * True in the generated wrapper -- confirm against the generated code. */
return _PyCodec_UnregisterError(errors);
}

/*[clinic input]
_codecs.lookup_error
name: str
Expand Down Expand Up @@ -1044,6 +1068,7 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_CODE_PAGE_ENCODE_METHODDEF
_CODECS_CODE_PAGE_DECODE_METHODDEF
_CODECS_REGISTER_ERROR_METHODDEF
_CODECS__UNREGISTER_ERROR_METHODDEF
_CODECS_LOOKUP_ERROR_METHODDEF
{NULL, NULL} /* sentinel */
};
Expand Down
52 changes: 51 additions & 1 deletion Modules/clinic/_codecsmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ Copyright (c) Corporation for National Research Initiatives.
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI

/* Names of the built-in error handlers.
 *
 * _PyCodec_UnregisterError() refuses to remove any name listed here, and
 * _PyCodec_InitRegistry() asserts that this table has as many entries as
 * its own 'methods' table, so the two must be updated together. */
static const char *codecs_builtin_error_handlers[] = {
"strict", "ignore", "replace",
"xmlcharrefreplace", "backslashreplace", "namereplace",
"surrogatepass", "surrogateescape",
};

const char *Py_hexdigits = "0123456789abcdef";

/* --- Codec Registry ----------------------------------------------------- */
Expand Down Expand Up @@ -618,6 +624,20 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
name, error);
}

/* Remove the custom error handler registered under 'name' from the
 * current interpreter's error registry.
 *
 * Built-in handler names are refused: ValueError is set and -1 is
 * returned. Otherwise the result of PyDict_PopString() is returned
 * unchanged (1: entry removed, 0: no entry for 'name', -1: error).
 */
int _PyCodec_UnregisterError(const char *name)
{
    const size_t n_builtins = Py_ARRAY_LENGTH(codecs_builtin_error_handlers);
    size_t idx = 0;

    /* Reject built-in names before touching the registry. */
    while (idx < n_builtins) {
        const char *builtin = codecs_builtin_error_handlers[idx++];
        if (strcmp(name, builtin) != 0) {
            continue;
        }
        PyErr_Format(PyExc_ValueError,
                     "cannot un-register built-in error handler '%s'", name);
        return -1;
    }

    PyInterpreterState *interp = _PyInterpreterState_GET();
    assert(interp->codecs.initialized);
    /* The popped handler is not needed by the caller, hence the NULL. */
    return PyDict_PopString(interp->codecs.error_registry, name, NULL);
}

/* Lookup the error handling callback function registered under the
name error. As a special case NULL can be passed, in which case
the error handling callback for strict encoding will be returned. */
Expand Down Expand Up @@ -1470,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp)
}
}
};
// ensure that the built-in error handlers' names are kept in sync
assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers));

assert(interp->codecs.initialized == 0);
interp->codecs.search_path = PyList_New(0);
Expand Down
1 change: 1 addition & 0 deletions Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ Python/ast_opt.c fold_unaryop ops -
Python/ceval.c - _PyEval_BinaryOps -
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - codecs_builtin_error_handlers -
Python/codecs.c - ucnhash_capi -
Python/codecs.c _PyCodec_InitRegistry methods -
Python/compile.c - NO_LOCATION -
Expand Down
Loading