Skip to content

Commit 629634a

Browse files
committed
pythongh-111495: improve test coverage of codecs C API (pythonGH-126030)
For now, skip some crashers (tracked in pythongh-123378).
1 parent e9269ce commit 629634a

File tree

1 file changed

+115
-23
lines changed

1 file changed

+115
-23
lines changed

Lib/test/test_capi/test_codecs.py

+115 -23
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,49 @@ def test_codec_stream_writer(self):
747747

748748
class CAPICodecErrors(unittest.TestCase):
749749

750+
@classmethod
751+
def _generate_exception_args(cls):
752+
for objlen in range(5):
753+
maxind = 2 * max(2, objlen)
754+
for start in range(-maxind, maxind + 1):
755+
for end in range(-maxind, maxind + 1):
756+
yield objlen, start, end
757+
758+
@classmethod
759+
def generate_encode_errors(cls):
760+
return tuple(
761+
UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
762+
for objlen, start, end in cls._generate_exception_args()
763+
)
764+
765+
@classmethod
766+
def generate_decode_errors(cls):
767+
return tuple(
768+
UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
769+
for objlen, start, end in cls._generate_exception_args()
770+
)
771+
772+
@classmethod
773+
def generate_translate_errors(cls):
774+
return tuple(
775+
UnicodeTranslateError('0' * objlen, start, end, 'why')
776+
for objlen, start, end in cls._generate_exception_args()
777+
)
778+
779+
@classmethod
780+
def setUpClass(cls):
781+
cls.unicode_encode_errors = cls.generate_encode_errors()
782+
cls.unicode_decode_errors = cls.generate_decode_errors()
783+
cls.unicode_translate_errors = cls.generate_translate_errors()
784+
cls.all_unicode_errors = (
785+
cls.unicode_encode_errors
786+
+ cls.unicode_decode_errors
787+
+ cls.unicode_translate_errors
788+
)
789+
cls.bad_unicode_errors = (
790+
ValueError(),
791+
)
792+
750793
def test_codec_register_error(self):
751794
# for cleaning up between tests
752795
from _codecs import _unregister_error as _codecs_unregister_error
@@ -780,33 +823,82 @@ def test_codec_lookup_error(self):
780823
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
781824
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
782825
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
826+
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
783827
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
784828
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
785829

786-
def test_codec_error_handlers(self):
787-
exceptions = [
788-
# A UnicodeError with an empty message currently crashes:
789-
# See: https://github.com/python/cpython/issues/123378
790-
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
791-
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
792-
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
793-
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
794-
]
795-
796-
strict_handler = _testcapi.codec_strict_errors
830+
def test_codec_strict_errors_handler(self):
831+
handler = _testcapi.codec_strict_errors
832+
for exc in self.all_unicode_errors + self.bad_unicode_errors:
833+
with self.subTest(handler=handler, exc=exc):
834+
self.assertRaises(type(exc), handler, exc)
835+
836+
def test_codec_ignore_errors_handler(self):
837+
handler = _testcapi.codec_ignore_errors
838+
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
839+
840+
def test_codec_replace_errors_handler(self):
841+
handler = _testcapi.codec_replace_errors
842+
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
843+
844+
def test_codec_xmlcharrefreplace_errors_handler(self):
845+
handler = _testcapi.codec_xmlcharrefreplace_errors
846+
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
847+
848+
def test_codec_backslashreplace_errors_handler(self):
849+
handler = _testcapi.codec_backslashreplace_errors
850+
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
851+
852+
def test_codec_namereplace_errors_handler(self):
853+
handler = _testlimitedcapi.codec_namereplace_errors
854+
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
855+
856+
def do_test_codec_errors_handler(self, handler, exceptions):
857+
at_least_one = False
797858
for exc in exceptions:
798-
with self.subTest(handler=strict_handler, exc=exc):
799-
self.assertRaises(UnicodeEncodeError, strict_handler, exc)
800-
801-
for handler in [
802-
_testcapi.codec_ignore_errors,
803-
_testcapi.codec_replace_errors,
804-
_testcapi.codec_xmlcharrefreplace_errors,
805-
_testlimitedcapi.codec_namereplace_errors,
806-
]:
807-
for exc in exceptions:
808-
with self.subTest(handler=handler, exc=exc):
809-
self.assertIsInstance(handler(exc), tuple)
859+
# See https://github.com/python/cpython/issues/123378 and related
860+
# discussion and issues for details.
861+
if self._exception_may_crash(exc):
862+
continue
863+
864+
at_least_one = True
865+
with self.subTest(handler=handler, exc=exc):
866+
# test that the handler does not crash
867+
self.assertIsInstance(handler(exc), tuple)
868+
869+
if exceptions:
870+
self.assertTrue(at_least_one, "all exceptions are crashing")
871+
872+
for bad_exc in (
873+
self.bad_unicode_errors
874+
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
875+
):
876+
with self.subTest('bad type', handler=handler, exc=bad_exc):
877+
self.assertRaises(TypeError, handler, bad_exc)
878+
879+
@classmethod
880+
def _exception_may_crash(cls, exc):
881+
"""Indicate whether a Unicode exception might currently crash
882+
the interpreter when used by a built-in codecs error handler.
883+
884+
Until gh-123378 is fixed, we skip the tests for these exceptions.
885+
886+
This should only be used by "do_test_codec_errors_handler".
887+
"""
888+
message, start, end = exc.object, exc.start, exc.end
889+
match exc:
890+
case UnicodeEncodeError():
891+
return end < start or (end - start) >= len(message)
892+
case UnicodeDecodeError():
893+
# The case "end - start >= len(message)" does not crash.
894+
return end < start
895+
case UnicodeTranslateError():
896+
# Test "end <= start" because PyCodec_ReplaceErrors checks
897+
# the Unicode kind of a 0-length string which by convention
898+
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
899+
# the handler currently expects.
900+
return end <= start or (end - start) >= len(message)
901+
return False
810902

811903

812904
if __name__ == "__main__":

0 commit comments

Comments
 (0)