@@ -747,6 +747,49 @@ def test_codec_stream_writer(self):
747
747
748
748
class CAPICodecErrors (unittest .TestCase ):
749
749
750
+ @classmethod
751
+ def _generate_exception_args (cls ):
752
+ for objlen in range (5 ):
753
+ maxind = 2 * max (2 , objlen )
754
+ for start in range (- maxind , maxind + 1 ):
755
+ for end in range (- maxind , maxind + 1 ):
756
+ yield objlen , start , end
757
+
758
+ @classmethod
759
+ def generate_encode_errors (cls ):
760
+ return tuple (
761
+ UnicodeEncodeError ('utf-8' , '0' * objlen , start , end , 'why' )
762
+ for objlen , start , end in cls ._generate_exception_args ()
763
+ )
764
+
765
+ @classmethod
766
+ def generate_decode_errors (cls ):
767
+ return tuple (
768
+ UnicodeDecodeError ('utf-8' , b'0' * objlen , start , end , 'why' )
769
+ for objlen , start , end in cls ._generate_exception_args ()
770
+ )
771
+
772
+ @classmethod
773
+ def generate_translate_errors (cls ):
774
+ return tuple (
775
+ UnicodeTranslateError ('0' * objlen , start , end , 'why' )
776
+ for objlen , start , end in cls ._generate_exception_args ()
777
+ )
778
+
779
+ @classmethod
780
+ def setUpClass (cls ):
781
+ cls .unicode_encode_errors = cls .generate_encode_errors ()
782
+ cls .unicode_decode_errors = cls .generate_decode_errors ()
783
+ cls .unicode_translate_errors = cls .generate_translate_errors ()
784
+ cls .all_unicode_errors = (
785
+ cls .unicode_encode_errors
786
+ + cls .unicode_decode_errors
787
+ + cls .unicode_translate_errors
788
+ )
789
+ cls .bad_unicode_errors = (
790
+ ValueError (),
791
+ )
792
+
750
793
def test_codec_register_error (self ):
751
794
# for cleaning up between tests
752
795
from _codecs import _unregister_error as _codecs_unregister_error
@@ -780,33 +823,82 @@ def test_codec_lookup_error(self):
780
823
self .assertIs (codec_lookup_error ('ignore' ), codecs .ignore_errors )
781
824
self .assertIs (codec_lookup_error ('replace' ), codecs .replace_errors )
782
825
self .assertIs (codec_lookup_error ('xmlcharrefreplace' ), codecs .xmlcharrefreplace_errors )
826
+ self .assertIs (codec_lookup_error ('backslashreplace' ), codecs .backslashreplace_errors )
783
827
self .assertIs (codec_lookup_error ('namereplace' ), codecs .namereplace_errors )
784
828
self .assertRaises (LookupError , codec_lookup_error , 'unknown' )
785
829
786
- def test_codec_error_handlers (self ):
787
- exceptions = [
788
- # A UnicodeError with an empty message currently crashes:
789
- # See: https://github.com/python/cpython/issues/123378
790
- # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
791
- UnicodeEncodeError ('bad' , 'x' , 0 , 1 , 'reason' ),
792
- UnicodeEncodeError ('bad' , 'xyz123' , 0 , 1 , 'reason' ),
793
- UnicodeEncodeError ('bad' , 'xyz123' , 1 , 4 , 'reason' ),
794
- ]
795
-
796
- strict_handler = _testcapi .codec_strict_errors
830
def test_codec_strict_errors_handler(self):
    # The strict handler must raise an exception of the same type as the
    # one it is given, for every generated Unicode error as well as for
    # the non-Unicode "bad" ones.
    handler = _testcapi.codec_strict_errors
    candidates = self.all_unicode_errors + self.bad_unicode_errors
    for exc in candidates:
        with self.subTest(handler=handler, exc=exc):
            with self.assertRaises(type(exc)):
                handler(exc)
835
+
836
def test_codec_ignore_errors_handler(self):
    # Run the shared handler checks with every generated Unicode error
    # as the accepted set.
    self.do_test_codec_errors_handler(
        _testcapi.codec_ignore_errors,
        self.all_unicode_errors,
    )
839
+
840
def test_codec_replace_errors_handler(self):
    # Run the shared handler checks with every generated Unicode error
    # as the accepted set.
    self.do_test_codec_errors_handler(
        _testcapi.codec_replace_errors,
        self.all_unicode_errors,
    )
843
+
844
def test_codec_xmlcharrefreplace_errors_handler(self):
    # Only the encode errors are passed as accepted; the shared helper
    # then expects TypeError for the decode and translate errors.
    self.do_test_codec_errors_handler(
        _testcapi.codec_xmlcharrefreplace_errors,
        self.unicode_encode_errors,
    )
847
+
848
def test_codec_backslashreplace_errors_handler(self):
    # Run the shared handler checks with every generated Unicode error
    # as the accepted set.
    self.do_test_codec_errors_handler(
        _testcapi.codec_backslashreplace_errors,
        self.all_unicode_errors,
    )
851
+
852
def test_codec_namereplace_errors_handler(self):
    # Only the encode errors are passed as accepted; the shared helper
    # then expects TypeError for the decode and translate errors.
    # Note that this handler is taken from _testlimitedcapi.
    self.do_test_codec_errors_handler(
        _testlimitedcapi.codec_namereplace_errors,
        self.unicode_encode_errors,
    )
855
+
856
+ def do_test_codec_errors_handler (self , handler , exceptions ):
857
+ at_least_one = False
797
858
for exc in exceptions :
798
- with self .subTest (handler = strict_handler , exc = exc ):
799
- self .assertRaises (UnicodeEncodeError , strict_handler , exc )
800
-
801
- for handler in [
802
- _testcapi .codec_ignore_errors ,
803
- _testcapi .codec_replace_errors ,
804
- _testcapi .codec_xmlcharrefreplace_errors ,
805
- _testlimitedcapi .codec_namereplace_errors ,
806
- ]:
807
- for exc in exceptions :
808
- with self .subTest (handler = handler , exc = exc ):
809
- self .assertIsInstance (handler (exc ), tuple )
859
+ # See https://github.com/python/cpython/issues/123378 and related
860
+ # discussion and issues for details.
861
+ if self ._exception_may_crash (exc ):
862
+ continue
863
+
864
+ at_least_one = True
865
+ with self .subTest (handler = handler , exc = exc ):
866
+ # test that the handler does not crash
867
+ self .assertIsInstance (handler (exc ), tuple )
868
+
869
+ if exceptions :
870
+ self .assertTrue (at_least_one , "all exceptions are crashing" )
871
+
872
+ for bad_exc in (
873
+ self .bad_unicode_errors
874
+ + tuple (e for e in self .all_unicode_errors if e not in exceptions )
875
+ ):
876
+ with self .subTest ('bad type' , handler = handler , exc = bad_exc ):
877
+ self .assertRaises (TypeError , handler , bad_exc )
878
+
879
+ @classmethod
880
+ def _exception_may_crash (cls , exc ):
881
+ """Indicate whether a Unicode exception might currently crash
882
+ the interpreter when used by a built-in codecs error handler.
883
+
884
+ Until gh-123378 is fixed, we skip the tests for these exceptions.
885
+
886
+ This should only be used by "do_test_codec_errors_handler".
887
+ """
888
+ message , start , end = exc .object , exc .start , exc .end
889
+ match exc :
890
+ case UnicodeEncodeError ():
891
+ return end < start or (end - start ) >= len (message )
892
+ case UnicodeDecodeError ():
893
+ # The case "end - start >= len(message)" does not crash.
894
+ return end < start
895
+ case UnicodeTranslateError ():
896
+ # Test "end <= start" because PyCodec_ReplaceErrors checks
897
+ # the Unicode kind of a 0-length string which by convention
898
+ # is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
899
+ # the handler currently expects.
900
+ return end <= start or (end - start ) >= len (message )
901
+ return False
810
902
811
903
812
904
if __name__ == "__main__" :
0 commit comments