@@ -898,20 +898,12 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
898
898
validate : bool ,
899
899
scratch : & mut Vec < u8 > ,
900
900
) -> Result < ( ) > {
901
- fn encode_surrogate ( scratch : & mut Vec < u8 > , n : u16 ) {
902
- scratch. extend_from_slice ( & [
903
- ( n >> 12 & 0b0000_1111 ) as u8 | 0b1110_0000 ,
904
- ( n >> 6 & 0b0011_1111 ) as u8 | 0b1000_0000 ,
905
- ( n & 0b0011_1111 ) as u8 | 0b1000_0000 ,
906
- ] ) ;
907
- }
908
-
909
901
let c = match tri ! ( read. decode_hex_escape( ) ) {
910
902
n @ 0xDC00 ..=0xDFFF => {
911
903
return if validate {
912
904
error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape )
913
905
} else {
914
- encode_surrogate ( scratch , n ) ;
906
+ push_wtf8_codepoint ( n as u32 , scratch ) ;
915
907
Ok ( ( ) )
916
908
} ;
917
909
}
@@ -928,7 +920,7 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
928
920
read. discard ( ) ;
929
921
error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
930
922
} else {
931
- encode_surrogate ( scratch , n1 ) ;
923
+ push_wtf8_codepoint ( n1 as u32 , scratch ) ;
932
924
Ok ( ( ) )
933
925
} ;
934
926
}
@@ -940,7 +932,7 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
940
932
read. discard ( ) ;
941
933
error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
942
934
} else {
943
- encode_surrogate ( scratch , n1 ) ;
935
+ push_wtf8_codepoint ( n1 as u32 , scratch ) ;
944
936
// The \ prior to this byte started an escape sequence,
945
937
// so we need to parse that now. This recursive call
946
938
// does not blow the stack on malicious input because
@@ -966,17 +958,14 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
966
958
n => n as u32 ,
967
959
} ;
968
960
969
- // SAFETY: c is always a codepoint.
970
- unsafe {
971
- push_utf8_codepoint ( c, scratch) ;
972
- }
961
+ push_wtf8_codepoint ( c, scratch) ;
973
962
Ok ( ( ) )
974
963
}
975
964
976
- /// Adds a UTF-8 codepoint to the end of the buffer. This is a more efficient
977
- /// implementation of String::push. n must be a valid codepoint.
965
+ /// Adds a WTF-8 codepoint to the end of the buffer. This is a more efficient
966
+ /// implementation of String::push. The codepoint may be a surrogate.
978
967
#[ inline]
979
- unsafe fn push_utf8_codepoint ( n : u32 , scratch : & mut Vec < u8 > ) {
968
+ fn push_wtf8_codepoint ( n : u32 , scratch : & mut Vec < u8 > ) {
980
969
if n < 0x80 {
981
970
scratch. push ( n as u8 ) ;
982
971
return ;
0 commit comments