|
1 | 1 | use crate::error::{Error, ErrorCode, Result};
|
2 | 2 | use alloc::vec::Vec;
|
3 |
| -use core::char; |
4 | 3 | use core::cmp;
|
5 | 4 | use core::mem;
|
6 | 5 | use core::ops::Deref;
|
@@ -957,25 +956,64 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
957 | 956 | return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
958 | 957 | }
|
959 | 958 |
|
960 |
| - let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; |
961 |
| - |
962 |
| - match char::from_u32(n) { |
963 |
| - Some(c) => c, |
964 |
| - None => { |
965 |
| - return error(read, ErrorCode::InvalidUnicodeCodePoint); |
966 |
| - } |
967 |
| - } |
| 959 | + // This value is in range U+10000..=U+10FFFF, which is always a |
| 960 | + // valid codepoint. |
| 961 | + (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000 |
968 | 962 | }
|
969 | 963 |
|
970 | 964 | // Every u16 outside of the surrogate ranges above is guaranteed
|
971 | 965 | // to be a legal char.
|
972 |
| - n => char::from_u32(n as u32).unwrap(), |
| 966 | + n => n as u32, |
973 | 967 | };
|
974 | 968 |
|
975 |
| - scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); |
| 969 | + // SAFETY: c is always a codepoint. |
| 970 | + unsafe { |
| 971 | + push_utf8_codepoint(c, scratch); |
| 972 | + } |
976 | 973 | Ok(())
|
977 | 974 | }
|
978 | 975 |
|
/// Appends the UTF-8 encoding of the code point `n` to the end of `scratch`.
///
/// This is meant as a cheaper stand-in for `String::push` that works directly
/// on a byte buffer: ASCII takes a plain `Vec::push`, and every longer
/// sequence is written straight into the vector's spare capacity after a
/// single `reserve` call.
///
/// # Safety
///
/// `n` must be a valid code point. The value is not validated beyond its
/// magnitude: a surrogate in `0xD800..=0xDFFF` is not rejected and is emitted
/// using the generic three-byte form, which is not well-formed UTF-8. Callers
/// that later treat `scratch` as UTF-8 without re-validating it must therefore
/// never pass a surrogate.
///
/// # Panics
///
/// Panics if `n` is greater than `0x10_FFFF`.
#[inline]
unsafe fn push_utf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    // Payload mask and tag bits of a UTF-8 continuation byte (`10xx_xxxx`).
    const CONT_MASK: u32 = 0b0011_1111;
    const CONT_TAG: u8 = 0b1000_0000;

    // ASCII is encoded as itself; the safe `push` is enough for this path.
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    // The longest UTF-8 sequence is four bytes. Reserving that much up front
    // means the raw writes below never need a capacity check of their own.
    scratch.reserve(4);

    // SAFETY: `reserve(4)` guarantees `capacity >= len + 4`, so the four bytes
    // starting at `ptr` lie inside the vector's allocation. Each arm writes
    // the first `encoded_len - 1` bytes and the shared tail write fills the
    // last one, so exactly `encoded_len <= 4` bytes are initialised before
    // `set_len` makes them part of the vector. `u8` has no drop glue, so
    // overwriting spare capacity with `write` is fine.
    unsafe {
        let ptr = scratch.as_mut_ptr().add(scratch.len());

        // Every `as u8` below is lossless: the value is masked to at most six
        // bits before the cast.
        let encoded_len = match n {
            // Handled by the early return above.
            0..=0x7F => unreachable!(),
            0x80..=0x7FF => {
                // 110x_xxxx leading byte.
                ptr.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
                2
            }
            0x800..=0xFFFF => {
                // 1110_xxxx leading byte plus one middle continuation byte.
                ptr.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
                ptr.add(1).write(((n >> 6) & CONT_MASK) as u8 | CONT_TAG);
                3
            }
            0x1_0000..=0x10_FFFF => {
                // 1111_0xxx leading byte plus two middle continuation bytes.
                ptr.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
                ptr.add(1).write(((n >> 12) & CONT_MASK) as u8 | CONT_TAG);
                ptr.add(2).write(((n >> 6) & CONT_MASK) as u8 | CONT_TAG);
                4
            }
            // Larger than any code point: the caller broke the contract.
            0x11_0000.. => unreachable!(),
        };

        // All multi-byte forms end with a continuation byte carrying the low
        // six bits, so that final byte is written once here for every arm.
        ptr.add(encoded_len - 1).write((n & CONT_MASK) as u8 | CONT_TAG);

        scratch.set_len(scratch.len() + encoded_len);
    }
}
| 1016 | + |
979 | 1017 | /// Parses a JSON escape sequence and discards the value. Assumes the previous
|
980 | 1018 | /// byte read was a backslash.
|
981 | 1019 | fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
|
|
0 commit comments