Skip to content

Commit d8921cd

Browse files
authored
Merge pull request #1172 from iex-rs/faster-hex
Parse \uXXXX escapes faster
2 parents: b4bc643 + 86d0e11 · commit d8921cd

File tree

2 files changed

+57
-55
lines changed

2 files changed

+57
-55
lines changed

src/read.rs

+54 -52
Original file line number | Diff line number | Diff line change
@@ -361,16 +361,14 @@ where
361361
}
362362

363363
fn decode_hex_escape(&mut self) -> Result<u16> {
364-
let mut n = 0;
365-
for _ in 0..4 {
366-
match decode_hex_val(tri!(next_or_eof(self))) {
367-
None => return error(self, ErrorCode::InvalidEscape),
368-
Some(val) => {
369-
n = (n << 4) + val;
370-
}
371-
}
364+
let a = tri!(next_or_eof(self));
365+
let b = tri!(next_or_eof(self));
366+
let c = tri!(next_or_eof(self));
367+
let d = tri!(next_or_eof(self));
368+
match decode_four_hex_digits(a, b, c, d) {
369+
Some(val) => Ok(val),
370+
None => error(self, ErrorCode::InvalidEscape),
372371
}
373-
Ok(n)
374372
}
375373

376374
#[cfg(feature = "raw_value")]
@@ -609,24 +607,21 @@ impl<'a> Read<'a> for SliceRead<'a> {
609607
}
610608
}
611609

610+
#[inline]
612611
fn decode_hex_escape(&mut self) -> Result<u16> {
613-
if self.index + 4 > self.slice.len() {
614-
self.index = self.slice.len();
615-
return error(self, ErrorCode::EofWhileParsingString);
616-
}
617-
618-
let mut n = 0;
619-
for _ in 0..4 {
620-
let ch = decode_hex_val(self.slice[self.index]);
621-
self.index += 1;
622-
match ch {
623-
None => return error(self, ErrorCode::InvalidEscape),
624-
Some(val) => {
625-
n = (n << 4) + val;
612+
match self.slice[self.index..] {
613+
[a, b, c, d, ..] => {
614+
self.index += 4;
615+
match decode_four_hex_digits(a, b, c, d) {
616+
Some(val) => Ok(val),
617+
None => error(self, ErrorCode::InvalidEscape),
626618
}
627619
}
620+
_ => {
621+
self.index = self.slice.len();
622+
error(self, ErrorCode::EofWhileParsingString)
623+
}
628624
}
629-
Ok(n)
630625
}
631626

632627
#[cfg(feature = "raw_value")]
@@ -993,34 +988,41 @@ where
993988
Ok(())
994989
}
995990

996-
static HEX: [u8; 256] = {
997-
const __: u8 = 255; // not a hex digit
998-
[
999-
// 1 2 3 4 5 6 7 8 9 A B C D E F
1000-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
1001-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
1002-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
1003-
00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
1004-
__, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
1005-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
1006-
__, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
1007-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
1008-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
1009-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
1010-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
1011-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
1012-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
1013-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
1014-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
1015-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
1016-
]
1017-
};
1018-
1019-
fn decode_hex_val(val: u8) -> Option<u16> {
1020-
let n = HEX[val as usize] as u16;
1021-
if n == 255 {
1022-
None
1023-
} else {
1024-
Some(n)
991+
/// Returns the numeric value (0..=15) of a single ASCII hex digit, or `None`
/// when `val` is not a hex digit. Both uppercase and lowercase `A`-`F` are
/// accepted.
///
/// This is a `const fn` so that `build_hex_table` can call it while the
/// lookup tables are being computed at compile time.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
992+
match val {
993+
b'0'..=b'9' => Some(val - b'0'),
994+
// Letters start at ten: 'A'/'a' => 10 ... 'F'/'f' => 15.
b'A'..=b'F' => Some(val - b'A' + 10),
995+
b'a'..=b'f' => Some(val - b'a' + 10),
996+
_ => None,
997+
}
998+
}
999+
1000+
/// Builds a 256-entry lookup table indexed by byte value.
///
/// For a byte that is an ASCII hex digit the entry is the digit's value
/// shifted left by `shift` bits; for every other byte the entry is `-1`.
/// Valid entries are therefore never negative for the shifts used in this
/// file (0 and 4), so the sign bit alone distinguishes "not a hex digit".
const fn build_hex_table(shift: usize) -> [i16; 256] {
1001+
let mut table = [0; 256];
1002+
let mut ch = 0;
1003+
// A manual `while` loop is used because `for` loops are not permitted in
// a `const fn`.
while ch < 256 {
1004+
table[ch] = match decode_hex_val_slow(ch as u8) {
1005+
Some(val) => (val as i16) << shift,
1006+
// Sentinel with every bit set, including the sign bit.
None => -1,
1007+
};
1008+
ch += 1;
10251009
}
1010+
table
1011+
}
1012+
1013+
// Hex digit value in the low nibble (no shift); `-1` for non-hex bytes.
static HEX0: [i16; 256] = build_hex_table(0);
1014+
// Hex digit value pre-shifted into the high nibble (`<< 4`); `-1` for
// non-hex bytes.
static HEX1: [i16; 256] = build_hex_table(4);
1015+
1016+
fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
1017+
let a = HEX1[a as usize];
1018+
let b = HEX0[b as usize];
1019+
let c = HEX1[c as usize];
1020+
let d = HEX0[d as usize];
1021+
1022+
// A single sign bit check.
1023+
if (a | b | c | d) < 0 {
1024+
return None;
1025+
}
1026+
1027+
Some((((a | b) << 8) | c | d) as u16)
10261028
}

tests/test.rs

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1091,15 +1091,15 @@ fn test_parse_string() {
10911091
),
10921092
(
10931093
&[b'"', b'\\', b'u', 250, 48, 51, 48, b'"'],
1094-
"invalid escape at line 1 column 4",
1094+
"invalid escape at line 1 column 7",
10951095
),
10961096
(
10971097
&[b'"', b'\\', b'u', 48, 250, 51, 48, b'"'],
1098-
"invalid escape at line 1 column 5",
1098+
"invalid escape at line 1 column 7",
10991099
),
11001100
(
11011101
&[b'"', b'\\', b'u', 48, 48, 250, 48, b'"'],
1102-
"invalid escape at line 1 column 6",
1102+
"invalid escape at line 1 column 7",
11031103
),
11041104
(
11051105
&[b'"', b'\\', b'u', 48, 48, 51, 250, b'"'],

0 commit comments

Comments
 (0)