diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs index 84fe9ad26720e..21183121e15a0 100644 --- a/compiler/rustc_ast/src/lib.rs +++ b/compiler/rustc_ast/src/lib.rs @@ -16,6 +16,7 @@ #![feature(min_specialization)] #![recursion_limit = "256"] #![feature(slice_internals)] +#![feature(stmt_expr_attributes)] #[macro_use] extern crate rustc_macros; diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 1cc5ddfd8ee29..224afbd553fb8 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -56,25 +56,30 @@ impl LitKind { // new symbol because the string in the LitKind is different to the // string in the token. let s = symbol.as_str(); - let symbol = - if s.contains(&['\\', '\r']) { - let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + let symbol = if s.contains(&['\\', '\r']) { + let mut buf = String::with_capacity(s.len()); + let mut error = Ok(()); + // Force-inlining here is aggressive but the closure is + // called on every char in the string, so it can be + // hot in programs with many long strings. + unescape_literal( + &s, + Mode::Str, + &mut #[inline(always)] + |_, unescaped_char| match unescaped_char { + Ok(c) => buf.push(c), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } - }); - error?; - Symbol::intern(&buf) - } else { - symbol - }; + }, + ); + error?; + Symbol::intern(&buf) + } else { + symbol + }; LitKind::Str(symbol, ast::StrStyle::Cooked) } token::StrRaw(n) => { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index d789237e692d2..97f9588ae1ef5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -159,26 +159,8 @@ impl Mode { } } -fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result { - if first_char != '\\' { - // Previous character was not a slash, and we don't expect it to be - // an escape-only character. - return match first_char { - '\t' | '\n' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar), - '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar), - _ => { - if mode.is_bytes() && !first_char.is_ascii() { - // Byte literal can't be a non-ascii character. - return Err(EscapeError::NonAsciiCharInByte); - } - Ok(first_char) - } - }; - } - - // Previous character is '\\', try to unescape it. +fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { + // Previous character was '\\', unescape what follows. let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; @@ -270,9 +252,24 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result Result { + if mode.is_bytes() && !first_char.is_ascii() { + // Byte literal can't be a non-ascii character. + Err(EscapeError::NonAsciiCharInByte) + } else { + Ok(first_char) + } +} + fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = scan_escape(first_char, chars, mode)?; + let res = match first_char { + '\\' => scan_escape(chars, mode), + '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), + '\r' => Err(EscapeError::BareCarriageReturn), + _ => ascii_check(first_char, mode), + }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); } @@ -303,12 +300,14 @@ where skip_ascii_whitespace(&mut chars, start, callback); continue; } - _ => scan_escape(first_char, &mut chars, mode), + _ => scan_escape(&mut chars, mode), } } '\n' => Ok('\n'), '\t' => Ok('\t'), - _ => scan_escape(first_char, &mut chars, mode), + '"' => Err(EscapeError::EscapeOnlyChar), + '\r' => Err(EscapeError::BareCarriageReturn), + _ => ascii_check(first_char, mode), }; let end = initial_len - chars.as_str().len(); callback(start..end, unescaped_char);