Skip to content

Commit e9a0c3c

Browse files
committed
Remove TokenKind::InvalidPrefix.
`TokenKind::InvalidPrefix` was added in rust-lang#123752 to handle some cases involving emoji, but it isn't necessary because it is always treated the same as `TokenKind::InvalidIdent`. This commit removes it, which makes things a little simpler.
1 parent 2c7c369 commit e9a0c3c

File tree

4 files changed

+14
-21
lines changed

4 files changed

+14
-21
lines changed

compiler/rustc_lexer/src/lib.rs

+8-13
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,6 @@ pub enum TokenKind {
9999
/// several tokens: `'r` and `#` and `foo`.
100100
RawLifetime,
101101

102-
/// Similar to the above, but *always* an error on every edition. This is used
103-
/// for emoji identifier recovery, as those are not meant to be ever accepted.
104-
InvalidPrefix,
105-
106102
/// Guarded string literal prefix: `#"` or `##`.
107103
///
108104
/// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
@@ -466,7 +462,7 @@ impl Cursor<'_> {
466462
Literal { kind, suffix_start }
467463
}
468464
// Identifier starting with an emoji. Only lexed for graceful error recovery.
469-
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
465+
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
470466
_ => Unknown,
471467
};
472468
let res = Token::new(token_kind, self.pos_within_token());
@@ -550,23 +546,22 @@ impl Cursor<'_> {
550546
// we see a prefix here, it is definitely an unknown prefix.
551547
match self.first() {
552548
'#' | '"' | '\'' => UnknownPrefix,
553-
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
549+
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
554550
_ => Ident,
555551
}
556552
}
557553

558-
fn invalid_ident_or_prefix(&mut self) -> TokenKind {
554+
fn invalid_ident(&mut self) -> TokenKind {
559555
// Start is already eaten, eat the rest of identifier.
560556
self.eat_while(|c| {
561557
const ZERO_WIDTH_JOINER: char = '\u{200d}';
562558
is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
563559
});
564-
// Known prefixes must have been handled earlier. So if
565-
// we see a prefix here, it is definitely an unknown prefix.
566-
match self.first() {
567-
'#' | '"' | '\'' => InvalidPrefix,
568-
_ => InvalidIdent,
569-
}
560+
// An invalid identifier followed by '#' or '"' or '\'' could be
561+
// interpreted as an invalid literal prefix. We don't bother doing that
562+
// because the treatment of invalid identifiers and invalid prefixes
563+
// would be the same.
564+
InvalidIdent
570565
}
571566

572567
fn c_or_byte_string(

compiler/rustc_parse/src/lexer/mod.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
213213
let ident = Symbol::intern(lifetime_name);
214214
token::Lifetime(ident, IdentIsRaw::No)
215215
}
216-
rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix
216+
rustc_lexer::TokenKind::InvalidIdent
217217
// Do not recover an identifier with emoji if the codepoint is a confusable
218218
// with a recoverable substitution token, like `➖`.
219219
if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
@@ -359,8 +359,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
359359
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
360360

361361
rustc_lexer::TokenKind::Unknown
362-
| rustc_lexer::TokenKind::InvalidIdent
363-
| rustc_lexer::TokenKind::InvalidPrefix => {
362+
| rustc_lexer::TokenKind::InvalidIdent => {
364363
// Don't emit diagnostics for sequences of the same invalid token
365364
if swallow_next_invalid > 0 {
366365
swallow_next_invalid -= 1;

src/librustdoc/html/highlight.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -861,10 +861,9 @@ impl<'src> Classifier<'src> {
861861
},
862862
Some(c) => c,
863863
},
864-
TokenKind::RawIdent
865-
| TokenKind::UnknownPrefix
866-
| TokenKind::InvalidPrefix
867-
| TokenKind::InvalidIdent => Class::Ident(self.new_span(before, text)),
864+
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
865+
Class::Ident(self.new_span(before, text))
866+
}
868867
TokenKind::Lifetime { .. }
869868
| TokenKind::RawLifetime
870869
| TokenKind::UnknownPrefixLifetime => Class::Lifetime,

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ impl<'a> Converter<'a> {
183183
rustc_lexer::TokenKind::Ident => {
184184
SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)
185185
}
186-
rustc_lexer::TokenKind::InvalidPrefix | rustc_lexer::TokenKind::InvalidIdent => {
186+
rustc_lexer::TokenKind::InvalidIdent => {
187187
err = "Ident contains invalid characters";
188188
IDENT
189189
}

0 commit comments

Comments
 (0)