Skip to content

Commit 34fa6da

Browse files
authored
Rollup merge of rust-lang#109203 - Ezrashaw:refactor-ident-parsing, r=Nilstrieb
refactor/feat: refactor identifier parsing a bit + error recovery for `expected_ident_found`. Prior art: rust-lang#108854
2 parents 0392e29 + 05b5046 commit 34fa6da

File tree

12 files changed

+250
-95
lines changed

12 files changed

+250
-95
lines changed

compiler/rustc_parse/messages.ftl

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re
336336
parse_expected_identifier_found_doc_comment = expected identifier, found doc comment
337337
parse_expected_identifier = expected identifier
338338
339-
parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier
339+
parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier
340340
341341
parse_sugg_remove_comma = remove this comma
342342

compiler/rustc_parse/src/errors.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem {
888888

889889
#[derive(Subdiagnostic)]
890890
#[suggestion(
891-
parse_sugg_escape_to_use_as_identifier,
891+
parse_sugg_escape_identifier,
892892
style = "verbose",
893893
applicability = "maybe-incorrect",
894894
code = "r#"
895895
)]
896-
pub(crate) struct SuggEscapeToUseAsIdentifier {
896+
pub(crate) struct SuggEscapeIdentifier {
897897
#[primary_span]
898898
pub span: Span,
899899
pub ident_name: String,
@@ -937,7 +937,7 @@ impl ExpectedIdentifierFound {
937937
pub(crate) struct ExpectedIdentifier {
938938
pub span: Span,
939939
pub token: Token,
940-
pub suggest_raw: Option<SuggEscapeToUseAsIdentifier>,
940+
pub suggest_raw: Option<SuggEscapeIdentifier>,
941941
pub suggest_remove_comma: Option<SuggRemoveComma>,
942942
pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>,
943943
}
@@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier {
986986

987987
#[derive(Subdiagnostic)]
988988
#[help(parse_invalid_identifier_with_leading_number)]
989-
pub(crate) struct HelpIdentifierStartsWithNumber;
989+
pub(crate) struct HelpIdentifierStartsWithNumber {
990+
#[primary_span]
991+
pub num_span: Span,
992+
}
990993

991994
pub(crate) struct ExpectedSemi {
992995
pub span: Span,

compiler/rustc_parse/src/parser/diagnostics.rs

+89-34
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ use super::{
66
use crate::errors::{
77
AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub,
88
ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg,
9-
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType,
10-
DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
9+
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything,
10+
DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
1111
GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg,
1212
HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon,
1313
IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg,
1414
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
1515
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens,
16-
StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma,
16+
StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma,
1717
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
1818
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead,
1919
};
@@ -38,7 +38,7 @@ use rustc_errors::{
3838
use rustc_session::errors::ExprParenthesesNeeded;
3939
use rustc_span::source_map::Spanned;
4040
use rustc_span::symbol::{kw, sym, Ident};
41-
use rustc_span::{Span, SpanSnippetError, DUMMY_SP};
41+
use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP};
4242
use std::mem::take;
4343
use std::ops::{Deref, DerefMut};
4444
use thin_vec::{thin_vec, ThinVec};
@@ -268,7 +268,21 @@ impl<'a> Parser<'a> {
268268
self.sess.source_map().span_to_snippet(span)
269269
}
270270

271-
pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
271+
/// Emits an error with suggestions if an identifier was expected but not found.
272+
///
273+
/// Returns a possibly recovered identifier.
274+
pub(super) fn expected_ident_found(
275+
&mut self,
276+
recover: bool,
277+
) -> PResult<'a, (Ident, /* is_raw */ bool)> {
278+
if let TokenKind::DocComment(..) = self.prev_token.kind {
279+
return Err(DocCommentDoesNotDocumentAnything {
280+
span: self.prev_token.span,
281+
missing_comma: None,
282+
}
283+
.into_diagnostic(&self.sess.span_diagnostic));
284+
}
285+
272286
let valid_follow = &[
273287
TokenKind::Eq,
274288
TokenKind::Colon,
@@ -281,31 +295,51 @@ impl<'a> Parser<'a> {
281295
TokenKind::CloseDelim(Delimiter::Parenthesis),
282296
];
283297

284-
let suggest_raw = match self.token.ident() {
285-
Some((ident, false))
286-
if ident.is_raw_guess()
287-
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind)) =>
288-
{
289-
Some(SuggEscapeToUseAsIdentifier {
290-
span: ident.span.shrink_to_lo(),
291-
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
292-
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
293-
ident_name: ident.name.to_string(),
294-
})
295-
}
296-
_ => None,
297-
};
298+
let mut recovered_ident = None;
299+
// we take this here so that the correct original token is retained in
300+
// the diagnostic, regardless of eager recovery.
301+
let bad_token = self.token.clone();
302+
303+
// suggest prepending a keyword in identifier position with `r#`
304+
let suggest_raw = if let Some((ident, false)) = self.token.ident()
305+
&& ident.is_raw_guess()
306+
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
307+
{
308+
recovered_ident = Some((ident, true));
309+
310+
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
311+
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
312+
let ident_name = ident.name.to_string();
313+
314+
Some(SuggEscapeIdentifier {
315+
span: ident.span.shrink_to_lo(),
316+
ident_name
317+
})
318+
} else { None };
319+
320+
let suggest_remove_comma =
321+
if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
322+
if recover {
323+
self.bump();
324+
recovered_ident = self.ident_or_err(false).ok();
325+
};
298326

299-
let suggest_remove_comma = (self.token == token::Comma
300-
&& self.look_ahead(1, |t| t.is_ident()))
301-
.then_some(SuggRemoveComma { span: self.token.span });
327+
Some(SuggRemoveComma { span: bad_token.span })
328+
} else {
329+
None
330+
};
302331

303-
let help_cannot_start_number =
304-
self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber);
332+
let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| {
333+
let (invalid, valid) = self.token.span.split_at(len as u32);
334+
335+
recovered_ident = Some((Ident::new(valid_portion, valid), false));
336+
337+
HelpIdentifierStartsWithNumber { num_span: invalid }
338+
});
305339

306340
let err = ExpectedIdentifier {
307-
span: self.token.span,
308-
token: self.token.clone(),
341+
span: bad_token.span,
342+
token: bad_token,
309343
suggest_raw,
310344
suggest_remove_comma,
311345
help_cannot_start_number,
@@ -314,6 +348,7 @@ impl<'a> Parser<'a> {
314348

315349
// if the token we have is a `<`
316350
// it *might* be a misplaced generic
351+
// FIXME: could we recover with this?
317352
if self.token == token::Lt {
318353
// all keywords that could have generic applied
319354
let valid_prev_keywords =
@@ -364,18 +399,38 @@ impl<'a> Parser<'a> {
364399
}
365400
}
366401

367-
err
402+
if let Some(recovered_ident) = recovered_ident && recover {
403+
err.emit();
404+
Ok(recovered_ident)
405+
} else {
406+
Err(err)
407+
}
408+
}
409+
410+
pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
411+
self.expected_ident_found(false).unwrap_err()
368412
}
369413

370414
/// Checks if the current token is an integer or float literal and looks like
371415
/// it could be an invalid identifier with digits at the start.
372-
pub(super) fn is_lit_bad_ident(&mut self) -> bool {
373-
matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. })
374-
// ensure that the integer literal is followed by a *invalid*
375-
// suffix: this is how we know that it is a identifier with an
376-
// invalid beginning.
377-
if rustc_ast::MetaItemLit::from_token(&self.token).is_none()
378-
)
416+
///
417+
/// Returns the length in bytes of the invalid (numeric) portion of the
417+
/// identifier, together with the valid portion of the identifier.
419+
pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> {
420+
// ensure that the integer literal is followed by an *invalid*
421+
// suffix: this is how we know that it is an identifier with an
422+
// invalid beginning.
423+
if let token::Literal(Lit {
424+
kind: token::LitKind::Integer | token::LitKind::Float,
425+
symbol,
426+
suffix,
427+
}) = self.token.kind
428+
&& rustc_ast::MetaItemLit::from_token(&self.token).is_none()
429+
{
430+
Some((symbol.as_str().len(), suffix.unwrap()))
431+
} else {
432+
None
433+
}
379434
}
380435

381436
pub(super) fn expected_one_of_not_found(

compiler/rustc_parse/src/parser/item.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> {
11811181
defaultness: Defaultness,
11821182
) -> PResult<'a, ItemInfo> {
11831183
let impl_span = self.token.span;
1184-
let mut err = self.expected_ident_found();
1184+
let mut err = self.expected_ident_found_err();
11851185

11861186
// Only try to recover if this is implementing a trait for a type
11871187
let mut impl_info = match self.parse_item_impl(attrs, defaultness) {
@@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> {
17441744
/// Parses a field identifier. Specialized version of `parse_ident_common`
17451745
/// for better diagnostics and suggestions.
17461746
fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> {
1747-
let (ident, is_raw) = self.ident_or_err()?;
1747+
let (ident, is_raw) = self.ident_or_err(true)?;
17481748
if !is_raw && ident.is_reserved() {
17491749
let snapshot = self.create_snapshot_for_diagnostic();
17501750
let err = if self.check_fn_front_matter(false, Case::Sensitive) {
@@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> {
17761776
Err(err) => {
17771777
err.cancel();
17781778
self.restore_snapshot(snapshot);
1779-
self.expected_ident_found()
1779+
self.expected_ident_found_err()
17801780
}
17811781
}
17821782
} else if self.eat_keyword(kw::Struct) {
@@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> {
17921792
Err(err) => {
17931793
err.cancel();
17941794
self.restore_snapshot(snapshot);
1795-
self.expected_ident_found()
1795+
self.expected_ident_found_err()
17961796
}
17971797
}
17981798
} else {
1799-
let mut err = self.expected_ident_found();
1799+
let mut err = self.expected_ident_found_err();
18001800
if self.eat_keyword_noexpect(kw::Let)
18011801
&& let removal_span = self.prev_token.span.until(self.token.span)
18021802
&& let Ok(ident) = self.parse_ident_common(false)

compiler/rustc_parse/src/parser/mod.rs

+19-15
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ use thin_vec::ThinVec;
4242
use tracing::debug;
4343

4444
use crate::errors::{
45-
DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter,
46-
NonStringAbiLiteral,
45+
IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
4746
};
4847

4948
bitflags::bitflags! {
@@ -552,21 +551,11 @@ impl<'a> Parser<'a> {
552551
self.parse_ident_common(true)
553552
}
554553

555-
fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> {
556-
self.token.ident().ok_or_else(|| match self.prev_token.kind {
557-
TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything {
558-
span: self.prev_token.span,
559-
missing_comma: None,
560-
}
561-
.into_diagnostic(&self.sess.span_diagnostic),
562-
_ => self.expected_ident_found(),
563-
})
564-
}
565-
566554
fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
567-
let (ident, is_raw) = self.ident_or_err()?;
555+
let (ident, is_raw) = self.ident_or_err(recover)?;
556+
568557
if !is_raw && ident.is_reserved() {
569-
let mut err = self.expected_ident_found();
558+
let mut err = self.expected_ident_found_err();
570559
if recover {
571560
err.emit();
572561
} else {
@@ -577,6 +566,21 @@ impl<'a> Parser<'a> {
577566
Ok(ident)
578567
}
579568

569+
fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> {
570+
let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover));
571+
572+
let (ident, is_raw) = match result {
573+
Ok(ident) => ident,
574+
Err(err) => match err {
575+
// we recovered!
576+
Ok(ident) => ident,
577+
Err(err) => return Err(err),
578+
},
579+
};
580+
581+
Ok((ident, is_raw))
582+
}
583+
580584
/// Checks if the next token is `tok`, and returns `true` if so.
581585
///
582586
/// This method will automatically add `tok` to `expected_tokens` if `tok` is not

compiler/rustc_parse/src/parser/pat.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,6 @@ impl<'a> Parser<'a> {
348348
lo = self.token.span;
349349
}
350350

351-
if self.is_lit_bad_ident() {
352-
return Err(self.expected_ident_found());
353-
}
354-
355351
let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd {
356352
self.parse_pat_deref(expected)?
357353
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
@@ -395,7 +391,13 @@ impl<'a> Parser<'a> {
395391
} else {
396392
PatKind::Lit(const_expr)
397393
}
398-
} else if self.can_be_ident_pat() {
394+
// Don't eagerly error on semantically invalid tokens when matching
395+
// declarative macros, as the input to those doesn't have to be
396+
// semantically valid. For attribute/derive proc macros this is not the
397+
// case, so doing the recovery for them is fine.
398+
} else if self.can_be_ident_pat()
399+
|| (self.is_lit_bad_ident().is_some() && self.may_recover())
400+
{
399401
// Parse `ident @ pat`
400402
// This can give false positives and parse nullary enums,
401403
// they are dealt with later in resolve.
@@ -594,7 +596,7 @@ impl<'a> Parser<'a> {
594596
// Make sure we don't allow e.g. `let mut $p;` where `$p:pat`.
595597
if let token::Interpolated(nt) = &self.token.kind {
596598
if let token::NtPat(_) = **nt {
597-
self.expected_ident_found().emit();
599+
self.expected_ident_found_err().emit();
598600
}
599601
}
600602

compiler/rustc_span/src/lib.rs

+12
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,18 @@ impl Span {
795795
})
796796
}
797797

798+
/// Splits a span into two composite spans around a certain position.
799+
pub fn split_at(self, pos: u32) -> (Span, Span) {
800+
let len = self.hi().0 - self.lo().0;
801+
debug_assert!(pos <= len);
802+
803+
let split_pos = BytePos(self.lo().0 + pos);
804+
(
805+
Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
806+
Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
807+
)
808+
}
809+
798810
/// Returns a `Span` that would enclose both `self` and `end`.
799811
///
800812
/// Note that this can also be used to extend the span "backwards":

tests/ui/parser/ident-recovery.rs

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
fn ,comma() {
2+
//~^ ERROR expected identifier, found `,`
3+
struct Foo {
4+
x: i32,,
5+
//~^ ERROR expected identifier, found `,`
6+
y: u32,
7+
}
8+
}
9+
10+
fn break() {
11+
//~^ ERROR expected identifier, found keyword `break`
12+
let continue = 5;
13+
//~^ ERROR expected identifier, found keyword `continue`
14+
}
15+
16+
fn main() {}

0 commit comments

Comments
 (0)