Commit ec1758d
Rollup merge of rust-lang#63017 - matklad:no-fatal, r=petrochenkov
Remove special code-path for handling unknown tokens

In `StringReader`, we have a buffer of fatal errors, which is used in only a single case: when we see something which is not a reasonable token at all, like `🦀`. I think a more straightforward thing to do here is to produce an explicit error token in this case and let the next layer, the parser, deal with it.

However, currently this leads to duplicated error messages. What should we do about this? Naively, I would think that emitting (just emitting, not raising) `FatalError` should stop other errors, but it looks like this is not the case. We could also tweak the parser on a case-by-case basis to avoid emitting "expected" errors when the current token is an `Err`. I am personally also fine with cascading errors in this case: it is quite unlikely that you actually type a fully invalid token.

@petrochenkov, which approach should we take to fight cascading errors?
2 parents 1166e2a + b3e8c8b commit ec1758d
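
The core pattern is easiest to see outside the compiler. Below is a minimal, self-contained sketch of the error-token approach; `Tok` and `lex_one` are hypothetical names for illustration only, not rustc's actual lexer API. Instead of raising a fatal error when it hits an unlexable character, the lexer hands back an explicit `Unknown` token and keeps going, leaving the consumer to skip or report it.

```rust
// A minimal sketch of error-token recovery, independent of rustc's real types.
// `Tok` and `lex_one` are hypothetical names used only for illustration.
#[derive(Debug, PartialEq)]
enum Tok {
    Ident(String),
    Whitespace,
    /// A completely invalid token (e.g. `🦀`), kept so lexing can continue.
    Unknown(String),
    Eof,
}

fn lex_one(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Tok {
    match chars.peek().copied() {
        None => Tok::Eof,
        Some(c) if c.is_whitespace() => {
            while matches!(chars.peek(), Some(c) if c.is_whitespace()) {
                chars.next();
            }
            Tok::Whitespace
        }
        Some(c) if c.is_ascii_alphabetic() => {
            let mut s = String::new();
            while matches!(chars.peek(), Some(c) if c.is_ascii_alphanumeric()) {
                s.push(chars.next().unwrap());
            }
            Tok::Ident(s)
        }
        // Instead of raising a fatal error, produce an explicit error token
        // and let the next layer decide what to do with it.
        Some(c) => {
            chars.next();
            Tok::Unknown(c.to_string())
        }
    }
}

fn main() {
    let mut chars = "let 🦀 x".chars().peekable();
    loop {
        let tok = lex_one(&mut chars);
        println!("{:?}", tok);
        if tok == Tok::Eof {
            break;
        }
    }
}
```

Running this over `let 🦀 x` prints `Ident("let")`, `Whitespace`, `Unknown("🦀")`, `Whitespace`, `Ident("x")`, `Eof`: lexing continues past the bad character instead of aborting the whole session.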

File tree

13 files changed (+223, -116 lines)

src/librustc/ich/impls_syntax.rs (+2, -1)

```diff
@@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind {
             }

             token::DocComment(val) |
-            token::Shebang(val) => val.hash_stable(hcx, hasher),
+            token::Shebang(val) |
+            token::Unknown(val) => val.hash_stable(hcx, hasher),
         }
     }
 }
```

src/librustdoc/html/highlight.rs (+15, -14)

```diff
@@ -44,7 +44,7 @@ pub fn render_with_highlighting(

     let mut highlighted_source = vec![];
     if classifier.write_source(&mut highlighted_source).is_err() {
-        Err(classifier.lexer.buffer_fatal_errors())
+        Err(())
     } else {
         Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
     }
@@ -59,14 +59,9 @@ pub fn render_with_highlighting(
             }
             write_footer(&mut out).unwrap();
         }
-        Err(errors) => {
-            // If errors are encountered while trying to highlight, cancel the errors and just emit
-            // the unhighlighted source. The errors will have already been reported in the
-            // `check-code-block-syntax` pass.
-            for mut error in errors {
-                error.cancel();
-            }
-
+        Err(()) => {
+            // If errors are encountered while trying to highlight, just emit
+            // the unhighlighted source.
             write!(out, "<pre><code>{}</code></pre>", src).unwrap();
         }
     }
@@ -192,14 +187,20 @@ impl<'a> Classifier<'a> {
         if let Some(token) = self.peek_token.take() {
             return Ok(token);
         }
-        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+        let token = self.lexer.next_token();
+        if let token::Unknown(..) = &token.kind {
+            return Err(HighlightError::LexError);
+        }
+        Ok(token)
     }

     fn peek(&mut self) -> Result<&Token, HighlightError> {
         if self.peek_token.is_none() {
-            self.peek_token = Some(
-                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
-            );
+            let token = self.lexer.next_token();
+            if let token::Unknown(..) = &token.kind {
+                return Err(HighlightError::LexError);
+            }
+            self.peek_token = Some(token);
         }
         Ok(self.peek_token.as_ref().unwrap())
     }
@@ -237,7 +238,7 @@ impl<'a> Classifier<'a> {
             return Ok(());
         },

-        token::Whitespace => Class::None,
+        token::Whitespace | token::Unknown(..) => Class::None,
         token::Comment => Class::Comment,
         token::DocComment(..) => Class::DocComment,
```

src/librustdoc/passes/check_code_block_syntax.rs (+9, -23)

```diff
@@ -32,24 +32,20 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );

-        let errors = {
+        let has_errors = {
+            let mut has_errors = false;
             let mut lexer = Lexer::new(&sess, source_file, None);
-            while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
-                if kind == token::Eof {
-                    break;
+            loop {
+                match lexer.next_token().kind {
+                    token::Eof => break,
+                    token::Unknown(..) => has_errors = true,
+                    _ => (),
                 }
             }
-
-            let errors = lexer.buffer_fatal_errors();
-
-            if !errors.is_empty() {
-                Err(errors)
-            } else {
-                Ok(())
-            }
+            has_errors
        };

-        if let Err(errors) = errors {
+        if has_errors {
            let mut diag = if let Some(sp) =
                super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
            {
@@ -58,11 +54,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                .sess()
                .struct_span_warn(sp, "could not parse code block as Rust code");

-            for mut err in errors {
-                diag.note(&format!("error from rustc: {}", err.message()));
-                err.cancel();
-            }
-
            if code_block.syntax.is_none() && code_block.is_fenced {
                let sp = sp.from_inner(InnerSpan::new(0, 3));
                diag.span_suggestion(
@@ -82,11 +73,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                "doc comment contains an invalid Rust code block",
            );

-            for mut err in errors {
-                // Don't bother reporting the error, because we can't show where it happened.
-                err.cancel();
-            }
-
            if code_block.syntax.is_none() && code_block.is_fenced {
                diag.help("mark blocks that do not contain Rust code as text: ```text");
            }
```
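
The consumer-side pattern this pass now uses — drive the lexer to `Eof` and record whether any `Unknown` token appeared — looks like this in isolation. This is a sketch reusing the hypothetical `Tok`/`lex_one` definitions from above, not rustdoc's actual code:

```rust
// Scan a snippet for invalid tokens, in the style of the updated
// check_code_block_syntax pass. `Tok` and `lex_one` are the hypothetical
// sketch types from above, not rustdoc's real lexer.
fn has_lex_errors(src: &str) -> bool {
    let mut chars = src.chars().peekable();
    let mut has_errors = false;
    loop {
        match lex_one(&mut chars) {
            Tok::Eof => break,
            // An error token marks the block as invalid, but we keep
            // scanning so every token is still consumed.
            Tok::Unknown(_) => has_errors = true,
            _ => (),
        }
    }
    has_errors
}
```

For example, `has_lex_errors("let 🦀 = 92;")` returns `true`, while a fully lexable snippet returns `false` even if it would not parse.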

src/libsyntax/ext/proc_macro_server.rs (+1, -1)

```diff
@@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
             }

             OpenDelim(..) | CloseDelim(..) => unreachable!(),
-            Whitespace | Comment | Shebang(..) | Eof => unreachable!(),
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(),
         }
     }
 }
```

src/libsyntax/parse/lexer/mod.rs (+13, -60)

```diff
@@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
 use crate::symbol::{sym, Symbol};
 use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

-use errors::{FatalError, Diagnostic, DiagnosticBuilder};
+use errors::{FatalError, DiagnosticBuilder};
 use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
 use rustc_lexer::Base;
 use rustc_lexer::unescape;
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
     pos: BytePos,
     /// Stop reading src at this index.
     end_src_index: usize,
-    fatal_errs: Vec<DiagnosticBuilder<'a>>,
     /// Source text to tokenize.
     src: Lrc<String>,
     override_span: Option<Span>,
@@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
             pos: source_file.start_pos,
             end_src_index: src.len(),
             src,
-            fatal_errs: Vec::new(),
             override_span,
         }
     }
@@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }

-    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
-        match res {
-            Ok(tok) => tok,
-            Err(_) => {
-                self.emit_fatal_errors();
-                FatalError.raise();
-            }
-        }
-    }
-
     /// Returns the next token, including trivia like whitespace or comments.
     ///
     /// `Err(())` means that some errors were encountered, which can be
     /// retrieved using `buffer_fatal_errors`.
-    pub fn try_next_token(&mut self) -> Result<Token, ()> {
-        assert!(self.fatal_errs.is_empty());
-
+    pub fn next_token(&mut self) -> Token {
         let start_src_index = self.src_index(self.pos);
         let text: &str = &self.src[start_src_index..self.end_src_index];

         if text.is_empty() {
             let span = self.mk_sp(self.pos, self.pos);
-            return Ok(Token::new(token::Eof, span));
+            return Token::new(token::Eof, span);
         }

         {
@@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
                 let kind = token::Shebang(sym);

                 let span = self.mk_sp(start, self.pos);
-                return Ok(Token::new(kind, span));
+                return Token::new(kind, span);
             }
         }
     }
@@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {

         // This could use `?`, but that makes code significantly (10-20%) slower.
         // https://github.com/rust-lang/rust/issues/37939
-        let kind = match self.cook_lexer_token(token.kind, start) {
-            Ok(it) => it,
-            Err(err) => return Err(self.fatal_errs.push(err)),
-        };
+        let kind = self.cook_lexer_token(token.kind, start);

         let span = self.mk_sp(start, self.pos);
-        Ok(Token::new(kind, span))
-    }
-
-    /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// Aborts in case of an error.
-    pub fn next_token(&mut self) -> Token {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    fn emit_fatal_errors(&mut self) {
-        for err in &mut self.fatal_errs {
-            err.emit();
-        }
-
-        self.fatal_errs.clear();
-    }
-
-    pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
-        let mut buffer = Vec::new();
-
-        for err in self.fatal_errs.drain(..) {
-            err.buffer(&mut buffer);
-        }
-
-        buffer
+        Token::new(kind, span)
     }

     /// Report a fatal lexical error with a given span.
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
         &self,
         token: rustc_lexer::TokenKind,
         start: BytePos,
-    ) -> Result<TokenKind, DiagnosticBuilder<'a>> {
-        let kind = match token {
+    ) -> TokenKind {
+        match token {
             rustc_lexer::TokenKind::LineComment => {
                 let string = self.str_from(start);
                 // comments with only more "/"s are not doc comments
@@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
                 // this should be inside `rustc_lexer`. However, we should first remove compound
                 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
                 // as there will be less overall work to do this way.
-                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
-                    Some(token) => {
-                        err.emit();
-                        Ok(token)
-                    }
-                    None => Err(err),
-                }
+                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
+                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
+                err.emit();
+                token
             }
-        };
-        Ok(kind)
+        }
     }

     fn cook_lexer_literal(
```

src/libsyntax/parse/lexer/tokentrees.rs (+1, -1)

```diff
@@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> {
         loop {
             let token = self.string_reader.next_token();
             match token.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
+                token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
                     self.joint_to_prev = NonJoint;
                 }
                 _ => {
```

src/libsyntax/parse/token.rs (+3, -1)

```diff
@@ -255,6 +255,8 @@ pub enum TokenKind {
     /// A comment.
     Comment,
     Shebang(ast::Name),
+    /// A completely invalid token which should be skipped.
+    Unknown(ast::Name),

     Eof,
 }
@@ -603,7 +605,7 @@ impl Token {
             DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
             Question | OpenDelim(..) | CloseDelim(..) |
             Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
-            Whitespace | Comment | Shebang(..) | Eof => return None,
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
         };

         Some(Token::new(kind, self.span.to(joint.span)))
```

src/libsyntax/print/pprust.rs (+1)

```diff
@@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
         token::Whitespace => " ".to_string(),
         token::Comment => "/* */".to_string(),
         token::Shebang(s) => format!("/* shebang: {}*/", s),
+        token::Unknown(s) => s.to_string(),

         token::Interpolated(ref nt) => nonterminal_to_string(nt),
     }
```
