Skip to content

Commit 047b3bd

Browse files
authored
Rollup merge of #72724 - Aaron1011:revert-tokenstream-expand, r=petrochenkov
Revert recursive `TokenKind::Interpolated` expansion for now The crater run #72622 revealed many root regressions, at least one of which is going to take some time to fix. For now, let's revert #72388 to allow the 709 affected crates to continue building on the latest nightly.
2 parents 875c6b2 + b802eeb commit 047b3bd

File tree

7 files changed

+185
-240
lines changed

7 files changed

+185
-240
lines changed

Cargo.lock

-1
Original file line numberDiff line numberDiff line change
@@ -4144,7 +4144,6 @@ dependencies = [
41444144
"rustc_lexer",
41454145
"rustc_session",
41464146
"rustc_span",
4147-
"smallvec 1.4.0",
41484147
"unicode-normalization",
41494148
]
41504149

src/librustc_ast/token.rs

+56
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,62 @@ impl Token {
673673

674674
Some(Token::new(kind, self.span.to(joint.span)))
675675
}
676+
677+
// See comments in `Nonterminal::to_tokenstream` for why we care about
678+
// *probably* equal here rather than actual equality
679+
crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
680+
if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
681+
return false;
682+
}
683+
match (&self.kind, &other.kind) {
684+
(&Eq, &Eq)
685+
| (&Lt, &Lt)
686+
| (&Le, &Le)
687+
| (&EqEq, &EqEq)
688+
| (&Ne, &Ne)
689+
| (&Ge, &Ge)
690+
| (&Gt, &Gt)
691+
| (&AndAnd, &AndAnd)
692+
| (&OrOr, &OrOr)
693+
| (&Not, &Not)
694+
| (&Tilde, &Tilde)
695+
| (&At, &At)
696+
| (&Dot, &Dot)
697+
| (&DotDot, &DotDot)
698+
| (&DotDotDot, &DotDotDot)
699+
| (&DotDotEq, &DotDotEq)
700+
| (&Comma, &Comma)
701+
| (&Semi, &Semi)
702+
| (&Colon, &Colon)
703+
| (&ModSep, &ModSep)
704+
| (&RArrow, &RArrow)
705+
| (&LArrow, &LArrow)
706+
| (&FatArrow, &FatArrow)
707+
| (&Pound, &Pound)
708+
| (&Dollar, &Dollar)
709+
| (&Question, &Question)
710+
| (&Whitespace, &Whitespace)
711+
| (&Comment, &Comment)
712+
| (&Eof, &Eof) => true,
713+
714+
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
715+
716+
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
717+
718+
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
719+
720+
(&Literal(a), &Literal(b)) => a == b,
721+
722+
(&Lifetime(a), &Lifetime(b)) => a == b,
723+
(&Ident(a, b), &Ident(c, d)) => {
724+
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
725+
}
726+
727+
(&Interpolated(_), &Interpolated(_)) => false,
728+
729+
_ => panic!("forgot to add a token?"),
730+
}
731+
}
676732
}
677733

678734
impl PartialEq<TokenKind> for Token {

src/librustc_ast/tokenstream.rs

+125
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ use rustc_macros::HashStable_Generic;
2121
use rustc_span::{Span, DUMMY_SP};
2222
use smallvec::{smallvec, SmallVec};
2323

24+
use log::debug;
25+
2426
use std::{iter, mem};
2527

2628
/// When the main rust parser encounters a syntax-extension invocation, it
@@ -66,6 +68,23 @@ impl TokenTree {
6668
}
6769
}
6870

71+
// See comments in `Nonterminal::to_tokenstream` for why we care about
72+
// *probably* equal here rather than actual equality
73+
//
74+
// This is otherwise the same as `eq_unspanned`, only recursing with a
75+
// different method.
76+
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
77+
match (self, other) {
78+
(TokenTree::Token(token), TokenTree::Token(token2)) => {
79+
token.probably_equal_for_proc_macro(token2)
80+
}
81+
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
82+
delim == delim2 && tts.probably_equal_for_proc_macro(&tts2)
83+
}
84+
_ => false,
85+
}
86+
}
87+
6988
/// Retrieves the TokenTree's span.
7089
pub fn span(&self) -> Span {
7190
match self {
@@ -288,6 +307,112 @@ impl TokenStream {
288307
t1.next().is_none() && t2.next().is_none()
289308
}
290309

310+
// See comments in `Nonterminal::to_tokenstream` for why we care about
311+
// *probably* equal here rather than actual equality
312+
//
313+
// This is otherwise the same as `eq_unspanned`, only recursing with a
314+
// different method.
315+
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
316+
// When checking for `probably_eq`, we ignore certain tokens that aren't
317+
// preserved in the AST. Because they are not preserved, the pretty
318+
// printer arbitrarily adds or removes them when printing as token
319+
// streams, making a comparison between a token stream generated from an
320+
// AST and a token stream which was parsed into an AST more reliable.
321+
fn semantic_tree(tree: &TokenTree) -> bool {
322+
if let TokenTree::Token(token) = tree {
323+
if let
324+
// The pretty printer tends to add trailing commas to
325+
// everything, and in particular, after struct fields.
326+
| token::Comma
327+
// The pretty printer emits `NoDelim` as whitespace.
328+
| token::OpenDelim(DelimToken::NoDelim)
329+
| token::CloseDelim(DelimToken::NoDelim)
330+
// The pretty printer collapses many semicolons into one.
331+
| token::Semi
332+
// The pretty printer collapses whitespace arbitrarily and can
333+
// introduce whitespace from `NoDelim`.
334+
| token::Whitespace
335+
// The pretty printer can turn `$crate` into `::crate_name`
336+
| token::ModSep = token.kind {
337+
return false;
338+
}
339+
}
340+
true
341+
}
342+
343+
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
344+
//
345+
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
346+
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
347+
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
348+
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
349+
// when determining if two `TokenStream`s are 'probably equal'.
350+
//
351+
// Therefore, we use `break_two_token_op` to convert all tokens
352+
// to the 'unglued' form (if it exists). This ensures that two
353+
// `TokenStream`s which differ only in how their tokens are glued
354+
// will be considered 'probably equal', which allows us to keep spans.
355+
//
356+
// This is important when the original `TokenStream` contained
357+
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
358+
// will be omitted when we pretty-print, which can cause the original
359+
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
360+
// leading to some tokens being 'glued' together in one stream but not
361+
// the other. See #68489 for more details.
362+
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
363+
// In almost all cases, we should have either zero or one levels
364+
// of 'unglueing'. However, in some unusual cases, we may need
365+
// to iterate breaking tokens multiple times. For example:
366+
// '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
367+
let mut token_trees: SmallVec<[_; 2]>;
368+
if let TokenTree::Token(token) = &tree {
369+
let mut out = SmallVec::<[_; 2]>::new();
370+
out.push(token.clone());
371+
// Iterate to fixpoint:
372+
// * We start off with 'out' containing our initial token, and `temp` empty
373+
// * If we are able to break any tokens in `out`, then `out` will have
374+
// at least one more element than 'temp', so we will try to break tokens
375+
// again.
376+
// * If we cannot break any tokens in 'out', we are done
377+
loop {
378+
let mut temp = SmallVec::<[_; 2]>::new();
379+
let mut changed = false;
380+
381+
for token in out.into_iter() {
382+
if let Some((first, second)) = token.kind.break_two_token_op() {
383+
temp.push(Token::new(first, DUMMY_SP));
384+
temp.push(Token::new(second, DUMMY_SP));
385+
changed = true;
386+
} else {
387+
temp.push(token);
388+
}
389+
}
390+
out = temp;
391+
if !changed {
392+
break;
393+
}
394+
}
395+
token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
396+
if token_trees.len() != 1 {
397+
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
398+
}
399+
} else {
400+
token_trees = SmallVec::new();
401+
token_trees.push(tree);
402+
}
403+
token_trees.into_iter()
404+
}
405+
406+
let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
407+
let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
408+
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
409+
if !t1.probably_equal_for_proc_macro(&t2) {
410+
return false;
411+
}
412+
}
413+
t1.next().is_none() && t2.next().is_none()
414+
}
415+
291416
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
292417
TokenStream(Lrc::new(
293418
self.0

src/librustc_parse/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ doctest = false
1212
[dependencies]
1313
bitflags = "1.0"
1414
log = "0.4"
15-
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
1615
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
1716
rustc_data_structures = { path = "../librustc_data_structures" }
1817
rustc_feature = { path = "../librustc_feature" }

0 commit comments

Comments
 (0)