Commit 87ba8f2

Rollup merge of rust-lang#68848 - nnethercote:hasten-macro-parsing, r=petrochenkov
Hasten macro parsing

r? @eddyb
2 parents 2a20133 + 2a13b24 commit 87ba8f2

File tree

6 files changed (+73, -64 lines)


src/librustc_expand/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#![feature(cow_is_borrowed)]
 #![feature(crate_visibility_modifier)]
 #![feature(decl_macro)]
 #![feature(proc_macro_diagnostic)]
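
Note: the only change in this file is enabling the `cow_is_borrowed` library feature, which gates the unstable `Cow::is_borrowed` and `Cow::is_owned` methods. A nightly-only sketch of what those methods report (the string `Cow` here is just an illustration, not code from the commit):

#![feature(cow_is_borrowed)] // nightly-only, matching the gate added above

use std::borrow::Cow;

fn main() {
    let base = String::from("base parser");
    let mut cow: Cow<'_, str> = Cow::Borrowed(base.as_str());
    assert!(cow.is_borrowed()); // still sharing the original data

    cow.to_mut().push_str(" (cloned)");
    assert!(cow.is_owned()); // mutation forced a clone
}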

src/librustc_expand/mbe/macro_parser.rs

Lines changed: 14 additions & 34 deletions
@@ -78,20 +78,19 @@ use crate::mbe::{self, TokenTree};
 
 use rustc_ast_pretty::pprust;
 use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
-use rustc_parse::Directory;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{kw, sym, Symbol};
 use syntax::ast::{Ident, Name};
 use syntax::ptr::P;
 use syntax::token::{self, DocComment, Nonterminal, Token};
-use syntax::tokenstream::TokenStream;
 
 use rustc_errors::{FatalError, PResult};
 use rustc_span::Span;
 use smallvec::{smallvec, SmallVec};
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
+use std::borrow::Cow;
 use std::collections::hash_map::Entry::{Occupied, Vacant};
 use std::mem;
 use std::ops::{Deref, DerefMut};
@@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
     Success(())
 }
 
-/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
-/// against it and return the match.
-///
-/// # Parameters
-///
-/// - `sess`: The session into which errors are emitted
-/// - `tts`: The tokenstream we are matching against the pattern `ms`
-/// - `ms`: A sequence of token trees representing a pattern against which we are matching
-/// - `directory`: Information about the file locations (needed for the black-box parser)
-/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
-///   parser)
-pub(super) fn parse(
-    sess: &ParseSess,
-    tts: TokenStream,
-    ms: &[TokenTree],
-    directory: Option<Directory<'_>>,
-    recurse_into_modules: bool,
-) -> NamedParseResult {
-    // Create a parser that can be used for the "black box" parts.
-    let mut parser =
-        Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);
-
+/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
+/// stream from the given `parser` against it and return the match.
+pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
     // A queue of possible matcher positions. We initialize it with the matcher position in which
     // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
     // processes all of these possible matcher positions and produces possible next positions into
@@ -659,7 +639,7 @@ pub(super) fn parse(
         // parsing from the black-box parser done. The result is that `next_items` will contain a
         // bunch of possible next matcher positions in `next_items`.
         match inner_parse_loop(
-            sess,
+            parser.sess,
             &mut cur_items,
             &mut next_items,
             &mut eof_items,
@@ -684,7 +664,7 @@
             if eof_items.len() == 1 {
                 let matches =
                     eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
-                return nameize(sess, ms, matches);
+                return nameize(parser.sess, ms, matches);
             } else if eof_items.len() > 1 {
                 return Error(
                     parser.token.span,
@@ -709,9 +689,14 @@
         // unnecessary implicit clone later in Rc::make_mut.
         drop(eof_items);
 
+        // If there are no possible next positions AND we aren't waiting for the black-box parser,
+        // then there is a syntax error.
+        if bb_items.is_empty() && next_items.is_empty() {
+            return Failure(parser.token.clone(), "no rules expected this token in macro call");
+        }
         // Another possibility is that we need to call out to parse some rust nonterminal
         // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
-        if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
+        else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
            let nts = bb_items
                .iter()
                .map(|item| match item.top_elts.get_tt(item.idx) {
@@ -733,16 +718,11 @@
                 ),
             );
         }
-        // If there are no possible next positions AND we aren't waiting for the black-box parser,
-        // then there is a syntax error.
-        else if bb_items.is_empty() && next_items.is_empty() {
-            return Failure(parser.token.take(), "no rules expected this token in macro call");
-        }
         // Dump all possible `next_items` into `cur_items` for the next iteration.
         else if !next_items.is_empty() {
            // Now process the next token
            cur_items.extend(next_items.drain(..));
-            parser.bump();
+            parser.to_mut().bump();
         }
         // Finally, we have the case where we need to call the black-box parser to get some
        // nonterminal.
@@ -754,7 +734,7 @@
                let match_cur = item.match_cur;
                item.push_match(
                    match_cur,
-                    MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
+                    MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
                );
                item.idx += 1;
                item.match_cur += 1;
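
Note: the renamed `parse_tt` now takes `&mut Cow<'_, Parser<'_>>` instead of building a parser itself, so the caller's base parser is only cloned when the matcher actually has to advance it via `to_mut()`. A minimal, self-contained sketch of that clone-on-write pattern, using a toy `Parser` stand-in rather than rustc's:

use std::borrow::Cow;

// Toy stand-in for rustc's `Parser`; only `Clone` matters for the sketch.
#[derive(Clone)]
struct Parser {
    pos: usize,
}

impl Parser {
    fn bump(&mut self) {
        self.pos += 1;
    }
}

// Same shape as the new `parse_tt`: reads go through `Deref`, and the base
// parser is only cloned when `to_mut()` is called for a mutation.
fn match_arm(parser: &mut Cow<'_, Parser>, needs_bump: bool) -> usize {
    if needs_bump {
        parser.to_mut().bump(); // first mutation clones the borrowed base
    }
    parser.pos // read-only access never clones
}

fn main() {
    let base = Parser { pos: 0 };

    let mut untouched = Cow::Borrowed(&base);
    match_arm(&mut untouched, false);
    assert!(matches!(untouched, Cow::Borrowed(_))); // still just a borrow

    let mut mutated = Cow::Borrowed(&base);
    assert_eq!(match_arm(&mut mutated, true), 1);
    assert!(matches!(mutated, Cow::Owned(_))); // cloned on first mutation
    assert_eq!(base.pos, 0); // the shared base parser is untouched
}

Only callers that actually bump or hand the parser to `parse_nt` pay for a clone; everything else keeps reading through the shared borrow.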

src/librustc_expand/mbe/macro_rules.rs

Lines changed: 47 additions & 17 deletions
@@ -1,11 +1,11 @@
-use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
+use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
 use crate::base::{SyntaxExtension, SyntaxExtensionKind};
 use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
 use crate::mbe;
 use crate::mbe::macro_check;
-use crate::mbe::macro_parser::parse;
+use crate::mbe::macro_parser::parse_tt;
 use crate::mbe::macro_parser::{Error, Failure, Success};
-use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
+use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
 use crate::mbe::transcribe::transcribe;
 
 use rustc_ast_pretty::pprust;
@@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
     }
 }
 
-fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
+fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
     let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
-    cx.expansions.entry(sp).or_default().push(message);
+    cx_expansions.entry(sp).or_default().push(message);
 }
 
 /// Given `lhses` and `rhses`, this is the new macro we create
@@ -184,12 +184,36 @@ fn generic_extension<'cx>(
 ) -> Box<dyn MacResult + 'cx> {
     if cx.trace_macros() {
         let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
-        trace_macros_note(cx, sp, msg);
+        trace_macros_note(&mut cx.expansions, sp, msg);
     }
 
     // Which arm's failure should we report? (the one furthest along)
     let mut best_failure: Option<(Token, &str)> = None;
+
+    // We create a base parser that can be used for the "black box" parts.
+    // Every iteration needs a fresh copy of that base parser. However, the
+    // parser is not mutated on many of the iterations, particularly when
+    // dealing with macros like this:
+    //
+    // macro_rules! foo {
+    //     ("a") => (A);
+    //     ("b") => (B);
+    //     ("c") => (C);
+    //     // ... etc. (maybe hundreds more)
+    // }
+    //
+    // as seen in the `html5ever` benchmark. We use a `Cow` so that the base
+    // parser is only cloned when necessary (upon mutation). Furthermore, we
+    // reinitialize the `Cow` with the base parser at the start of every
+    // iteration, so that any mutated parsers are not reused. This is all quite
+    // hacky, but speeds up the `html5ever` benchmark significantly. (Issue
+    // 68836 suggests a more comprehensive but more complex change to deal with
+    // this situation.)
+    let base_parser = base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
+
     for (i, lhs) in lhses.iter().enumerate() {
+        let mut parser = Cow::Borrowed(&base_parser);
+
         // try each arm's matchers
         let lhs_tt = match *lhs {
             mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
@@ -202,7 +226,7 @@
         // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
         let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());
 
-        match parse_tt(cx, lhs_tt, arg.clone()) {
+        match parse_tt(&mut parser, lhs_tt) {
             Success(named_matches) => {
                 // The matcher was `Success(..)`ful.
                 // Merge the gated spans from parsing the matcher with the pre-existing ones.
@@ -232,11 +256,11 @@
 
                 if cx.trace_macros() {
                     let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
-                    trace_macros_note(cx, sp, msg);
+                    trace_macros_note(&mut cx.expansions, sp, msg);
                 }
 
                 let directory = Directory {
-                    path: Cow::from(cx.current_expansion.module.directory.as_path()),
+                    path: cx.current_expansion.module.directory.clone(),
                     ownership: cx.current_expansion.directory_ownership,
                 };
                 let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None);
@@ -269,6 +293,7 @@
         // Restore to the state before snapshotting and maybe try again.
         mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
     }
+    drop(base_parser);
 
     let (token, label) = best_failure.expect("ran no matchers");
     let span = token.span.substitute_dummy(sp);
@@ -286,7 +311,9 @@
             mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
             _ => continue,
         };
-        match parse_tt(cx, lhs_tt, arg.clone()) {
+        let base_parser =
+            base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
+        match parse_tt(&mut Cow::Borrowed(&base_parser), lhs_tt) {
             Success(_) => {
                 if comma_span.is_dummy() {
                     err.note("you might be missing a comma");
@@ -368,7 +395,8 @@ pub fn compile_declarative_macro(
         ),
     ];
 
-    let argument_map = match parse(sess, body, &argument_gram, None, true) {
+    let base_parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
+    let argument_map = match parse_tt(&mut Cow::Borrowed(&base_parser), &argument_gram) {
         Success(m) => m,
         Failure(token, msg) => {
            let s = parse_failure_msg(&token);
@@ -1184,14 +1212,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
     }
 }
 
-/// Use this token tree as a matcher to parse given tts.
-fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
-    // `None` is because we're not interpolating
+fn base_parser_from_cx<'cx>(
+    current_expansion: &'cx ExpansionData,
+    sess: &'cx ParseSess,
+    tts: TokenStream,
+) -> Parser<'cx> {
     let directory = Directory {
-        path: Cow::from(cx.current_expansion.module.directory.as_path()),
-        ownership: cx.current_expansion.directory_ownership,
+        path: current_expansion.module.directory.clone(),
+        ownership: current_expansion.directory_ownership,
     };
-    parse(cx.parse_sess(), tts, mtch, Some(directory), true)
+    Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
 }
 
 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
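
Note: the long comment added to `generic_extension` is the heart of the optimization: build one base parser per macro invocation, then hand each arm a fresh `Cow::Borrowed` of it, so only arms that actually consult the black-box parser pay for a clone. A rough sketch of that loop structure, with a toy parser and a hypothetical `try_arm` standing in for the real matcher:

use std::borrow::Cow;

#[derive(Clone)]
struct Parser {
    pos: usize,
}

// Hypothetical stand-in for trying one macro arm: many arms (e.g. plain
// literal matchers) never touch the black-box parser at all.
fn try_arm(parser: &mut Cow<'_, Parser>, needs_black_box: bool) -> bool {
    if needs_black_box {
        parser.to_mut().pos += 1; // clones the base parser on first mutation
    }
    false // pretend the arm did not match, so the caller keeps iterating
}

fn main() {
    // Built once per invocation, like `base_parser_from_cx` in the diff.
    let base_parser = Parser { pos: 0 };

    for arm in 0..1_000 {
        // Reinitialized every iteration so a mutated clone is never reused.
        let mut parser = Cow::Borrowed(&base_parser);
        if try_arm(&mut parser, arm % 100 == 0) {
            break;
        }
        // `parser` is dropped here; cheap arms never cloned `base_parser`.
    }
    assert_eq!(base_parser.pos, 0);
}

The explicit `drop(base_parser)` after the loop presumably just ends the base parser's borrows of `cx` before the failure-reporting path below uses `cx` again, and the narrowed `trace_macros_note` signature (taking only `cx.expansions`) avoids borrowing all of `cx` mutably while those borrows are live.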

src/librustc_parse/lib.rs

Lines changed: 4 additions & 5 deletions
@@ -12,8 +12,7 @@ use syntax::ast;
 use syntax::token::{self, Nonterminal};
 use syntax::tokenstream::{self, TokenStream, TokenTree};
 
-use std::borrow::Cow;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::str;
 
 use log::info;
@@ -29,8 +28,8 @@ pub mod validate_attr;
 pub mod config;
 
 #[derive(Clone)]
-pub struct Directory<'a> {
-    pub path: Cow<'a, Path>,
+pub struct Directory {
+    pub path: PathBuf,
     pub ownership: DirectoryOwnership,
 }
 
@@ -274,7 +273,7 @@
 pub fn stream_to_parser_with_base_dir<'a>(
     sess: &'a ParseSess,
     stream: TokenStream,
-    base_dir: Directory<'a>,
+    base_dir: Directory,
 ) -> Parser<'a> {
     Parser::new(sess, stream, Some(base_dir), true, false, None)
 }
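
Note: with `Directory` owning a `PathBuf` instead of borrowing a `Cow<'a, Path>`, the struct loses its lifetime parameter, which is what lets the signatures in `parser/mod.rs` and the `push` calls in `parser/module.rs` below simplify. A small sketch of the owned shape, with a simplified `DirectoryOwnership` standing in for rustc's:

use std::path::PathBuf;

// Simplified stand-in for rustc's `DirectoryOwnership`.
#[derive(Clone, Copy, PartialEq, Debug)]
enum DirectoryOwnership {
    Owned,
    UnownedViaMod,
}

// The post-change shape of `Directory`: no lifetime parameter, owned path.
#[derive(Clone)]
struct Directory {
    path: PathBuf,
    ownership: DirectoryOwnership,
}

fn main() {
    let mut dir = Directory {
        path: PathBuf::new(),
        ownership: DirectoryOwnership::Owned,
    };

    // `PathBuf::push` now works directly on the field; the old `Cow<'a, Path>`
    // version needed `dir.path.to_mut().push(...)`, as `module.rs` shows.
    dir.path.push("src");
    dir.path.push("lib.rs");
    assert_eq!(dir.path, PathBuf::from("src").join("lib.rs"));

    // Changing ownership is a plain field write; cloning deep-copies the path.
    dir.ownership = DirectoryOwnership::UnownedViaMod;
    let copy = dir.clone();
    assert_eq!(copy.ownership, DirectoryOwnership::UnownedViaMod);
}

The trade-off is an owned copy of the path where the old code could sometimes stay borrowed; the diff accepts that in exchange for dropping the `'a` lifetime from `Directory`, `Parser::new`, and related signatures.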

src/librustc_parse/parser/mod.rs

Lines changed: 4 additions & 5 deletions
@@ -29,7 +29,6 @@ use syntax::token::{self, DelimToken, Token, TokenKind};
 use syntax::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint};
 use syntax::util::comments::{doc_comment_style, strip_doc_comment_decoration};
 
-use std::borrow::Cow;
 use std::path::PathBuf;
 use std::{cmp, mem, slice};
 
@@ -108,7 +107,7 @@ pub struct Parser<'a> {
     pub prev_span: Span,
     restrictions: Restrictions,
     /// Used to determine the path to externally loaded source files.
-    pub(super) directory: Directory<'a>,
+    pub(super) directory: Directory,
     /// `true` to parse sub-modules in other files.
     // Public for rustfmt usage.
     pub recurse_into_file_modules: bool,
@@ -370,7 +369,7 @@ impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
         tokens: TokenStream,
-        directory: Option<Directory<'a>>,
+        directory: Option<Directory>,
         recurse_into_file_modules: bool,
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
@@ -385,7 +384,7 @@
             restrictions: Restrictions::empty(),
             recurse_into_file_modules,
             directory: Directory {
-                path: Cow::from(PathBuf::new()),
+                path: PathBuf::new(),
                 ownership: DirectoryOwnership::Owned { relative: None },
             },
             root_module_name: None,
@@ -413,7 +412,7 @@
             &sess.source_map().lookup_char_pos(parser.token.span.lo()).file.unmapped_path
         {
             if let Some(directory_path) = path.parent() {
-                parser.directory.path = Cow::from(directory_path.to_path_buf());
+                parser.directory.path = directory_path.to_path_buf();
             }
         }
     }

src/librustc_parse/parser/module.rs

Lines changed: 3 additions & 3 deletions
@@ -285,7 +285,7 @@
 
     fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
         if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) {
-            self.directory.path.to_mut().push(&*path.as_str());
+            self.directory.path.push(&*path.as_str());
             self.directory.ownership = DirectoryOwnership::Owned { relative: None };
         } else {
             // We have to push on the current module name in the case of relative
@@ -297,10 +297,10 @@
             if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership {
                 if let Some(ident) = relative.take() {
                     // remove the relative offset
-                    self.directory.path.to_mut().push(&*ident.as_str());
+                    self.directory.path.push(&*ident.as_str());
                 }
             }
-            self.directory.path.to_mut().push(&*id.as_str());
+            self.directory.path.push(&*id.as_str());
         }
     }
 }
