Skip to content

Commit 67da45f

Browse files
committed
Avoid instantiating many Parser structs in generic_extension.
Currently, every iteration of the main loop in `generic_extension` instantiates a `Parser`, which is expensive because `Parser` is a large type. Many of those instantiations are only used immutably, particularly for simple-but-repetitive macros of the sort seen in `html5ever` and PR 68836. This commit initializes a single parser outside the loop, and then uses `Cow` to avoid cloning it except for the mutating iterations. This speeds up `html5ever` runs by up to 15%.
1 parent a19edd6 commit 67da45f

File tree

2 files changed

+52
-45
lines changed

2 files changed

+52
-45
lines changed

src/librustc_expand/mbe/macro_parser.rs

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -78,20 +78,19 @@ use crate::mbe::{self, TokenTree};
7878

7979
use rustc_ast_pretty::pprust;
8080
use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
81-
use rustc_parse::Directory;
8281
use rustc_session::parse::ParseSess;
8382
use rustc_span::symbol::{kw, sym, Symbol};
8483
use syntax::ast::{Ident, Name};
8584
use syntax::ptr::P;
8685
use syntax::token::{self, DocComment, Nonterminal, Token};
87-
use syntax::tokenstream::TokenStream;
8886

8987
use rustc_errors::{FatalError, PResult};
9088
use rustc_span::Span;
9189
use smallvec::{smallvec, SmallVec};
9290

9391
use rustc_data_structures::fx::FxHashMap;
9492
use rustc_data_structures::sync::Lrc;
93+
use std::borrow::Cow;
9594
use std::collections::hash_map::Entry::{Occupied, Vacant};
9695
use std::mem;
9796
use std::ops::{Deref, DerefMut};
@@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
613612
Success(())
614613
}
615614

616-
/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
617-
/// against it and return the match.
618-
///
619-
/// # Parameters
620-
///
621-
/// - `sess`: The session into which errors are emitted
622-
/// - `tts`: The tokenstream we are matching against the pattern `ms`
623-
/// - `ms`: A sequence of token trees representing a pattern against which we are matching
624-
/// - `directory`: Information about the file locations (needed for the black-box parser)
625-
/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
626-
/// parser)
627-
pub(super) fn parse(
628-
sess: &ParseSess,
629-
tts: TokenStream,
630-
ms: &[TokenTree],
631-
directory: Option<Directory<'_>>,
632-
recurse_into_modules: bool,
633-
) -> NamedParseResult {
634-
// Create a parser that can be used for the "black box" parts.
635-
let mut parser =
636-
Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);
637-
615+
/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
616+
/// stream from the given `parser` against it and return the match.
617+
pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
638618
// A queue of possible matcher positions. We initialize it with the matcher position in which
639619
// the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
640620
// processes all of these possible matcher positions and produces possible next positions into
@@ -659,7 +639,7 @@ pub(super) fn parse(
659639
// parsing from the black-box parser done. The result is that `next_items` will contain a
660640
// bunch of possible next matcher positions.
661641
match inner_parse_loop(
662-
sess,
642+
parser.sess,
663643
&mut cur_items,
664644
&mut next_items,
665645
&mut eof_items,
@@ -684,7 +664,7 @@ pub(super) fn parse(
684664
if eof_items.len() == 1 {
685665
let matches =
686666
eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
687-
return nameize(sess, ms, matches);
667+
return nameize(parser.sess, ms, matches);
688668
} else if eof_items.len() > 1 {
689669
return Error(
690670
parser.token.span,
@@ -736,13 +716,13 @@ pub(super) fn parse(
736716
// If there are no possible next positions AND we aren't waiting for the black-box parser,
737717
// then there is a syntax error.
738718
else if bb_items.is_empty() && next_items.is_empty() {
739-
return Failure(parser.token.take(), "no rules expected this token in macro call");
719+
return Failure(parser.token.clone(), "no rules expected this token in macro call");
740720
}
741721
// Dump all possible `next_items` into `cur_items` for the next iteration.
742722
else if !next_items.is_empty() {
743723
// Now process the next token
744724
cur_items.extend(next_items.drain(..));
745-
parser.bump();
725+
parser.to_mut().bump();
746726
}
747727
// Finally, we have the case where we need to call the black-box parser to get some
748728
// nonterminal.
@@ -754,7 +734,7 @@ pub(super) fn parse(
754734
let match_cur = item.match_cur;
755735
item.push_match(
756736
match_cur,
757-
MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
737+
MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
758738
);
759739
item.idx += 1;
760740
item.match_cur += 1;

src/librustc_expand/mbe/macro_rules.rs

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
1+
use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
22
use crate::base::{SyntaxExtension, SyntaxExtensionKind};
33
use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
44
use crate::mbe;
55
use crate::mbe::macro_check;
6-
use crate::mbe::macro_parser::parse;
6+
use crate::mbe::macro_parser::parse_tt;
77
use crate::mbe::macro_parser::{Error, Failure, Success};
8-
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
8+
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
99
use crate::mbe::transcribe::transcribe;
1010

1111
use rustc_ast_pretty::pprust;
@@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
166166
}
167167
}
168168

169-
fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
169+
fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
170170
let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
171-
cx.expansions.entry(sp).or_default().push(message);
171+
cx_expansions.entry(sp).or_default().push(message);
172172
}
173173

174174
/// Given `lhses` and `rhses`, this is the new macro we create
@@ -184,11 +184,33 @@ fn generic_extension<'cx>(
184184
) -> Box<dyn MacResult + 'cx> {
185185
if cx.trace_macros() {
186186
let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
187-
trace_macros_note(cx, sp, msg);
187+
trace_macros_note(&mut cx.expansions, sp, msg);
188188
}
189189

190190
// Which arm's failure should we report? (the one furthest along)
191191
let mut best_failure: Option<(Token, &str)> = None;
192+
193+
// We create a base parser that can be used for the "black box" parts.
194+
// Every iteration needs a fresh copy of that parser. However, the parser
195+
// is not mutated on many of the iterations, particularly when dealing with
196+
// macros like this:
197+
//
198+
// macro_rules! foo {
199+
// ("a") => (A);
200+
// ("b") => (B);
201+
// ("c") => (C);
202+
// // ... etc. (maybe hundreds more)
203+
// }
204+
//
205+
// as seen in the `html5ever` benchmark. We use a `Cow` so that the base
206+
// parser is only cloned when necessary (upon mutation). Furthermore, we
207+
// reinitialize the `Cow` with the base parser at the start of every
208+
// iteration, so that any mutated parsers are not reused. This is all quite
209+
// hacky, but speeds up the `html5ever` benchmark significantly. (Issue
210+
// 68836 suggests a more comprehensive but more complex change to deal with
211+
// this situation.)
212+
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
213+
192214
for (i, lhs) in lhses.iter().enumerate() {
193215
// try each arm's matchers
194216
let lhs_tt = match *lhs {
@@ -202,7 +224,7 @@ fn generic_extension<'cx>(
202224
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
203225
let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());
204226

205-
match parse_tt(cx, lhs_tt, arg.clone()) {
227+
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
206228
Success(named_matches) => {
207229
// The matcher was `Success(..)`ful.
208230
// Merge the gated spans from parsing the matcher with the pre-existing ones.
@@ -232,7 +254,7 @@ fn generic_extension<'cx>(
232254

233255
if cx.trace_macros() {
234256
let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
235-
trace_macros_note(cx, sp, msg);
257+
trace_macros_note(&mut cx.expansions, sp, msg);
236258
}
237259

238260
let directory = Directory {
@@ -269,6 +291,7 @@ fn generic_extension<'cx>(
269291
// Restore to the state before snapshotting and maybe try again.
270292
mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
271293
}
294+
drop(parser);
272295

273296
let (token, label) = best_failure.expect("ran no matchers");
274297
let span = token.span.substitute_dummy(sp);
@@ -286,7 +309,8 @@ fn generic_extension<'cx>(
286309
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
287310
_ => continue,
288311
};
289-
match parse_tt(cx, lhs_tt, arg.clone()) {
312+
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
313+
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
290314
Success(_) => {
291315
if comma_span.is_dummy() {
292316
err.note("you might be missing a comma");
@@ -368,7 +392,8 @@ pub fn compile_declarative_macro(
368392
),
369393
];
370394

371-
let argument_map = match parse(sess, body, &argument_gram, None, true) {
395+
let parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
396+
let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
372397
Success(m) => m,
373398
Failure(token, msg) => {
374399
let s = parse_failure_msg(&token);
@@ -1184,14 +1209,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
11841209
}
11851210
}
11861211

1187-
/// Use this token tree as a matcher to parse given tts.
1188-
fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
1189-
// `None` is because we're not interpolating
1212+
fn parser_from_cx<'cx>(
1213+
current_expansion: &'cx ExpansionData,
1214+
sess: &'cx ParseSess,
1215+
tts: TokenStream,
1216+
) -> Parser<'cx> {
11901217
let directory = Directory {
1191-
path: Cow::from(cx.current_expansion.module.directory.as_path()),
1192-
ownership: cx.current_expansion.directory_ownership,
1218+
path: Cow::from(current_expansion.module.directory.as_path()),
1219+
ownership: current_expansion.directory_ownership,
11931220
};
1194-
parse(cx.parse_sess(), tts, mtch, Some(directory), true)
1221+
Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
11951222
}
11961223

11971224
/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For

0 commit comments

Comments
 (0)