Skip to content
This repository was archived by the owner on Jan 7, 2022. It is now read-only.

Commit 14d78dd

Browse files
bors[bot]vext01
andcommitted
Merge #12
12: simpler dominance frontiers r=ltratt a=vext01 These changes are optimisations and simplifications to the machinery around computing dominance frontiers and inserting phi nodes. Most notably instead of using the frontier algorithm from Appel book, we instead use the one from the paper "A Simple,Fast Dominance Algorithm. Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy": https://www.doc.ic.ac.uk/~livshits/classes/CO444H/reading/dom14.pdf The Rust compiler already implements most of this paper, I just had to add the last little bit. I had been using grmtools as a benchmark for these changes and was underwhelmed when I saw that they made no performance difference whatsoever :( I was going to suggest we include these changes anyway, as they make the code shorter and easier to read. But then I tried with these optimisations on the compiler itself and found it shaved off about 2 hours and 15 minutes! I thought it was a fluke, so I repeated the build another two times over the weekend. Sure enough, consistent speedup! http://bencher12.soft-dev.org:8010/#/builders/2/builds/117 http://bencher12.soft-dev.org:8010/#/builders/2/builds/118 http://bencher12.soft-dev.org:8010/#/builders/2/builds/119 And here's a build from before where the build was slower: http://bencher12.soft-dev.org:8010/#/builders/2/builds/110 So it was about 6 hours 15 mins, now it's about 4 hours. The build is now faster than when we had a hand-rolled serialiser. There is one more potential optimisation: remove `RefCell`s. I did a little benchmark and found that refcells have a fair overhead if you use them in a tight loop (especially in release mode). However, to remove the `RefCell`s from our conversion code, we'd need quite a bit of re-factoring. Since I'm getting itchy to move on, I suggest we don't worry about that for now. Co-authored-by: Edd Barrett <vext01@gmail.com>
2 parents cc5cf54 + 29bf888 commit 14d78dd

File tree

2 files changed

+86
-155
lines changed

2 files changed

+86
-155
lines changed

src/librustc_data_structures/graph/dominators/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use super::super::indexed_vec::{Idx, IndexVec};
88
use super::iterate::reverse_post_order;
99
use super::ControlFlowGraph;
10+
use crate::bit_set::BitSet;
1011

1112
use std::fmt;
1213

@@ -159,6 +160,36 @@ impl<'dom, Node: Idx> Iterator for Iter<'dom, Node> {
159160
}
160161
}
161162

163+
pub struct DominatorFrontiers<G: ControlFlowGraph> {
164+
dfs: IndexVec<G::Node, BitSet<G::Node>>,
165+
}
166+
167+
impl<G: ControlFlowGraph> DominatorFrontiers<G> {
168+
pub fn new(graph: &G, doms: &Dominators<G::Node>) -> Self {
169+
let num_nodes = graph.num_nodes();
170+
let mut dfs = IndexVec::from_elem_n(BitSet::new_empty(num_nodes), num_nodes);
171+
172+
for b in (0..num_nodes).map(|i| G::Node::new(i)) {
173+
if graph.predecessors(b).take(2).count() > 1 {
174+
// Iterator isn't clonable, so we have to make a new one.
175+
let preds = graph.predecessors(b);
176+
for p in preds {
177+
let mut runner = p; // Not strictly necessary, but for parity with the paper.
178+
while runner != doms.immediate_dominator(b) {
179+
dfs[runner].insert(b);
180+
runner = doms.immediate_dominator(runner);
181+
}
182+
}
183+
}
184+
}
185+
Self { dfs }
186+
}
187+
188+
pub fn frontier(&self, n: G::Node) -> &BitSet<G::Node> {
189+
&self.dfs[n]
190+
}
191+
}
192+
162193
pub struct DominatorTree<N: Idx> {
163194
root: N,
164195
children: IndexVec<N, Vec<N>>,

src/librustc_yk_sections/emit_tir.rs

Lines changed: 55 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ use std::cell::{Cell, RefCell};
3131
use std::mem::size_of;
3232
use rustc_data_structures::bit_set::BitSet;
3333
use rustc_data_structures::indexed_vec::{IndexVec, Idx};
34-
use rustc_data_structures::graph::dominators::Dominators;
34+
use rustc_data_structures::graph::dominators::DominatorFrontiers;
3535
use ykpack;
3636
use ykpack::LocalIndex as TirLocal;
37+
use rustc_data_structures::fx::FxHashSet;
3738

3839
const SECTION_NAME: &'static str = ".yk_tir";
3940
const TMP_EXT: &'static str = ".yk_tir.tmp";
@@ -53,6 +54,8 @@ struct ConvCx<'a, 'tcx, 'gcx> {
5354
tcx: &'a TyCtxt<'a, 'tcx, 'gcx>,
5455
/// Maps TIR variables to their definition sites.
5556
def_sites: RefCell<Vec<BitSet<BasicBlock>>>,
57+
/// Maps each block to the variable it defines. This is what Appel calls `A_{orig}`.
58+
block_defines: RefCell<IndexVec<BasicBlock, FxHashSet<TirLocal>>>,
5659
/// Monotonically increasing number used to give TIR variables a unique ID.
5760
next_tir_var: Cell<TirLocal>,
5861
/// A mapping from MIR variables to TIR variables.
@@ -64,13 +67,15 @@ struct ConvCx<'a, 'tcx, 'gcx> {
6467
impl<'a, 'tcx, 'gcx> ConvCx<'a, 'tcx, 'gcx> {
6568
fn new(tcx: &'a TyCtxt<'a, 'tcx, 'gcx>, mir: &Mir<'tcx>) -> Self {
6669
let var_map = IndexVec::new();
70+
let num_blks = mir.basic_blocks().len();
6771

6872
Self {
6973
tcx,
7074
def_sites: RefCell::new(Vec::new()),
75+
block_defines: RefCell::new(IndexVec::from_elem_n(FxHashSet::default(), num_blks)),
7176
next_tir_var: Cell::new(0),
7277
var_map: RefCell::new(var_map),
73-
num_blks: mir.basic_blocks().len(),
78+
num_blks: num_blks,
7479
}
7580
}
7681

@@ -101,8 +106,9 @@ impl<'a, 'tcx, 'gcx> ConvCx<'a, 'tcx, 'gcx> {
101106
})
102107
}
103108

104-
fn def_sites(self) -> Vec<BitSet<BasicBlock>> {
105-
self.def_sites.into_inner()
109+
/// Finalise the conversion context, returning the definition sites and block defines mappings.
110+
fn done(self) -> (Vec<BitSet<BasicBlock>>, IndexVec<BasicBlock, FxHashSet<TirLocal>>) {
111+
(self.def_sites.into_inner(), self.block_defines.into_inner())
106112
}
107113

108114
/// Add `bb` as a definition site of the TIR variable `var`.
@@ -118,6 +124,9 @@ impl<'a, 'tcx, 'gcx> ConvCx<'a, 'tcx, 'gcx> {
118124
BitSet::new_empty(self.num_blks));
119125
}
120126
sites[var_usize].insert(bb);
127+
128+
// Also push into the inverse mapping (blocks to defined vars).
129+
self.block_defines.borrow_mut()[bb].insert(var);
121130
}
122131
}
123132

@@ -183,18 +192,19 @@ fn do_generate_tir<'a, 'tcx, 'gcx>(
183192
let mir = tcx.optimized_mir(*def_id);
184193
let ccx = ConvCx::new(tcx, mir);
185194

186-
// Get an initial TIR (not yet in SSA form).
187-
let pre_ssa_pack = (&ccx, def_id, tcx.optimized_mir(*def_id)).to_pack();
188-
189-
// Add PHI nodes.
190-
let phied_pack = PhiInserter::new(mir, pre_ssa_pack, ccx.def_sites()).pack();
195+
let mut pack = (&ccx, def_id, tcx.optimized_mir(*def_id)).to_pack();
196+
{
197+
let ykpack::Pack::Mir(ykpack::Mir{ref mut blocks, ..}) = pack;
198+
let (def_sites, block_defines) = ccx.done();
199+
insert_phis(blocks, mir, def_sites, block_defines);
200+
}
191201

192202
// FIXME - rename variables with fresh SSA names.
193203

194204
if let Some(ref mut e) = enc {
195-
e.serialise(phied_pack)?;
205+
e.serialise(pack)?;
196206
} else {
197-
write!(textdump_file.as_ref().unwrap(), "{}", phied_pack)?;
207+
write!(textdump_file.as_ref().unwrap(), "{}", pack)?;
198208
}
199209
}
200210
}
@@ -208,160 +218,50 @@ fn do_generate_tir<'a, 'tcx, 'gcx>(
208218
Ok(tir_path)
209219
}
210220

211-
/// Lazy computation of dominance frontiers.
212-
///
213-
/// See p404 of the 2nd edition of the Appel book mentioned above.
221+
/// Insert PHI nodes into the initial pre-SSA TIR pack.
214222
///
215-
/// Since the frontier of one node may depend on the frontiers of other nodes (depending upon their
216-
/// relationships), we cache the frontiers so as to avoid computing things more than once.
217-
struct DominatorFrontiers<'a, 'tcx> {
218-
/// The MIR we are working on.
219-
mir: &'a Mir<'tcx>,
220-
/// The dominators of the above MIR.
221-
doms: Dominators<BasicBlock>,
222-
/// The dominance frontier cache. Lazily computed. `None` means as-yet uncomputed.
223-
df: IndexVec<BasicBlock, Option<BitSet<BasicBlock>>>,
224-
/// The number of MIR (and thus TIR) blocks.
225-
num_blks: usize,
226-
}
227-
228-
impl<'a, 'tcx> DominatorFrontiers<'a, 'tcx> {
229-
fn new(mir: &'a Mir<'tcx>) -> Self {
230-
let num_blks = mir.basic_blocks().len();
231-
let df = IndexVec::from_elem_n(None, num_blks);
232-
233-
Self {
234-
mir,
235-
doms: mir.dominators(),
236-
df,
237-
num_blks: mir.basic_blocks().len(),
238-
}
239-
}
240-
241-
/// Get the dominance frontier of the given basic block, computing it if we have not already
242-
/// done so. Since computing the frontiers for a full CFG requests the same frontiers
243-
/// repeatedly, the requested frontier (and its dependencies) are inserted into a cache to
244-
/// avoid duplicate computations.
245-
fn frontier(&mut self, n: BasicBlock) -> &BitSet<BasicBlock> {
246-
if self.df[n].is_none() {
247-
// We haven't yet computed dominator frontiers for this node. Compute them.
248-
let mut s: BitSet<BasicBlock> = BitSet::new_empty(self.num_blks);
249-
250-
// Append what Appel calls 'DF_{local}[n]'.
251-
for y in self.mir.basic_blocks()[n].terminator().successors() {
252-
if self.doms.immediate_dominator(*y) != n {
253-
s.insert(*y);
254-
}
255-
}
256-
257-
// The second stage of the algorithm needs the children nodes in the dominator tree.
258-
let mut children = Vec::new();
259-
for (b, _) in self.mir.basic_blocks().iter_enumerated() {
260-
let b = BasicBlock::from_u32(b.as_u32());
261-
if self.doms.is_dominated_by(b, n) && b != n {
262-
children.push(b);
263-
// Force the frontier of `b` into the cache (`self.df`). Doing this here avoids
264-
// a simultaneous mutable + immutable borrow of `self` in the final stage of
265-
// the algorithm.
266-
self.frontier(b);
267-
}
268-
}
269-
270-
// Append what Appel calls `DF_{up}[c]` for each dominator tree child `c` of `n`.
271-
for c in children {
272-
for w in self.df[c].as_ref().unwrap().iter() {
273-
if n == w || !self.doms.is_dominated_by(w, n) {
274-
s.insert(w);
223+
/// Algorithm reference:
224+
/// Bottom of p406 of 'Modern Compiler Implementation in Java (2nd ed.)' by Andrew Appel.
225+
fn insert_phis(blocks: &mut Vec<ykpack::BasicBlock>, mir: &Mir,
226+
mut def_sites: Vec<BitSet<BasicBlock>>,
227+
a_orig: IndexVec<BasicBlock, FxHashSet<TirLocal>>) {
228+
let doms = mir.dominators();
229+
let df = DominatorFrontiers::new(mir, &doms);
230+
let num_tir_vars = def_sites.len();
231+
let num_tir_blks = a_orig.len();
232+
233+
let mut a_phi: Vec<BitSet<TirLocal>> = Vec::with_capacity(num_tir_blks);
234+
a_phi.resize(num_tir_blks, BitSet::new_empty(num_tir_vars));
235+
236+
// We don't need the elements of `def_sites` again past this point, so we can take them out
237+
// of `def_sites` with a draining iterator and mutate in-place.
238+
for (a, mut w) in def_sites.drain(..).enumerate() {
239+
while !w.is_empty() {
240+
let n = bitset_pop(&mut w);
241+
for y in df.frontier(n).iter() {
242+
let y_usize = y.index();
243+
// `def_sites` is guaranteed to only contain indices expressible by `u32`.
244+
let a_u32 = a as u32;
245+
if !a_phi[y_usize].contains(a_u32) {
246+
a_phi[y_usize].insert(a_u32);
247+
if !a_orig[y].contains(&a_u32) {
248+
// The assertion in `tir_var()` has already checked the cast is safe.
249+
insert_phi(&mut blocks[y_usize], a as u32, mir.predecessors_for(y).len());
250+
w.insert(y);
275251
}
276252
}
277253
}
278-
self.df[n] = Some(s);
279254
}
280-
281-
self.df[n].as_ref().unwrap()
282255
}
283256
}
284257

285-
/// This struct deals with inserting PHI nodes into the initial pre-SSA TIR pack.
286-
///
287-
/// See the bottom of p406 of the 2nd edition of the Appel book mentioned above.
288-
struct PhiInserter<'a, 'tcx> {
289-
mir: &'a Mir<'tcx>,
290-
pack: ykpack::Pack,
291-
def_sites: Vec<BitSet<BasicBlock>>,
258+
fn insert_phi(block: &mut ykpack::BasicBlock, var: TirLocal, arity: usize) {
259+
let lhs = ykpack::Place::Local(var);
260+
let rhs_vars = (0..arity).map(|_| lhs.clone()).collect();
261+
let rhs = ykpack::Rvalue::Phi(rhs_vars);
262+
block.stmts.insert(0, ykpack::Statement::Assign(lhs, rhs));
292263
}
293264

294-
impl<'a, 'tcx> PhiInserter<'a, 'tcx> {
295-
fn new(mir: &'a Mir<'tcx>, pack: ykpack::Pack, def_sites: Vec<BitSet<BasicBlock>>) -> Self {
296-
Self {
297-
mir,
298-
pack,
299-
def_sites,
300-
}
301-
}
302-
303-
/// Insert PHI nodes, returning the mutated pack.
304-
fn pack(mut self) -> ykpack::Pack {
305-
let mut df = DominatorFrontiers::new(self.mir);
306-
let num_tir_vars = self.def_sites.len();
307-
308-
// We first need a mapping from block to the variables it defines. Appel calls this
309-
// `A_{orig}`. We can derive this from our definition sites.
310-
let (a_orig, num_tir_blks) = {
311-
let ykpack::Pack::Mir(ykpack::Mir{ref blocks, ..}) = self.pack;
312-
let num_tir_blks = blocks.len();
313-
314-
let mut a_orig: IndexVec<BasicBlock, BitSet<TirLocal>> =
315-
IndexVec::from_elem_n(BitSet::new_empty(num_tir_vars), num_tir_blks);
316-
for (a, def_blks) in self.def_sites.iter().enumerate() {
317-
for bb in def_blks.iter() {
318-
// `def_sites` is guaranteed to have at most `u32::max_value()` items.
319-
a_orig[bb].insert(a as u32);
320-
}
321-
}
322-
(a_orig, num_tir_blks)
323-
};
324-
325-
let mut a_phi: Vec<BitSet<TirLocal>> = Vec::with_capacity(num_tir_blks);
326-
a_phi.resize(num_tir_blks, BitSet::new_empty(num_tir_vars));
327-
// We don't need the elements of `def_sites` again past this point, so we can take them out
328-
// of `def_sites` with a draining iterator and mutate in-place.
329-
for (a, mut w) in self.def_sites.drain(..).enumerate() {
330-
while !w.is_empty() {
331-
let n = bitset_pop(&mut w);
332-
for y in df.frontier(n).iter() {
333-
let y_usize = y.index();
334-
// `self.def_sites` is guaranteed to only contain indices expressible by `u32`.
335-
let a_u32 = a as u32;
336-
if !a_phi[y_usize].contains(a_u32) {
337-
// Appel would insert the Phi node here. We use a second pass to keep the
338-
// borrow checker happy (self is already borrowed in this loop).
339-
a_phi[y_usize].insert(a_u32);
340-
if !a_orig[y].contains(a_u32) {
341-
w.insert(y);
342-
}
343-
}
344-
}
345-
}
346-
}
347-
348-
// `a_phi` now tells use where to insert PHI nodes.
349-
{
350-
let ykpack::Pack::Mir(ykpack::Mir{ref mut blocks, ..}) = self.pack;
351-
for (bb, mut bb_data) in blocks.iter_mut().enumerate() {
352-
for a in a_phi[bb].iter() {
353-
let lhs = ykpack::Place::Local(a);
354-
let num_preds = self.mir.predecessors_for(BasicBlock::new(bb)).len();
355-
let rhs_vars = (0..num_preds).map(|_| lhs.clone()).collect();
356-
let rhs = ykpack::Rvalue::Phi(rhs_vars);
357-
bb_data.stmts.insert(0, ykpack::Statement::Assign(lhs, rhs));
358-
}
359-
}
360-
}
361-
362-
self.pack
363-
}
364-
}
365265

366266
/// The trait for converting MIR data structures into a bytecode packs.
367267
trait ToPack<T> {

0 commit comments

Comments
 (0)