Skip to content

Commit 0052ddd

Browse files
committed
introduce a generic SCC computation
1 parent dab206f commit 0052ddd

File tree

5 files changed

+531
-3
lines changed

5 files changed

+531
-3
lines changed

src/librustc_data_structures/graph/implementation/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use graph::*;
11+
use graph::implementation::*;
1212
use std::fmt::Debug;
1313

1414
type TestGraph = Graph<&'static str, &'static str>;

src/librustc_data_structures/graph/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod dominators;
1414
pub mod implementation;
1515
pub mod iterate;
1616
mod reference;
17+
pub mod scc;
1718

1819
#[cfg(test)]
1920
mod test;
Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! Routine to compute the strongly connected components (SCCs) of a
12+
//! graph, as well as the resulting DAG if each SCC is replaced with a
13+
//! node in the graph. This uses Tarjan's algorithm that completes in
14+
//! O(n) time.
15+
16+
use fx::FxHashSet;
17+
use graph::{DirectedGraph, WithNumNodes, WithSuccessors};
18+
use indexed_vec::{Idx, IndexVec};
19+
use std::ops::Range;
20+
21+
mod test;
22+
23+
/// Strongly connected components (SCC) of a graph. The type `N` is
24+
/// the index type for the graph nodes and `S` is the index type for
25+
/// the SCCs. We can map from each node to the SCC that it
26+
/// participates in, and we also have the successors of each SCC.
27+
pub struct Sccs<N: Idx, S: Idx> {
28+
/// For each node, what is the SCC index of the SCC to which it
29+
/// belongs.
30+
scc_indices: IndexVec<N, S>,
31+
32+
/// Data about each SCC.
33+
scc_data: SccData<S>,
34+
}
35+
36+
struct SccData<S: Idx> {
37+
/// For each SCC, the range of `all_successors` where its
38+
/// successors can be found.
39+
ranges: IndexVec<S, Range<usize>>,
40+
41+
/// Contains the succcessors for all the Sccs, concatenated. The
42+
/// range of indices corresponding to a given SCC is found in its
43+
/// SccData.
44+
all_successors: Vec<S>,
45+
}
46+
47+
impl<N: Idx, S: Idx> Sccs<N, S> {
48+
pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
49+
SccsConstruction::construct(graph)
50+
}
51+
52+
/// Returns the number of SCCs in the graph.
53+
pub fn num_sccs(&self) -> usize {
54+
self.scc_data.len()
55+
}
56+
57+
/// Returns the SCC to which a node `r` belongs.
58+
pub fn scc(&self, r: N) -> S {
59+
self.scc_indices[r]
60+
}
61+
62+
/// Returns the successor of the given SCC.
63+
pub fn successors(&self, scc: S) -> &[S] {
64+
self.scc_data.successors(scc)
65+
}
66+
}
67+
68+
impl<S: Idx> SccData<S> {
69+
/// Number of SCCs,
70+
fn len(&self) -> usize {
71+
self.ranges.len()
72+
}
73+
74+
/// Returns the successor of the given SCC.
75+
fn successors(&self, scc: S) -> &[S] {
76+
// Annoyingly, `range` does not implement `Copy`, so we have
77+
// to do `range.start..range.end`:
78+
let range = &self.ranges[scc];
79+
&self.all_successors[range.start..range.end]
80+
}
81+
82+
/// Creates a new SCC with `successors` as its successors and
83+
/// returns the resulting index.
84+
fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
85+
// Store the successors on `scc_successors_vec`, remembering
86+
// the range of indices.
87+
let all_successors_start = self.all_successors.len();
88+
self.all_successors.extend(successors);
89+
let all_successors_end = self.all_successors.len();
90+
91+
debug!(
92+
"create_scc({:?}) successors={:?}",
93+
self.ranges.len(),
94+
&self.all_successors[all_successors_start..all_successors_end],
95+
);
96+
97+
self.ranges.push(all_successors_start..all_successors_end)
98+
}
99+
}
100+
101+
struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors + 'c, S: Idx> {
102+
graph: &'c G,
103+
104+
/// The state of each node; used during walk to record the stack
105+
/// and after walk to record what cycle each node ended up being
106+
/// in.
107+
node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
108+
109+
/// The stack of nodes that we are visiting as part of the DFS.
110+
node_stack: Vec<G::Node>,
111+
112+
/// The stack of successors: as we visit a node, we mark our
113+
/// position in this stack, and when we encounter a successor SCC,
114+
/// we push it on the stack. When we complete an SCC, we can pop
115+
/// everything off the stack that was found along the way.
116+
successors_stack: Vec<S>,
117+
118+
/// A set used to strip duplicates. As we accumulate successors
119+
/// into the successors_stack, we sometimes get duplicate entries.
120+
/// We use this set to remove those -- we keep it around between
121+
/// successors to amortize memory allocation costs.
122+
duplicate_set: FxHashSet<S>,
123+
124+
scc_data: SccData<S>,
125+
}
126+
127+
/// What the SCC construction currently knows about a single node.
#[derive(Copy, Clone, Debug)]
enum NodeState<N, S> {
    /// The DFS has not reached this node yet.
    ///
    /// Once SCC construction has finished, no node should be left in
    /// this state.
    NotVisited,

    /// The DFS is currently walking this node; it sits on the node
    /// stack at depth `depth`.
    ///
    /// Once SCC construction has finished, no node should be left in
    /// this state.
    BeingVisited { depth: usize },

    /// This node belongs to the cycle (SCC) with the given index.
    InCycle { scc_index: S },

    /// This node belongs to whichever cycle `parent` belongs to. The
    /// state is transient: each time it is observed we try to replace
    /// it with the current state of `parent` (the "path compression"
    /// step of a union-find algorithm).
    InCycleWith { parent: N },
}
152+
153+
/// The outcome of walking a node.
#[derive(Copy, Clone, Debug)]
enum WalkReturn<S> {
    /// The walk reached a node that is still on the node stack;
    /// `min_depth` is the lowest stack depth that was reached.
    Cycle { min_depth: usize },

    /// The node belongs to the already-created SCC `scc_index`.
    Complete { scc_index: S },
}
158+
159+
impl<'c, G, S> SccsConstruction<'c, G, S>
160+
where
161+
G: DirectedGraph + WithNumNodes + WithSuccessors,
162+
S: Idx,
163+
{
164+
/// Identifies SCCs in the graph `G` and computes the resulting
165+
/// DAG. This uses a variant of [Tarjan's
166+
/// algorithm][wikipedia]. The high-level summary of the algorithm
167+
/// is that we do a depth-first search. Along the way, we keep a
168+
/// stack of each node whose successors are being visited. We
169+
/// track the depth of each node on this stack (there is no depth
170+
/// if the node is not on the stack). When we find that some node
171+
/// N with depth D can reach some other node N' with lower depth
172+
/// D' (i.e., D' < D), we know that N, N', and all nodes in
173+
/// between them on the stack are part of an SCC.
174+
///
175+
/// For each node, we track the lowest depth of any successor we
176+
/// have found, along with that
177+
///
178+
/// [wikipedia]: https://bit.ly/2EZIx84
179+
fn construct(graph: &'c G) -> Sccs<G::Node, S> {
180+
let num_nodes = graph.num_nodes();
181+
182+
let mut this = Self {
183+
graph,
184+
node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
185+
node_stack: Vec::with_capacity(num_nodes),
186+
successors_stack: Vec::new(),
187+
scc_data: SccData {
188+
ranges: IndexVec::new(),
189+
all_successors: Vec::new(),
190+
},
191+
duplicate_set: FxHashSet::default(),
192+
};
193+
194+
let scc_indices = (0..num_nodes)
195+
.map(G::Node::new)
196+
.map(|node| match this.walk_node(0, node) {
197+
WalkReturn::Complete { scc_index } => scc_index,
198+
WalkReturn::Cycle { min_depth } => panic!(
199+
"`walk_node(0, {:?})` returned cycle with depth {:?}",
200+
node, min_depth
201+
),
202+
})
203+
.collect();
204+
205+
Sccs {
206+
scc_indices,
207+
scc_data: this.scc_data,
208+
}
209+
}
210+
211+
fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
212+
debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
213+
match self.find_state(node) {
214+
NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
215+
216+
NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
217+
218+
NodeState::NotVisited => self.walk_unvisited_node(depth, node),
219+
220+
NodeState::InCycleWith { parent } => panic!(
221+
"`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
222+
parent
223+
),
224+
}
225+
}
226+
227+
/// Fetches the state of the node `r`. If `r` is recorded as being
228+
/// in a cycle with some other node `r2`, then fetches the state
229+
/// of `r2` (and updates `r` to reflect current result). This is
230+
/// basically the "find" part of a standard union-find algorithm
231+
/// (with path compression).
232+
fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
233+
debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
234+
match self.node_states[r] {
235+
NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
236+
NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
237+
NodeState::NotVisited => NodeState::NotVisited,
238+
NodeState::InCycleWith { parent } => {
239+
let parent_state = self.find_state(parent);
240+
debug!("find_state: parent_state = {:?}", parent_state);
241+
match parent_state {
242+
NodeState::InCycle { .. } => {
243+
self.node_states[r] = parent_state;
244+
parent_state
245+
}
246+
247+
NodeState::BeingVisited { depth } => {
248+
self.node_states[r] = NodeState::InCycleWith {
249+
parent: self.node_stack[depth],
250+
};
251+
parent_state
252+
}
253+
254+
NodeState::NotVisited | NodeState::InCycleWith { .. } => {
255+
panic!("invalid parent state: {:?}", parent_state)
256+
}
257+
}
258+
}
259+
}
260+
}
261+
262+
/// Walks a node that has never been visited before.
263+
fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
264+
debug!(
265+
"walk_unvisited_node(depth = {:?}, node = {:?})",
266+
depth, node
267+
);
268+
269+
debug_assert!(match self.node_states[node] {
270+
NodeState::NotVisited => true,
271+
_ => false,
272+
});
273+
274+
self.node_states[node] = NodeState::BeingVisited { depth };
275+
self.node_stack.push(node);
276+
277+
// Walk each successor of the node, looking to see if any of
278+
// them can reach a node that is presently on the stack. If
279+
// so, that means they can also reach us.
280+
let mut min_depth = depth;
281+
let mut min_cycle_root = node;
282+
let successors_len = self.successors_stack.len();
283+
for successor_node in self.graph.successors(node) {
284+
debug!(
285+
"walk_unvisited_node: node = {:?} successor_ode = {:?}",
286+
node, successor_node
287+
);
288+
match self.walk_node(depth + 1, successor_node) {
289+
WalkReturn::Cycle {
290+
min_depth: successor_min_depth,
291+
} => {
292+
assert!(successor_min_depth <= depth);
293+
if successor_min_depth < min_depth {
294+
debug!(
295+
"walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
296+
node, successor_min_depth
297+
);
298+
min_depth = successor_min_depth;
299+
min_cycle_root = successor_node;
300+
}
301+
}
302+
303+
WalkReturn::Complete {
304+
scc_index: successor_scc_index,
305+
} => {
306+
debug!(
307+
"walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
308+
node, successor_scc_index
309+
);
310+
self.successors_stack.push(successor_scc_index);
311+
}
312+
}
313+
}
314+
315+
let r = self.node_stack.pop();
316+
debug_assert_eq!(r, Some(node));
317+
318+
if min_depth == depth {
319+
// Note that successor stack may have duplicates, so we
320+
// want to remove those:
321+
let deduplicated_successors = {
322+
let duplicate_set = &mut self.duplicate_set;
323+
duplicate_set.clear();
324+
self.successors_stack
325+
.drain(successors_len..)
326+
.filter(move |&i| duplicate_set.insert(i))
327+
};
328+
let scc_index = self.scc_data.create_scc(deduplicated_successors);
329+
self.node_states[node] = NodeState::InCycle { scc_index };
330+
WalkReturn::Complete { scc_index }
331+
} else {
332+
// We are not the head of the cycle. Return back to our
333+
// caller. They will take ownership of the
334+
// `self.successors` data that we pushed.
335+
self.node_states[node] = NodeState::InCycleWith {
336+
parent: min_cycle_root,
337+
};
338+
WalkReturn::Cycle { min_depth }
339+
}
340+
}
341+
}

0 commit comments

Comments
 (0)