Skip to content

Commit 0d6e07a

Browse files
committed
coverage: Collect a function's coverage mappings into a single list
1 parent b854778 commit 0d6e07a

File tree

3 files changed

+74
-98
lines changed

3 files changed

+74
-98
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

Lines changed: 44 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
22

33
use rustc_data_structures::fx::FxIndexSet;
4+
use rustc_index::bit_set::BitSet;
45
use rustc_index::IndexVec;
5-
use rustc_middle::mir::coverage::{CodeRegion, CounterId, CovTerm, ExpressionId, Op};
6+
use rustc_middle::mir::coverage::{CodeRegion, CounterId, CovTerm, ExpressionId, Mapping, Op};
67
use rustc_middle::ty::Instance;
78
use rustc_middle::ty::TyCtxt;
89

@@ -11,28 +12,21 @@ pub struct Expression {
1112
lhs: CovTerm,
1213
op: Op,
1314
rhs: CovTerm,
14-
code_regions: Vec<CodeRegion>,
1515
}
1616

17-
/// Collects all of the coverage regions associated with (a) injected counters, (b) counter
18-
/// expressions (additions or subtraction), and (c) unreachable regions (always counted as zero),
19-
/// for a given Function. This struct also stores the `function_source_hash`,
20-
/// computed during instrumentation, and forwarded with counters.
21-
///
22-
/// Note, it may be important to understand LLVM's definitions of `unreachable` regions versus "gap
23-
/// regions" (or "gap areas"). A gap region is a code region within a counted region (either counter
24-
/// or expression), but the line or lines in the gap region are not executable (such as lines with
25-
/// only whitespace or comments). According to LLVM Code Coverage Mapping documentation, "A count
26-
/// for a gap area is only used as the line execution count if there are no other regions on a
27-
/// line."
17+
/// Holds all of the coverage mapping data associated with a function instance,
18+
/// collected during traversal of `Coverage` statements in the function's MIR.
2819
#[derive(Debug)]
2920
pub struct FunctionCoverage<'tcx> {
3021
instance: Instance<'tcx>,
3122
source_hash: u64,
3223
is_used: bool,
33-
counters: IndexVec<CounterId, Option<Vec<CodeRegion>>>,
24+
25+
/// Tracks which counters have been seen, to avoid duplicate mappings
26+
/// that might be introduced by MIR inlining.
27+
counters_seen: BitSet<CounterId>,
3428
expressions: IndexVec<ExpressionId, Option<Expression>>,
35-
unreachable_regions: Vec<CodeRegion>,
29+
mappings: Vec<Mapping>,
3630
}
3731

3832
impl<'tcx> FunctionCoverage<'tcx> {
@@ -56,9 +50,9 @@ impl<'tcx> FunctionCoverage<'tcx> {
5650
instance,
5751
source_hash: 0, // will be set with the first `add_counter()`
5852
is_used,
59-
counters: IndexVec::from_elem_n(None, coverageinfo.num_counters as usize),
53+
counters_seen: BitSet::new_empty(coverageinfo.num_counters as usize),
6054
expressions: IndexVec::from_elem_n(None, coverageinfo.num_expressions as usize),
61-
unreachable_regions: Vec::new(),
55+
mappings: Vec::new(),
6256
}
6357
}
6458

@@ -80,19 +74,8 @@ impl<'tcx> FunctionCoverage<'tcx> {
8074
/// Adds code regions to be counted by an injected counter intrinsic.
8175
#[instrument(level = "debug", skip(self))]
8276
pub(crate) fn add_counter(&mut self, id: CounterId, code_regions: &[CodeRegion]) {
83-
if code_regions.is_empty() {
84-
return;
85-
}
86-
87-
let slot = &mut self.counters[id];
88-
match slot {
89-
None => *slot = Some(code_regions.to_owned()),
90-
// If this counter ID slot has already been filled, it should
91-
// contain identical information.
92-
Some(ref previous_regions) => assert_eq!(
93-
previous_regions, code_regions,
94-
"add_counter: code regions for id changed"
95-
),
77+
if self.counters_seen.insert(id) {
78+
self.add_mappings(CovTerm::Counter(id), code_regions);
9679
}
9780
}
9881

@@ -120,10 +103,13 @@ impl<'tcx> FunctionCoverage<'tcx> {
120103
self,
121104
);
122105

123-
let expression = Expression { lhs, op, rhs, code_regions: code_regions.to_owned() };
106+
let expression = Expression { lhs, op, rhs };
124107
let slot = &mut self.expressions[expression_id];
125108
match slot {
126-
None => *slot = Some(expression),
109+
None => {
110+
*slot = Some(expression);
111+
self.add_mappings(CovTerm::Expression(expression_id), code_regions);
112+
}
127113
// If this expression ID slot has already been filled, it should
128114
// contain identical information.
129115
Some(ref previous_expression) => assert_eq!(
@@ -137,7 +123,22 @@ impl<'tcx> FunctionCoverage<'tcx> {
137123
#[instrument(level = "debug", skip(self))]
138124
pub(crate) fn add_unreachable_regions(&mut self, code_regions: &[CodeRegion]) {
139125
assert!(!code_regions.is_empty(), "unreachable regions always have code regions");
140-
self.unreachable_regions.extend_from_slice(code_regions);
126+
self.add_mappings(CovTerm::Zero, code_regions);
127+
}
128+
129+
#[instrument(level = "debug", skip(self))]
130+
fn add_mappings(&mut self, kind: CovTerm, code_regions: &[CodeRegion]) {
131+
self.mappings
132+
.extend(code_regions.iter().cloned().map(|code_region| Mapping { kind, code_region }));
133+
}
134+
135+
pub(crate) fn finalize(&mut self) {
136+
self.assert_source_hash_is_set();
137+
138+
self.simplify_expressions();
139+
140+
// Sort all of the collected mappings into a predictable order.
141+
self.mappings.sort_unstable();
141142
}
142143

143144
/// Perform some simplifications to make the final coverage mappings
@@ -146,7 +147,7 @@ impl<'tcx> FunctionCoverage<'tcx> {
146147
/// This method mainly exists to preserve the simplifications that were
147148
/// already being performed by the Rust-side expression renumbering, so that
148149
/// the resulting coverage mappings don't get worse.
149-
pub(crate) fn simplify_expressions(&mut self) {
150+
fn simplify_expressions(&mut self) {
150151
// The set of expressions that either were optimized out entirely, or
151152
// have zero as both of their operands, and will therefore always have
152153
// a value of zero. Other expressions that refer to these as operands
@@ -198,49 +199,17 @@ impl<'tcx> FunctionCoverage<'tcx> {
198199
self.source_hash
199200
}
200201

201-
/// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their
202-
/// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create
203-
/// `CounterMappingRegion`s.
204-
pub fn get_expressions_and_counter_regions(
205-
&self,
206-
) -> (Vec<CounterExpression>, impl Iterator<Item = (Counter, &CodeRegion)>) {
202+
fn assert_source_hash_is_set(&self) {
207203
assert!(
208204
self.source_hash != 0 || !self.is_used,
209205
"No counters provided the source_hash for used function: {:?}",
210206
self.instance
211207
);
212-
213-
let counter_expressions = self.counter_expressions();
214-
// Expression IDs are indices into `self.expressions`, and on the LLVM
215-
// side they will be treated as indices into `counter_expressions`, so
216-
// the two vectors should correspond 1:1.
217-
assert_eq!(self.expressions.len(), counter_expressions.len());
218-
219-
let counter_regions = self.counter_regions();
220-
let expression_regions = self.expression_regions();
221-
let unreachable_regions = self.unreachable_regions();
222-
223-
let counter_regions =
224-
counter_regions.chain(expression_regions.into_iter().chain(unreachable_regions));
225-
(counter_expressions, counter_regions)
226-
}
227-
228-
fn counter_regions(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> {
229-
self.counters
230-
.iter_enumerated()
231-
// Filter out counter IDs that we never saw during MIR traversal.
232-
// This can happen if a counter was optimized out by MIR transforms
233-
// (and replaced with `CoverageKind::Unreachable` instead).
234-
.filter_map(|(id, maybe_code_regions)| Some((id, maybe_code_regions.as_ref()?)))
235-
.flat_map(|(id, code_regions)| {
236-
let counter = Counter::counter_value_reference(id);
237-
code_regions.iter().map(move |region| (counter, region))
238-
})
239208
}
240209

241210
/// Convert this function's coverage expression data into a form that can be
242211
/// passed through FFI to LLVM.
243-
fn counter_expressions(&self) -> Vec<CounterExpression> {
212+
pub(crate) fn expressions_for_ffi(&self) -> Vec<CounterExpression> {
244213
// We know that LLVM will optimize out any unused expressions before
245214
// producing the final coverage map, so there's no need to do the same
246215
// thing on the Rust side unless we're confident we can do much better.
@@ -268,24 +237,12 @@ impl<'tcx> FunctionCoverage<'tcx> {
268237
.collect::<Vec<_>>()
269238
}
270239

271-
fn expression_regions(&self) -> Vec<(Counter, &CodeRegion)> {
272-
// Find all of the expression IDs that weren't optimized out AND have
273-
// one or more attached code regions, and return the corresponding
274-
// mappings as counter/region pairs.
275-
self.expressions
276-
.iter_enumerated()
277-
.filter_map(|(id, maybe_expression)| {
278-
let code_regions = &maybe_expression.as_ref()?.code_regions;
279-
Some((id, code_regions))
280-
})
281-
.flat_map(|(id, code_regions)| {
282-
let counter = Counter::expression(id);
283-
code_regions.iter().map(move |code_region| (counter, code_region))
284-
})
285-
.collect::<Vec<_>>()
286-
}
287-
288-
fn unreachable_regions(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> {
289-
self.unreachable_regions.iter().map(|region| (Counter::ZERO, region))
240+
/// Converts this function's coverage mappings into an intermediate form
241+
/// that will be used by `mapgen` when preparing for FFI.
242+
pub(crate) fn mappings_for_ffi(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> {
243+
self.mappings.iter().map(|&Mapping { kind, ref code_region }| {
244+
let counter = Counter::from_term(kind);
245+
(counter, code_region)
246+
})
290247
}
291248
}

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
6262
let mut function_data = Vec::new();
6363
for (instance, mut function_coverage) in function_coverage_map {
6464
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
65-
function_coverage.simplify_expressions();
65+
function_coverage.finalize();
66+
let function_coverage = function_coverage;
6667

6768
let mangled_function_name = tcx.symbol_name(instance).name;
6869
let source_hash = function_coverage.source_hash();
@@ -159,20 +160,21 @@ fn encode_mappings_for_function(
159160
global_file_table: &mut GlobalFileTable,
160161
function_coverage: &FunctionCoverage<'_>,
161162
) -> Vec<u8> {
162-
let (expressions, counter_regions) = function_coverage.get_expressions_and_counter_regions();
163+
let expressions = function_coverage.expressions_for_ffi();
164+
let mut counter_regions = function_coverage.mappings_for_ffi().collect::<Vec<_>>();
163165

164-
let mut counter_regions = counter_regions.collect::<Vec<_>>();
165166
if counter_regions.is_empty() {
166167
return Vec::new();
167168
}
168169

169170
let mut virtual_file_mapping = IndexVec::<u32, u32>::new();
170171
let mut mapping_regions = Vec::with_capacity(counter_regions.len());
171172

172-
// Sort the list of (counter, region) mapping pairs by region, so that they
173-
// can be grouped by filename. Prepare file IDs for each filename, and
174-
// prepare the mapping data so that we can pass it through FFI to LLVM.
175-
counter_regions.sort_by_key(|(_counter, region)| *region);
173+
// Sort and group the list of (counter, region) mapping pairs by filename.
174+
// (Preserve any further ordering imposed by `FunctionCoverage`.)
175+
// Prepare file IDs for each filename, and prepare the mapping data so that
176+
// we can pass it through FFI to LLVM.
177+
counter_regions.sort_by_key(|(_counter, region)| region.file_name);
176178
for counter_regions_for_file in
177179
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
178180
{

compiler/rustc_middle/src/mir/coverage.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,19 @@ impl ExpressionId {
3535
pub const START: Self = Self::from_u32(0);
3636
}
3737

38-
/// Enum that can hold a constant zero value, the ID of an physical coverage
39-
/// counter, or the ID of a coverage-counter expression.
38+
/// Enum that can hold the ID of a physical coverage counter, the ID of a
39+
/// coverage-counter expression, or a constant zero value.
4040
///
4141
/// This was originally only used for expression operands (and named `Operand`),
4242
/// but the zero/counter/expression distinction is also useful for representing
4343
/// the value of code/gap mappings, and the true/false arms of branch mappings.
44-
#[derive(Copy, Clone, PartialEq, Eq)]
44+
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
4545
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
4646
pub enum CovTerm {
47-
Zero,
47+
// Coverage codegen relies on this variant order when it sorts a function's mappings.
4848
Counter(CounterId),
4949
Expression(ExpressionId),
50+
Zero,
5051
}
5152

5253
impl Debug for CovTerm {
@@ -102,6 +103,8 @@ impl Debug for CoverageKind {
102103
#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, Eq, PartialOrd, Ord)]
103104
#[derive(TypeFoldable, TypeVisitable)]
104105
pub struct CodeRegion {
106+
// Coverage codegen relies on filenames being sorted first, because it
107+
// needs to group mappings by file.
105108
pub file_name: Symbol,
106109
pub start_line: u32,
107110
pub start_col: u32,
@@ -135,3 +138,17 @@ impl Op {
135138
matches!(self, Self::Subtract)
136139
}
137140
}
141+
142+
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
143+
pub struct Mapping {
144+
// Coverage codegen relies on this field order when it sorts a function's mappings.
145+
pub code_region: CodeRegion,
146+
147+
/// Kind of coverage region that this mapping represents.
148+
///
149+
/// Currently we only support ordinary `Code` regions, so the "kind" is
150+
/// just a term. When we add support for other region kinds allowed by
151+
/// LLVM (e.g. branch regions, expansion regions), we'll need to replace
152+
/// this with a dedicated mapping-kind enum.
153+
pub kind: CovTerm,
154+
}

0 commit comments

Comments
 (0)