Skip to content

Commit 753498e

Browse files
shawbyoung and aaupov authored
[BOLT] Add sink block to flow CFG in profile inference (#95047)
Summary: Construct an artificial sink block for the flow CFG in stale profile inference so that profile inference can be run on CFGs whose terminating (exit) blocks still have successors, such as landing pads. Testing Plan: Added infer_no_exits.test to verify that functions whose exit blocks have a landing pad are covered by stale profile inference. --------- Co-authored-by: Amir Ayupov <fads93@gmail.com>
1 parent a02010b commit 753498e

File tree

3 files changed

+216
-12
lines changed

3 files changed

+216
-12
lines changed

bolt/lib/Profile/StaleProfileMatching.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -307,21 +307,21 @@ void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
307307
BB->setHash(BlendedHashes[I].combine());
308308
}
309309
}
310-
310+
// TODO: mediate the difference between flow function construction here in BOLT
311+
// and in the compiler by splitting blocks with exception throwing calls at the
312+
// call and adding the landing pad as the successor.
311313
/// Create a wrapper flow function to use with the profile inference algorithm,
312314
/// and initialize its jumps and metadata.
313315
FlowFunction
314316
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
315317
FlowFunction Func;
316318

317319
// Add a special "dummy" source so that there is always a unique entry point.
318-
// Because of the extra source, for all other blocks in FlowFunction it holds
319-
// that Block.Index == BB->getIndex() + 1
320320
FlowBlock EntryBlock;
321321
EntryBlock.Index = 0;
322322
Func.Blocks.push_back(EntryBlock);
323323

324-
// Create FlowBlock for every basic block in the binary function
324+
// Create FlowBlock for every basic block in the binary function.
325325
for (const BinaryBasicBlock *BB : BlockOrder) {
326326
Func.Blocks.emplace_back();
327327
FlowBlock &Block = Func.Blocks.back();
@@ -331,7 +331,12 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
331331
"incorrectly assigned basic block index");
332332
}
333333

334-
// Create FlowJump for each jump between basic blocks in the binary function
334+
// Add a special "dummy" sink block so there is always a unique sink.
335+
FlowBlock SinkBlock;
336+
SinkBlock.Index = Func.Blocks.size();
337+
Func.Blocks.push_back(SinkBlock);
338+
339+
// Create FlowJump for each jump between basic blocks in the binary function.
335340
std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
336341
for (const BinaryBasicBlock *SrcBB : BlockOrder) {
337342
std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
@@ -348,6 +353,16 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
348353
InDegree[Jump.Target]++;
349354
UniqueSuccs.insert(DstBB);
350355
}
356+
// TODO: set jump from exit block to landing pad to Unlikely.
357+
// If the block is an exit, add a dummy edge from it to the sink block.
358+
if (UniqueSuccs.empty()) {
359+
Func.Jumps.emplace_back();
360+
FlowJump &Jump = Func.Jumps.back();
361+
Jump.Source = SrcBB->getIndex() + 1;
362+
Jump.Target = Func.Blocks.size() - 1;
363+
InDegree[Jump.Target]++;
364+
}
365+
351366
// Collect jumps to landing pads
352367
for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
353368
// Ignoring parallel edges
@@ -364,9 +379,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
364379
}
365380

366381
// Add dummy edges to the extra sources. If there are multiple entry blocks,
367-
// add an unlikely edge from 0 to the subsequent ones
382+
// add an unlikely edge from 0 to the subsequent ones. Skips the sink block.
368383
assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
369-
for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
384+
for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
370385
const BinaryBasicBlock *BB = BlockOrder[I - 1];
371386
if (BB->isEntryPoint() || InDegree[I] == 0) {
372387
Func.Jumps.emplace_back();
@@ -400,7 +415,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
400415
size_t matchWeightsByHashes(
401416
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
402417
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
403-
assert(Func.Blocks.size() == BlockOrder.size() + 1);
418+
assert(Func.Blocks.size() == BlockOrder.size() + 2);
404419

405420
std::vector<FlowBlock *> Blocks;
406421
std::vector<BlendedBlockHash> BlendedHashes;
@@ -592,9 +607,9 @@ bool canApplyInference(const FlowFunction &Func,
592607
opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
593608
return false;
594609

595-
bool HasExitBlocks = llvm::any_of(
596-
Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
597-
if (!HasExitBlocks)
610+
// Returns false if the artificial sink block has no predecessors meaning
611+
// there are no exit blocks.
612+
if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
598613
return false;
599614

600615
return true;
@@ -631,7 +646,7 @@ void assignProfile(BinaryFunction &BF,
631646
FlowFunction &Func) {
632647
BinaryContext &BC = BF.getBinaryContext();
633648

634-
assert(Func.Blocks.size() == BlockOrder.size() + 1);
649+
assert(Func.Blocks.size() == BlockOrder.size() + 2);
635650
for (uint64_t I = 0; I < BlockOrder.size(); I++) {
636651
FlowBlock &Block = Func.Blocks[I + 1];
637652
BinaryBasicBlock *BB = BlockOrder[I];
@@ -653,6 +668,9 @@ void assignProfile(BinaryFunction &BF,
653668
if (Jump->Flow == 0)
654669
continue;
655670

671+
// Skips the artificial sink block.
672+
if (Jump->Target == Func.Blocks.size() - 1)
673+
continue;
656674
BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
657675
// Check if the edge corresponds to a regular jump or a landing pad
658676
if (BB->getSuccessor(SuccBB.getLabel())) {

bolt/test/X86/Inputs/infer_no_exits.s

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
.text
2+
.file "infer_no_exits.cpp"
3+
.globl _Z3fooi # -- Begin function _Z3fooi
4+
.p2align 4, 0x90
5+
.type _Z3fooi,@function
6+
_Z3fooi: # @_Z3fooi
7+
.Lfunc_begin0:
8+
.cfi_startproc
9+
.cfi_personality 155, DW.ref.__gxx_personality_v0
10+
.cfi_lsda 27, .Lexception0
11+
# %bb.0: # %entry
12+
pushq %rbp
13+
.cfi_def_cfa_offset 16
14+
.cfi_offset %rbp, -16
15+
movq %rsp, %rbp
16+
.cfi_def_cfa_register %rbp
17+
subq $32, %rsp
18+
movl %edi, -4(%rbp)
19+
cmpl $0, -4(%rbp)
20+
jne .LBB0_4
21+
# %bb.1: # %if.then
22+
movl $16, %edi
23+
callq __cxa_allocate_exception@PLT
24+
movq %rax, %rdi
25+
movq %rdi, %rax
26+
movq %rax, -32(%rbp) # 8-byte Spill
27+
.Ltmp0:
28+
leaq .L.str(%rip), %rsi
29+
callq _ZNSt12out_of_rangeC1EPKc@PLT
30+
.Ltmp1:
31+
jmp .LBB0_2
32+
.LBB0_2: # %invoke.cont
33+
movq -32(%rbp), %rdi # 8-byte Reload
34+
movq _ZTISt12out_of_range@GOTPCREL(%rip), %rsi
35+
movq _ZNSt12out_of_rangeD1Ev@GOTPCREL(%rip), %rdx
36+
callq __cxa_throw@PLT
37+
.LBB0_3: # %lpad
38+
.Ltmp2:
39+
movq -32(%rbp), %rdi # 8-byte Reload
40+
movq %rax, %rcx
41+
movl %edx, %eax
42+
movq %rcx, -16(%rbp)
43+
movl %eax, -20(%rbp)
44+
callq __cxa_free_exception@PLT
45+
jmp .LBB0_5
46+
.LBB0_4: # %if.end
47+
xorl %eax, %eax
48+
addq $32, %rsp
49+
popq %rbp
50+
.cfi_def_cfa %rsp, 8
51+
retq
52+
.LBB0_5: # %eh.resume
53+
.cfi_def_cfa %rbp, 16
54+
movq -16(%rbp), %rdi
55+
callq _Unwind_Resume@PLT
56+
.Lfunc_end0:
57+
.size _Z3fooi, .Lfunc_end0-_Z3fooi
58+
.cfi_endproc
59+
.section .gcc_except_table,"a",@progbits
60+
.p2align 2, 0x0
61+
GCC_except_table0:
62+
.Lexception0:
63+
.byte 255 # @LPStart Encoding = omit
64+
.byte 255 # @TType Encoding = omit
65+
.byte 1 # Call site Encoding = uleb128
66+
.uleb128 .Lcst_end0-.Lcst_begin0
67+
.Lcst_begin0:
68+
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
69+
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
70+
.byte 0 # has no landing pad
71+
.byte 0 # On action: cleanup
72+
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
73+
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
74+
.uleb128 .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2
75+
.byte 0 # On action: cleanup
76+
.uleb128 .Ltmp1-.Lfunc_begin0 # >> Call Site 3 <<
77+
.uleb128 .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0
78+
.byte 0 # has no landing pad
79+
.byte 0 # On action: cleanup
80+
.Lcst_end0:
81+
.p2align 2, 0x0
82+
# -- End function
83+
.text
84+
.globl main # -- Begin function main
85+
.p2align 4, 0x90
86+
.type main,@function
87+
main: # @main
88+
.Lfunc_begin1:
89+
.cfi_startproc
90+
.cfi_personality 155, DW.ref.__gxx_personality_v0
91+
.cfi_lsda 27, .Lexception1
92+
# %bb.0: # %entry
93+
pushq %rbp
94+
.cfi_def_cfa_offset 16
95+
.cfi_offset %rbp, -16
96+
movq %rsp, %rbp
97+
.cfi_def_cfa_register %rbp
98+
subq $32, %rsp
99+
movl $0, -4(%rbp)
100+
jmp .Ltmp3
101+
.LBB1_2: # %lpad
102+
movq %rax, %rcx
103+
movl %edx, %eax
104+
movq %rcx, -16(%rbp)
105+
movl %eax, -20(%rbp)
106+
.Lcatch:
107+
# %bb.3: # %catch
108+
movq -16(%rbp), %rdi
109+
callq __cxa_begin_catch@PLT
110+
callq _ZSt9terminatev@PLT
111+
.Ltmp3:
112+
xorl %edi, %edi
113+
callq _Z3fooi
114+
xorl %eax, %eax
115+
addq $32, %rsp
116+
popq %rbp
117+
.cfi_def_cfa %rsp, 8
118+
retq
119+
.Lgarbage:
120+
121+
.Lfunc_end1:
122+
.size main, .Lfunc_end1-main
123+
.cfi_endproc
124+
.section .gcc_except_table,"a",@progbits
125+
.p2align 2, 0x0
126+
GCC_except_table1:
127+
.Lexception1:
128+
.byte 255 # @LPStart Encoding = omit
129+
.byte 155 # @TType Encoding = indirect pcrel sdata4
130+
.uleb128 .Lttbase0-.Lttbaseref0
131+
.Lttbaseref0:
132+
.byte 1 # Call site Encoding = uleb128
133+
.uleb128 .Lcst_end1-.Lcst_begin1
134+
.Lcst_begin1:
135+
.uleb128 .Ltmp3-.Lfunc_begin1 # >> Call Site 1 <<
136+
.uleb128 .Lgarbage-.Ltmp3 # Call between .Ltmp3 and .Ltmp4
137+
.uleb128 .LBB1_2-.Lfunc_begin1 # jumps to .LBB1_2
138+
.byte 1 # On action: 1
139+
.uleb128 .Lcatch-.Lfunc_begin1 # >> Call Site 2 <<
140+
.uleb128 .Lfunc_end1-.Ltmp3 # Call between .Ltmp4 and .Lfunc_end1
141+
# .uleb128 .LBB1_2-.Lfunc_begin1 # jumps to .LBB1_2
142+
.byte 0 # On action: cleanup
143+
.byte 0 # On action: cleanup
144+
.Lcst_end1:
145+
.byte 1 # >> Action Record 1 <<
146+
# Catch TypeInfo 1
147+
.byte 0 # No further actions
148+
.p2align 2, 0x0
149+
# >> Catch TypeInfos <<
150+
.long 0 # TypeInfo 1
151+
.Lttbase0:
152+
.p2align 2, 0x0
153+
# -- End function
154+
.type .L.str,@object # @.str
155+
.section .rodata.str1.1,"aMS",@progbits,1
156+
.L.str:
157+
.asciz "bad value"
158+
.size .L.str, 10
159+
160+
.hidden DW.ref.__gxx_personality_v0
161+
.weak DW.ref.__gxx_personality_v0
162+
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
163+
.p2align 3, 0x0
164+
.type DW.ref.__gxx_personality_v0,@object
165+
.section ".note.GNU-stack","",@progbits
166+
.addrsig
167+
.addrsig_sym _Z3fooi
168+
.addrsig_sym __cxa_allocate_exception
169+
.addrsig_sym __gxx_personality_v0
170+
.addrsig_sym __cxa_free_exception
171+
.addrsig_sym __cxa_throw
172+
.addrsig_sym __cxa_begin_catch
173+
.addrsig_sym _ZSt9terminatev
174+
.addrsig_sym _Unwind_Resume
175+
.addrsig_sym _ZTISt12out_of_range

bolt/test/X86/infer_no_exits.test

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
## This verifies that functions where an exit block has a landing pad are covered by stale profile inference.
2+
# RUN: %clangxx %cxxflags %p/Inputs/infer_no_exits.s -o %t.exe
3+
# RUN: link_fdata %s %t.exe %t.preagg PREAGG
4+
# RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
5+
# RUN: sed -i '0,/hash:/s/0x[0-9A-Fa-f]\{16\}/0x0000000000000000/' %t.yaml
6+
# RUN: llvm-bolt %t.exe -data %t.yaml -o %t.null -v=1 -infer-stale-profile 2>&1 \
7+
# RUN: | FileCheck %s
8+
9+
# PREAGG: B X:0 #main# 1 0
10+
11+
# CHECK: BOLT-INFO: inferred profile for 1 (100.00% of profiled, 100.00% of stale) functions responsible for -nan% samples (0 out of 0)

0 commit comments

Comments
 (0)