Skip to content

Commit 3968ebd

Browse files
authored
[BOLT] Keep multi-entry functions simple in aggregation mode (llvm#128253)
BOLT used to mark multi-entry functions as non-simple in non-relocation mode, on the reasoning that such functions cannot be moved because of potentially undetected references to their entry points. That reasoning does not apply in aggregation mode, because BOLT performs no optimizations there. Relax this constraint for aggregation jobs. Test Plan: added entry-point-fallthru.s
1 parent a778930 commit 3968ebd

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/DynoStats.h"
1616
#include "bolt/Core/HashUtilities.h"
1717
#include "bolt/Core/MCPlusBuilder.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "bolt/Utils/NameResolver.h"
1920
#include "bolt/Utils/NameShortener.h"
2021
#include "bolt/Utils/Utils.h"
@@ -1753,8 +1754,8 @@ void BinaryFunction::postProcessEntryPoints() {
17531754
// In non-relocation mode there's potentially an external undetectable
17541755
// reference to the entry point and hence we cannot move this entry
17551756
// point. Optimizing without moving could be difficult.
1756-
// In BAT mode, register any known entry points for CFG construction.
1757-
if (!BC.HasRelocations && !BC.HasBATSection)
1757+
// In aggregation, register any known entry points for CFG construction.
1758+
if (!BC.HasRelocations && !opts::AggregateOnly)
17581759
setSimple(false);
17591760

17601761
const uint32_t Offset = KV.first;

bolt/test/X86/entry-point-fallthru.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
## Checks that fallthroughs spanning entry points are accepted in aggregation
2+
## mode.
3+
4+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
5+
# RUN: ld.lld %t.o -o %t
6+
# RUN: link_fdata %s %t %t.preagg PREAGG
7+
# RUN: perf2bolt %t -p %t.preagg --pa -o %t.fdata | FileCheck %s
8+
# CHECK: traces mismatching disassembled function contents: 0
9+
10+
.globl main
11+
main:
12+
.cfi_startproc
13+
vmovaps %zmm31,%zmm3
14+
15+
next:
16+
add $0x4,%r9
17+
add $0x40,%r10
18+
dec %r14
19+
Ljmp:
20+
jne main
21+
# PREAGG: T #Ljmp# #main# #Ljmp# 1
22+
ret
23+
.cfi_endproc
24+
.size main,.-main

0 commit comments

Comments
 (0)