Skip to content

Commit bc4de87

Browse files
authored
Merge pull request #155 from sx-aurora-dev/feature/merge-upstream-20220208
Feature/merge upstream 20220208
2 parents ad0434b + 0b41a5d commit bc4de87

File tree

287 files changed

+7292
-3768
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

287 files changed

+7292
-3768
lines changed

bolt/include/bolt/Passes/BinaryPasses.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,16 @@ class ShortenInstructions : public BinaryFunctionPass {
295295

296296
/// Perform simple peephole optimizations.
297297
class Peepholes : public BinaryFunctionPass {
298+
public:
299+
enum PeepholeOpts : char {
300+
PEEP_NONE = 0x0,
301+
PEEP_DOUBLE_JUMPS = 0x2,
302+
PEEP_TAILCALL_TRAPS = 0x4,
303+
PEEP_USELESS_BRANCHES = 0x8,
304+
PEEP_ALL = 0xf
305+
};
306+
307+
private:
298308
uint64_t NumDoubleJumps{0};
299309
uint64_t TailCallTraps{0};
300310
uint64_t NumUselessCondBranches{0};

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -105,29 +105,19 @@ MinBranchClusters("min-branch-clusters",
105105
cl::Hidden,
106106
cl::cat(BoltOptCategory));
107107

108-
enum PeepholeOpts : char {
109-
PEEP_NONE = 0x0,
110-
PEEP_DOUBLE_JUMPS = 0x2,
111-
PEEP_TAILCALL_TRAPS = 0x4,
112-
PEEP_USELESS_BRANCHES = 0x8,
113-
PEEP_ALL = 0xf
114-
};
115-
116-
static cl::list<PeepholeOpts>
117-
Peepholes("peepholes",
118-
cl::CommaSeparated,
119-
cl::desc("enable peephole optimizations"),
120-
cl::value_desc("opt1,opt2,opt3,..."),
121-
cl::values(
122-
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
123-
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
124-
"remove double jumps when able"),
125-
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
126-
clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
127-
"remove useless conditional branches"),
128-
clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")),
129-
cl::ZeroOrMore,
130-
cl::cat(BoltOptCategory));
108+
static cl::list<Peepholes::PeepholeOpts> Peepholes(
109+
"peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
110+
cl::value_desc("opt1,opt2,opt3,..."),
111+
cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
112+
clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps",
113+
"remove double jumps when able"),
114+
clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps",
115+
"insert tail call traps"),
116+
clEnumValN(Peepholes::PEEP_USELESS_BRANCHES, "useless-branches",
117+
"remove useless conditional branches"),
118+
clEnumValN(Peepholes::PEEP_ALL, "all",
119+
"enable all peephole optimizations")),
120+
cl::ZeroOrMore, cl::cat(BoltOptCategory));
131121

132122
static cl::opt<unsigned>
133123
PrintFuncStat("print-function-statistics",
@@ -1092,20 +1082,20 @@ void Peepholes::removeUselessCondBranches(BinaryFunction &Function) {
10921082
}
10931083

10941084
void Peepholes::runOnFunctions(BinaryContext &BC) {
1095-
const char Opts = std::accumulate(
1096-
opts::Peepholes.begin(), opts::Peepholes.end(), 0,
1097-
[](const char A, const opts::PeepholeOpts B) { return A | B; });
1098-
if (Opts == opts::PEEP_NONE || !BC.isX86())
1085+
const char Opts =
1086+
std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0,
1087+
[](const char A, const PeepholeOpts B) { return A | B; });
1088+
if (Opts == PEEP_NONE)
10991089
return;
11001090

11011091
for (auto &It : BC.getBinaryFunctions()) {
11021092
BinaryFunction &Function = It.second;
11031093
if (shouldOptimize(Function)) {
1104-
if (Opts & opts::PEEP_DOUBLE_JUMPS)
1094+
if (Opts & PEEP_DOUBLE_JUMPS)
11051095
NumDoubleJumps += fixDoubleJumps(Function, false);
1106-
if (Opts & opts::PEEP_TAILCALL_TRAPS)
1096+
if (Opts & PEEP_TAILCALL_TRAPS)
11071097
addTailcallTraps(Function);
1108-
if (Opts & opts::PEEP_USELESS_BRANCHES)
1098+
if (Opts & PEEP_USELESS_BRANCHES)
11091099
removeUselessCondBranches(Function);
11101100
assert(Function.validateCFG());
11111101
}

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,13 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
798798
createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
799799
}
800800

801+
bool createTrap(MCInst &Inst) const override {
802+
Inst.clear();
803+
Inst.setOpcode(AArch64::BRK);
804+
Inst.addOperand(MCOperand::createImm(1));
805+
return true;
806+
}
807+
801808
bool convertJmpToTailCall(MCInst &Inst) override {
802809
setTailCall(Inst);
803810
return true;

bolt/runtime/CMakeLists.txt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
1-
cmake_minimum_required(VERSION 3.1.0)
2-
31
include(CheckIncludeFiles)
42

5-
set(CMAKE_CXX_STANDARD 11)
6-
set(CMAKE_CXX_STANDARD_REQUIRED ON)
73
set(CMAKE_CXX_EXTENSIONS OFF)
84

95
project(libbolt_rt_project)
@@ -21,10 +17,16 @@ add_library(bolt_rt_hugify STATIC
2117
${CMAKE_CURRENT_BINARY_DIR}/config.h
2218
)
2319

20+
set(BOLT_RT_FLAGS
21+
-ffreestanding
22+
-fno-exceptions
23+
-fno-rtti
24+
-fno-stack-protector)
25+
2426
# Don't let the compiler think it can create calls to standard libs
25-
target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fno-stack-protector -fPIE)
27+
target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
2628
target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
27-
target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fno-stack-protector)
29+
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
2830
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
2931

3032
install(TARGETS bolt_rt_instr DESTINATION lib)
@@ -38,9 +40,6 @@ if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
3840
target_include_directories(bolt_rt_instr_osx PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
3941
target_compile_options(bolt_rt_instr_osx PRIVATE
4042
-target x86_64-apple-darwin19.6.0
41-
-ffreestanding
42-
-fno-exceptions
43-
-fno-rtti
44-
-fno-stack-protector)
43+
${BOLT_RT_FLAGS})
4544
install(TARGETS bolt_rt_instr_osx DESTINATION lib)
4645
endif()

bolt/runtime/common.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
#include <cstddef>
1212
#include <cstdint>
1313

14+
#include "config.h"
15+
16+
#ifdef HAVE_ELF_H
17+
#include <elf.h>
18+
#endif
19+
1420
#else
1521

1622
typedef __SIZE_TYPE__ size_t;
@@ -32,12 +38,6 @@ typedef int int32_t;
3238

3339
#endif
3440

35-
#include "config.h"
36-
37-
#ifdef HAVE_ELF_H
38-
#include <elf.h>
39-
#endif
40-
4141
// Save all registers while keeping 16B stack alignment
4242
#define SAVE_ALL \
4343
"push %%rax\n" \

bolt/test/AArch64/double_jump.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// A contrived example to test the double jump removal peephole.
2+
3+
// RUN: %clang %cflags -O0 %s -o %t.exe
4+
// RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=double-jumps | \
5+
// RUN: FileCheck %s -check-prefix=CHECKBOLT
6+
// RUN: llvm-objdump -d %t.bolt | FileCheck %s
7+
8+
// CHECKBOLT: BOLT-INFO: Peephole: 1 double jumps patched.
9+
10+
// CHECK: <_Z3foom>:
11+
// CHECK-NEXT: sub sp, sp, #16
12+
// CHECK-NEXT: str x0, [sp, #8]
13+
// CHECK-NEXT: ldr [[REG:x[0-28]+]], [sp, #8]
14+
// CHECK-NEXT: cmp [[REG]], #0
15+
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x34>
16+
// CHECK-NEXT: add [[REG]], [[REG]], #1
17+
// CHECK-NEXT: add [[REG]], [[REG]], #1
18+
// CHECK-NEXT: cmp [[REG]], #2
19+
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x28>
20+
// CHECK-NEXT: add [[REG]], [[REG]], #1
21+
// CHECK-NEXT: mov [[REG]], x1
22+
// CHECK-NEXT: ldr x1, [sp]
23+
// CHECK-NEXT: b {{.*}} <bar>
24+
// CHECK-NEXT: ldr x1, [sp]
25+
// CHECK-NEXT: add [[REG]], [[REG]], #1
26+
// CHECK-NEXT: b {{.*}} <bar>
27+
28+
extern "C" unsigned long bar(unsigned long count) { return count + 1; }
29+
30+
unsigned long foo(unsigned long count) {
31+
asm volatile(" cmp %0,#0\n"
32+
" b.eq .L7\n"
33+
" add %0, %0, #1\n"
34+
" b .L1\n"
35+
".L1: b .L2\n"
36+
".L2: add %0, %0, #1\n"
37+
" cmp %0, #2\n"
38+
" b.ne .L3\n"
39+
" b .L4\n"
40+
".L3: b .L5\n"
41+
".L5: add %0, %0, #1\n"
42+
".L4: mov %0,x1\n"
43+
" ldr x1, [sp]\n"
44+
" b .L6\n"
45+
".L7: ldr x1, [sp]\n"
46+
" add %0, %0, #1\n"
47+
" b .L6\n"
48+
".L6: b bar\n"
49+
:
50+
: "r"(count)
51+
:);
52+
return count;
53+
}
54+
55+
int main(int argc, const char *argv[]) { return foo(38); }

bolt/test/AArch64/tailcall_traps.s

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## Tests the peephole that adds trap instructions following indirect tail calls.
2+
3+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
4+
# RUN: %s -o %t.o
5+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
6+
# RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=tailcall-traps \
7+
# RUN: -print-peepholes -funcs=foo,bar 2>&1 | FileCheck %s
8+
9+
# CHECK: Binary Function "foo"
10+
# CHECK: br x0 # TAILCALL
11+
# CHECK-NEXT: brk #0x1
12+
# CHECK: End of Function "foo"
13+
14+
# CHECK: Binary Function "bar"
15+
# CHECK: b foo # TAILCALL
16+
# CHECK: End of Function "bar"
17+
18+
.text
19+
.align 4
20+
.global main
21+
.type main, %function
22+
main:
23+
nop
24+
ret
25+
.size main, .-main
26+
27+
.global foo
28+
.type foo, %function
29+
foo:
30+
br x0
31+
.size foo, .-foo
32+
33+
.global bar
34+
.type bar, %function
35+
bar:
36+
b foo
37+
.size bar, .-bar

bolt/test/X86/Inputs/double_jump.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
/*
2-
* A contrived example to test the double jump removal peephole.
3-
*/
1+
// A contrived example to test the double jump removal peephole.
42

53
extern "C" unsigned long bar(unsigned long count) {
64
return count + 1;

bolt/test/lit.cfg.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,12 @@
5656
llvm_config.use_default_substitutions()
5757

5858
llvm_config.config.environment['CLANG'] = config.bolt_clang
59-
llvm_config.config.environment['LLD'] = config.bolt_lld
6059
llvm_config.use_clang()
61-
llvm_config.use_llvm_tool('lld', required=True, search_env='LLD')
60+
61+
llvm_config.config.environment['LD_LLD'] = config.bolt_lld
62+
ld_lld = llvm_config.use_llvm_tool('ld.lld', required=True, search_env='LD_LLD')
63+
llvm_config.config.available_features.add('ld.lld')
64+
llvm_config.add_tool_substitutions([ToolSubst(r'ld\.lld', command=ld_lld)])
6265

6366
config.substitutions.append(('%cflags', '-no-pie -gdwarf-4'))
6467
config.substitutions.append(('%cxxflags', '-no-pie -gdwarf-4'))

clang/docs/LanguageExtensions.rst

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -595,22 +595,26 @@ elementwise to the input.
595595

596596
Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity
597597

598-
========================================= ================================================================ =========================================
599-
Name Operation Supported element types
600-
========================================= ================================================================ =========================================
601-
T __builtin_elementwise_abs(T x) return the absolute value of a number x; the absolute value of signed integer and floating point types
602-
the most negative integer remains the most negative integer
603-
T __builtin_elementwise_ceil(T x) return the smallest integral value greater than or equal to x floating point types
604-
T __builtin_elementwise_floor(T x) return the largest integral value less than or equal to x floating point types
605-
T __builtin_elementwise_roundeven(T x) round x to the nearest integer value in floating point format, floating point types
606-
rounding halfway cases to even (that is, to the nearest value
607-
that is an even integer), regardless of the current rounding
608-
direction.
609-
T__builtin_elementwise_trunc(T x) return the integral value nearest to but no larger in floating point types
610-
magnitude than x
611-
T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types
612-
T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types
613-
========================================= ================================================================ =========================================
598+
=========================================== ================================================================ =========================================
599+
Name Operation Supported element types
600+
=========================================== ================================================================ =========================================
601+
T __builtin_elementwise_abs(T x) return the absolute value of a number x; the absolute value of signed integer and floating point types
602+
the most negative integer remains the most negative integer
603+
T __builtin_elementwise_ceil(T x) return the smallest integral value greater than or equal to x floating point types
604+
T __builtin_elementwise_floor(T x) return the largest integral value less than or equal to x floating point types
605+
T __builtin_elementwise_roundeven(T x) round x to the nearest integer value in floating point format, floating point types
606+
rounding halfway cases to even (that is, to the nearest value
607+
that is an even integer), regardless of the current rounding
608+
direction.
609+
 T __builtin_elementwise_trunc(T x) return the integral value nearest to but no larger in floating point types
610+
magnitude than x
611+
T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types
612+
T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types
613+
T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of integer types
614+
representable values for the signed/unsigned integer type.
615+
T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of integer types
616+
representable values for the signed/unsigned integer type.
617+
=========================================== ================================================================ =========================================
614618

615619

616620
*Reduction Builtins*

0 commit comments

Comments
 (0)