sx-aurora-dev
diff --git a/‎bolt/include/bolt/Passes/BinaryPasses.h
Lines changed: 10 additions & 0 deletions b/‎bolt/include/bolt/Passes/BinaryPasses.h
Lines changed: 10 additions & 0 deletions
diff --git a/‎bolt/lib/Passes/BinaryPasses.cpp
Lines changed: 20 additions & 30 deletions b/‎bolt/lib/Passes/BinaryPasses.cpp
Lines changed: 20 additions & 30 deletions
diff --git a/‎bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Lines changed: 7 additions & 0 deletions b/‎bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Lines changed: 7 additions & 0 deletions
diff --git a/‎bolt/runtime/CMakeLists.txt
Lines changed: 9 additions & 10 deletions b/‎bolt/runtime/CMakeLists.txt
Lines changed: 9 additions & 10 deletions
diff --git a/‎bolt/runtime/common.h
Lines changed: 6 additions & 6 deletions b/‎bolt/runtime/common.h
Lines changed: 6 additions & 6 deletions
diff --git a/‎bolt/test/AArch64/double_jump.cpp
Lines changed: 55 additions & 0 deletions b/‎bolt/test/AArch64/double_jump.cpp
Lines changed: 55 additions & 0 deletions
diff --git a/‎bolt/test/AArch64/tailcall_traps.s
Lines changed: 37 additions & 0 deletions b/‎bolt/test/AArch64/tailcall_traps.s
Lines changed: 37 additions & 0 deletions
diff --git a/‎bolt/test/X86/Inputs/double_jump.cpp
Lines changed: 1 addition & 3 deletions b/‎bolt/test/X86/Inputs/double_jump.cpp
Lines changed: 1 addition & 3 deletions
diff --git a/‎bolt/test/lit.cfg.py
Lines changed: 5 additions & 2 deletions b/‎bolt/test/lit.cfg.py
Lines changed: 5 additions & 2 deletions
diff --git a/‎clang/docs/LanguageExtensions.rst
Lines changed: 20 additions & 16 deletions b/‎clang/docs/LanguageExtensions.rst
Lines changed: 20 additions & 16 deletions
@@ -295,6 +295,16 @@ class ShortenInstructions : public BinaryFunctionPass {
 
 /// Perform simple peephole optimizations.
 class Peepholes : public BinaryFunctionPass {
+public:
+  enum PeepholeOpts : char {
+    PEEP_NONE = 0x0,
+    PEEP_DOUBLE_JUMPS = 0x2,
+    PEEP_TAILCALL_TRAPS = 0x4,
+    PEEP_USELESS_BRANCHES = 0x8,
+    PEEP_ALL = 0xf
+  };
+
+private:
   uint64_t NumDoubleJumps{0};
   uint64_t TailCallTraps{0};
   uint64_t NumUselessCondBranches{0};
 
@@ -105,29 +105,19 @@ MinBranchClusters("min-branch-clusters",
   cl::Hidden,
   cl::cat(BoltOptCategory));
 
-enum PeepholeOpts : char {
-  PEEP_NONE             = 0x0,
-  PEEP_DOUBLE_JUMPS     = 0x2,
-  PEEP_TAILCALL_TRAPS   = 0x4,
-  PEEP_USELESS_BRANCHES = 0x8,
-  PEEP_ALL              = 0xf
-};
-
-static cl::list<PeepholeOpts>
-Peepholes("peepholes",
-  cl::CommaSeparated,
-  cl::desc("enable peephole optimizations"),
-  cl::value_desc("opt1,opt2,opt3,..."),
-  cl::values(
-    clEnumValN(PEEP_NONE, "none", "disable peepholes"),
-    clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
-               "remove double jumps when able"),
-    clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
-    clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
-               "remove useless conditional branches"),
-    clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")),
-  cl::ZeroOrMore,
-  cl::cat(BoltOptCategory));
+static cl::list<Peepholes::PeepholeOpts> Peepholes(
+    "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
+    cl::value_desc("opt1,opt2,opt3,..."),
+    cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
+               clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps",
+                          "remove double jumps when able"),
+               clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps",
+                          "insert tail call traps"),
+               clEnumValN(Peepholes::PEEP_USELESS_BRANCHES, "useless-branches",
+                          "remove useless conditional branches"),
+               clEnumValN(Peepholes::PEEP_ALL, "all",
+                          "enable all peephole optimizations")),
+    cl::ZeroOrMore, cl::cat(BoltOptCategory));
 
 static cl::opt<unsigned>
 PrintFuncStat("print-function-statistics",
@@ -1092,20 +1082,20 @@ void Peepholes::removeUselessCondBranches(BinaryFunction &Function) {
 }
 
 void Peepholes::runOnFunctions(BinaryContext &BC) {
-  const char Opts = std::accumulate(
-      opts::Peepholes.begin(), opts::Peepholes.end(), 0,
-      [](const char A, const opts::PeepholeOpts B) { return A | B; });
-  if (Opts == opts::PEEP_NONE || !BC.isX86())
+  const char Opts =
+      std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0,
+                      [](const char A, const PeepholeOpts B) { return A | B; });
+  if (Opts == PEEP_NONE)
     return;
 
   for (auto &It : BC.getBinaryFunctions()) {
     BinaryFunction &Function = It.second;
     if (shouldOptimize(Function)) {
-      if (Opts & opts::PEEP_DOUBLE_JUMPS)
+      if (Opts & PEEP_DOUBLE_JUMPS)
         NumDoubleJumps += fixDoubleJumps(Function, false);
-      if (Opts & opts::PEEP_TAILCALL_TRAPS)
+      if (Opts & PEEP_TAILCALL_TRAPS)
         addTailcallTraps(Function);
-      if (Opts & opts::PEEP_USELESS_BRANCHES)
+      if (Opts & PEEP_USELESS_BRANCHES)
         removeUselessCondBranches(Function);
       assert(Function.validateCFG());
     }
 
@@ -798,6 +798,13 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
   }
 
+  bool createTrap(MCInst &Inst) const override {
+    Inst.clear();
+    Inst.setOpcode(AArch64::BRK);
+    Inst.addOperand(MCOperand::createImm(1));
+    return true;
+  }
+
   bool convertJmpToTailCall(MCInst &Inst) override {
     setTailCall(Inst);
     return true;
 
@@ -1,9 +1,5 @@
-cmake_minimum_required(VERSION 3.1.0)
-
 include(CheckIncludeFiles)
 
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
 project(libbolt_rt_project)
@@ -21,10 +17,16 @@ add_library(bolt_rt_hugify STATIC
   ${CMAKE_CURRENT_BINARY_DIR}/config.h
   )
 
+set(BOLT_RT_FLAGS
+  -ffreestanding
+  -fno-exceptions
+  -fno-rtti
+  -fno-stack-protector)
+
 # Don't let the compiler think it can create calls to standard libs
-target_compile_options(bolt_rt_instr PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fno-stack-protector -fPIE)
+target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
 target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
-target_compile_options(bolt_rt_hugify PRIVATE -ffreestanding -fno-exceptions -fno-rtti -fno-stack-protector)
+target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
 target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
 install(TARGETS bolt_rt_instr DESTINATION lib)
@@ -38,9 +40,6 @@ if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
   target_include_directories(bolt_rt_instr_osx PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
   target_compile_options(bolt_rt_instr_osx PRIVATE
     -target x86_64-apple-darwin19.6.0
-    -ffreestanding
-    -fno-exceptions
-    -fno-rtti
-    -fno-stack-protector)
+    ${BOLT_RT_FLAGS})
   install(TARGETS bolt_rt_instr_osx DESTINATION lib)
 endif()
@@ -11,6 +11,12 @@
 #include <cstddef>
 #include <cstdint>
 
+#include "config.h"
+
+#ifdef HAVE_ELF_H
+#include <elf.h>
+#endif
+
 #else
 
 typedef __SIZE_TYPE__ size_t;
@@ -32,12 +38,6 @@ typedef int int32_t;
 
 #endif
 
-#include "config.h"
-
-#ifdef HAVE_ELF_H
-#include <elf.h>
-#endif
-
 // Save all registers while keeping 16B stack alignment
 #define SAVE_ALL                                                               \
   "push %%rax\n"                                                               \
 
@@ -0,0 +1,55 @@
+// A contrived example to test the double jump removal peephole.
+
+// RUN: %clang %cflags -O0 %s -o %t.exe
+// RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=double-jumps | \
+// RUN:   FileCheck %s -check-prefix=CHECKBOLT
+// RUN: llvm-objdump -d %t.bolt | FileCheck %s
+
+// CHECKBOLT: BOLT-INFO: Peephole: 1 double jumps patched.
+
+// CHECK: <_Z3foom>:
+// CHECK-NEXT: sub     sp, sp, #16
+// CHECK-NEXT: str     x0, [sp, #8]
+// CHECK-NEXT: ldr     [[REG:x[0-9]+]], [sp, #8]
+// CHECK-NEXT: cmp     [[REG]], #0
+// CHECK-NEXT: b.eq    {{.*}} <_Z3foom+0x34>
+// CHECK-NEXT: add     [[REG]], [[REG]], #1
+// CHECK-NEXT: add     [[REG]], [[REG]], #1
+// CHECK-NEXT: cmp     [[REG]], #2
+// CHECK-NEXT: b.eq    {{.*}} <_Z3foom+0x28>
+// CHECK-NEXT: add     [[REG]], [[REG]], #1
+// CHECK-NEXT: mov     [[REG]], x1
+// CHECK-NEXT: ldr     x1, [sp]
+// CHECK-NEXT: b       {{.*}} <bar>
+// CHECK-NEXT: ldr     x1, [sp]
+// CHECK-NEXT: add     [[REG]], [[REG]], #1
+// CHECK-NEXT: b       {{.*}} <bar>
+
+extern "C" unsigned long bar(unsigned long count) { return count + 1; }
+
+unsigned long foo(unsigned long count) {
+  asm volatile("     cmp %0,#0\n"
+               "     b.eq .L7\n"
+               "     add %0, %0, #1\n"
+               "     b .L1\n"
+               ".L1: b .L2\n"
+               ".L2: add  %0, %0, #1\n"
+               "     cmp  %0, #2\n"
+               "     b.ne .L3\n"
+               "     b .L4\n"
+               ".L3: b .L5\n"
+               ".L5: add %0, %0, #1\n"
+               ".L4: mov %0,x1\n"
+               "     ldr x1, [sp]\n"
+               "     b .L6\n"
+               ".L7: ldr x1, [sp]\n"
+               "     add %0, %0, #1\n"
+               "     b .L6\n"
+               ".L6: b bar\n"
+               :
+               : "r"(count)
+               :);
+  return count;
+}
+
+int main(int argc, const char *argv[]) { return foo(38); }
@@ -0,0 +1,37 @@
+## Tests the peephole that adds trap instructions following indirect tail calls.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=tailcall-traps \
+# RUN:   -print-peepholes -funcs=foo,bar 2>&1 | FileCheck %s
+
+# CHECK:  Binary Function "foo"
+# CHECK:        br     x0  # TAILCALL
+# CHECK-NEXT:   brk    #0x1
+# CHECK:  End of Function "foo"
+
+# CHECK:  Binary Function "bar"
+# CHECK:        b     foo # TAILCALL
+# CHECK:  End of Function "bar"
+
+  .text
+  .align 4
+  .global main
+  .type main, %function
+main:
+  nop
+  ret
+  .size main, .-main
+
+  .global foo
+  .type foo, %function
+foo:
+  br x0
+  .size foo, .-foo
+
+  .global bar
+  .type bar, %function
+bar:
+  b foo
+  .size bar, .-bar
@@ -1,6 +1,4 @@
-/*
- * A contrived example to test the double jump removal peephole.
- */
+// A contrived example to test the double jump removal peephole.
 
 extern "C" unsigned long bar(unsigned long count) {
   return count + 1;
 
@@ -56,9 +56,12 @@
 llvm_config.use_default_substitutions()
 
 llvm_config.config.environment['CLANG'] = config.bolt_clang
-llvm_config.config.environment['LLD'] = config.bolt_lld
 llvm_config.use_clang()
-llvm_config.use_llvm_tool('lld', required=True, search_env='LLD')
+
+llvm_config.config.environment['LD_LLD'] = config.bolt_lld
+ld_lld = llvm_config.use_llvm_tool('ld.lld', required=True, search_env='LD_LLD')
+llvm_config.config.available_features.add('ld.lld')
+llvm_config.add_tool_substitutions([ToolSubst(r'ld\.lld', command=ld_lld)])
 
 config.substitutions.append(('%cflags', '-no-pie -gdwarf-4'))
 config.substitutions.append(('%cxxflags', '-no-pie -gdwarf-4'))
 
@@ -595,22 +595,26 @@ elementwise to the input.
 
 Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity
 
-========================================= ================================================================ =========================================
-         Name                              Operation                                                        Supported element types
-========================================= ================================================================ =========================================
- T __builtin_elementwise_abs(T x)          return the absolute value of a number x; the absolute value of   signed integer and floating point types
-                                           the most negative integer remains the most negative integer
- T __builtin_elementwise_ceil(T x)         return the smallest integral value greater than or equal to x    floating point types
- T __builtin_elementwise_floor(T x)        return the largest integral value less than or equal to x        floating point types
- T __builtin_elementwise_roundeven(T x)    round x to the nearest integer value in floating point format,   floating point types
-                                           rounding halfway cases to even (that is, to the nearest value
-                                           that is an even integer), regardless of the current rounding
-                                           direction.
- T__builtin_elementwise_trunc(T x)         return the integral value nearest to but no larger in            floating point types
-                                           magnitude than x
- T __builtin_elementwise_max(T x, T y)     return x or y, whichever is larger                               integer and floating point types
- T __builtin_elementwise_min(T x, T y)     return x or y, whichever is smaller                              integer and floating point types
-========================================= ================================================================ =========================================
+=========================================== ================================================================ =========================================
+         Name                                Operation                                                        Supported element types
+=========================================== ================================================================ =========================================
+ T __builtin_elementwise_abs(T x)            return the absolute value of a number x; the absolute value of   signed integer and floating point types
+                                             the most negative integer remains the most negative integer
+ T __builtin_elementwise_ceil(T x)           return the smallest integral value greater than or equal to x    floating point types
+ T __builtin_elementwise_floor(T x)          return the largest integral value less than or equal to x        floating point types
+ T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
+                                             rounding halfway cases to even (that is, to the nearest value
+                                             that is an even integer), regardless of the current rounding
+                                             direction.
+ T __builtin_elementwise_trunc(T x)          return the integral value nearest to but no larger in            floating point types
+                                             magnitude than x
+ T __builtin_elementwise_max(T x, T y)       return x or y, whichever is larger                               integer and floating point types
+ T __builtin_elementwise_min(T x, T y)       return x or y, whichever is smaller                              integer and floating point types
+ T __builtin_elementwise_add_sat(T x, T y)   return the sum of x and y, clamped to the range of               integer types
+                                             representable values for the signed/unsigned integer type.
+ T __builtin_elementwise_sub_sat(T x, T y)   return the difference of x and y, clamped to the range of        integer types
+                                             representable values for the signed/unsigned integer type.
+=========================================== ================================================================ =========================================
 
 
 *Reduction Builtins*