kripken · kripken · Jun 10, 2025 · Jun 10, 2025 · Jun 11, 2025 · Jun 11, 2025
diff --git a/lld/test/wasm/Inputs/branch-hints-multifile.ll b/lld/test/wasm/Inputs/branch-hints-multifile.ll
@@ -0,0 +1,14 @@
+define i32 @bw_bh_test_2(i32 %a, i32 %b) {
+entry:
+  %1 = icmp ult i32 %a, %b
+  br i1 %1, label %fail, label %success, !prof !0 ; weighted by !0 below
+
+fail:
+  ret i32 -1
+
+success:
+  ret i32 0
+}
+
+!0 = !{!"branch_weights", !"expected", i32 1, i32 2000} ; %fail : %success = 1 : 2000
+
diff --git a/lld/test/wasm/branch-hints-multifile.ll b/lld/test/wasm/branch-hints-multifile.ll
@@ -0,0 +1,30 @@
+
+; RUN: llc -mtriple=wasm32-unknown-unknown -filetype=obj -o %t1.o < %s
+; RUN: llc -mtriple=wasm32-unknown-unknown -filetype=obj -o %t2.o < %S/Inputs/branch-hints-multifile.ll
+; RUN: wasm-ld -o %t.wasm %t1.o %t2.o --no-entry --no-gc-sections
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+define i32 @bw_bh_test_1(i32 %a, i32 %b) {
+entry:
+  %1 = icmp ult i32 %a, %b
+  br i1 %1, label %fail, label %success, !prof !0 ; weighted by !0 below
+
+fail:
+  ret i32 -1
+
+success:
+  ret i32 0
+}
+
+!0 = !{!"branch_weights", !"expected", i32 2000, i32 1} ; %fail : %success = 2000 : 1
+
+; Test that we combine branch hint sections properly. The number of functions
+; should be reported once at the start (even though it appears in each object
+; file), and the hints for each object file should then be concatenated.
+; CHECK:       - Type:            CUSTOM
+; CHECK-NEXT:    Name:            metadata.code.branch_hint
+; CHECK-NEXT:    Payload: '8280808000818080800001080100828080800001080101'
+;                          ^^ two functions (5-byte padded LEB)
+;                                    ^^hint for func 1^
+;                                                      ^^hint for func 2^
+
diff --git a/lld/test/wasm/branch-hints.ll b/lld/test/wasm/branch-hints.ll
@@ -0,0 +1,31 @@
+
+; RUN: llc -mtriple=wasm32-unknown-unknown -filetype=obj -o %t.o < %s
+; RUN: wasm-ld -o %t.wasm %t.o --no-entry --no-gc-sections
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+define i32 @bw_bh_test(i32 %a, i32 %b) {
+; The weights below mean we are far more likely to go to %fail and return -1.
+; Codegen will emit the -1 first, so we emit a hint of 0, below, for the value
+; of the hint (as in llvm/test/CodeGen/WebAssembly/branch-hints.ll).
+entry:
+  %1 = icmp ult i32 %a, %b
+  br i1 %1, label %fail, label %success, !prof !0
+
+fail:
+  ret i32 -1
+
+success:
+  ret i32 0
+}
+
+!0 = !{!"branch_weights", !"expected", i32 2000, i32 1} ; %fail : %success = 2000 : 1
+
+; CHECK:       - Type:            CUSTOM
+; CHECK-NEXT:    Name:            metadata.code.branch_hint
+; CHECK-NEXT:    Payload: '8180808000818080800001080100'
+;                          ^^ one function (5-byte padded LEB)
+;                                    ^^^^^^^^^^ LEB of function index 1
+;                                              ^^ one hint in function
+;                                                ^^ offset 8
+;                                                  ^^ hint size 1
+;                                                    ^^ hint value: 0
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
@@ -354,9 +354,11 @@ class InputSection : public InputChunk {
 
   const uint64_t tombstoneValue;
 
+  // The underlying wasm section (public: Writer reads it to merge branch hints).
+  const WasmSection &section;
+
 protected:
   static uint64_t getTombstoneForSection(StringRef name);
-  const WasmSection &section;
 };
 
 } // namespace wasm

diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
@@ -28,6 +28,7 @@
 #include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "llvm/Support/SHA1.h"
@@ -94,6 +95,7 @@ class Writer {
   void addSections();
 
   void createCustomSections();
+  void createBranchHintSection();
   void createSyntheticSections();
   void createSyntheticSectionsPostLayout();
   void finalizeSections();
@@ -164,7 +166,11 @@ void Writer::createCustomSections() {
   log("createCustomSections");
   for (auto &pair : customSectionMapping) {
     StringRef name = pair.first;
-    LLVM_DEBUG(dbgs() << "createCustomSection: " << name << "\n");
+
+    if (name == "metadata.code.branch_hint") // XXX unneeded
+      continue;
+
+    dbgs() << "createCustomSection: " << name << "\n";
 
     OutputSection *sec = make<CustomSection>(std::string(name), pair.second);
     if (ctx.arg.relocatable || ctx.arg.emitRelocs) {
@@ -176,6 +182,122 @@ void Writer::createCustomSections() {
   }
 }
 
+// A Branch Hint section is a Custom Section with some custom rules for how it
+// is created. Rather than simply concatenate the input sections, we must also
+// adjust the field that reports the number of functions, as follows.
+//
+// Our input chunks each begin with a 5-byte LEB of the number of functions. If
+// we simply concatenated, we'd get this:
+//
+//   ;; from object file 1
+//   [num functions_1] : 5 byte LEB
+//   [..data_1..]
+//   ;; from object file 2
+//   [num functions_2] : 5 byte LEB
+//   [..data_2..]
+//   ..
+//   ;; from object file N
+//   [num functions_N] : 5 byte LEB
+//   [..data_N..]
+//
+// But the final output should report the total number of functions at the very
+// start. To fix that, we must accumulate the total number of functions and use
+// that at the very start (which comes from the first object file), and we must
+// remove the first 5 bytes of the others:
+//
+//   [num functions_1 + _2 + .. + _N] : 5 byte LEB
+//   [..data_1..]
+//   [..data_2..]
+//   ..
+//   [..data_N..]
+//
+// That is now correct.
+//
+// Also, the Branch Hint section must appear *before* the code, so we call this
+// earlier than for other custom sections.
+void Writer::createBranchHintSection() {
+  const std::string name = "metadata.code.branch_hint";
+  // Every input chunk begins with its function count as a 5-byte padded LEB.
+  constexpr unsigned countSize = 5;
+
+  auto iter = customSectionMapping.find(name);
+  if (iter == customSectionMapping.end())
+    return;
+
+  // Take the chunk list out of the map and erase the entry right away, so
+  // that createCustomSections (which runs later, and would place the section
+  // after the code) cannot emit it a second time. The list is moved into
+  // make<> storage so it stays valid for the output section created below.
+  auto &inputChunks =
+      *make<std::vector<InputChunk *>>(std::move(iter->second));
+  customSectionMapping.erase(iter);
+
+  LLVM_DEBUG(dbgs() << "createBranchHintSection: " << name << "\n");
+
+  assert(!inputChunks.empty());
+  CustomSection *sec;
+  if (inputChunks.size() == 1) {
+    // Just use the original chunk. We don't need to do any work.
+    sec = make<CustomSection>(name, inputChunks);
+  } else {
+    // We need to merge the input chunks in the special format that the spec
+    // expects, as explained above. To do so, create new input chunks with those
+    // minor modifications, and then the normal custom section behavior of
+    // concatenating the chunks will give us the right output.
+    auto &newInputChunks =
+        *make<std::vector<InputChunk *>>(inputChunks.size());
+
+    // First pass: add up the number of functions reported by each chunk.
+    uint64_t totalFunctions = 0;
+    for (InputChunk *chunk : inputChunks) {
+      assert(InputSection::classof(chunk));
+      const WasmSection &wasmSection =
+          static_cast<InputSection *>(chunk)->section;
+      assert(wasmSection.Content.size() >= countSize &&
+             "branch hint section is missing its function count");
+      totalFunctions += decodeULEB128(wasmSection.Content.data());
+    }
+    assert(totalFunctions <= UINT32_MAX && "function count must fit in 32 bits");
+
+    // Second pass: create an adjusted copy of each chunk.
+    for (unsigned i = 0; i < inputChunks.size(); i++) {
+      auto *section = static_cast<InputSection *>(inputChunks[i]);
+      WasmSection *adjustedWasmSection = make<WasmSection>(section->section);
+
+      if (i == 0) {
+        // The first chunk keeps its count field, rewritten to hold the total.
+        // The section contents are a read-only view, so patch a copy of them.
+        auto *adjustedContent = make<std::vector<uint8_t>>(
+            adjustedWasmSection->Content.begin(),
+            adjustedWasmSection->Content.end());
+        encodeULEB128(totalFunctions, adjustedContent->data(), countSize);
+        adjustedWasmSection->Content =
+            ArrayRef(adjustedContent->data(), adjustedContent->size());
+      } else {
+        // Every other chunk loses its count field, and its relocations move
+        // back by the same amount so they still point at the same bytes.
+        adjustedWasmSection->Content =
+            adjustedWasmSection->Content.slice(countSize);
+        for (auto &relocation : adjustedWasmSection->Relocations)
+          relocation.Offset -= countSize;
+      }
+
+      newInputChunks[i] = make<InputSection>(*adjustedWasmSection,
+                                             section->file, section->alignment);
+      newInputChunks[i]->setRelocations(adjustedWasmSection->Relocations);
+    }
+
+    sec = make<CustomSection>(name, newInputChunks);
+  }
+
+  // Create the section's symbol and add the section to the output here, rather
+  // than in createCustomSections, so that it is placed before the code section.
+  auto *sym = make<OutputSectionSymbol>(sec);
+  out.linkingSec->addToSymtab(sym);
+  sec->sectionSym = sym;
+  addSection(sec);
+}
+
 // Create relocations sections in the final output.
 // These are only created when relocatable output is requested.
 void Writer::createRelocSections() {
@@ -544,6 +666,9 @@ void Writer::addSections() {
   addSection(out.elemSec);
   addSection(out.dataCountSec);
 
+  // The Branch Hints section must be emitted before the code section.
+  createBranchHintSection();
+
   addSection(make<CodeSection>(out.functionSec->inputFunctions));
   addSection(make<DataSection>(segments));
 

diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -127,6 +127,11 @@ class LLVM_ABI MCAsmBackend {
                           const MCValue &Target, MutableArrayRef<char> Data,
                           uint64_t Value, bool IsResolved) = 0;
 
+  /// Return the fixup kind to use for a ULEB128 padded to \p PadTo bytes.
+  virtual MCFixupKind getULEB128Fixup(unsigned PadTo) const {
+    llvm_unreachable("Need to implement hook if target has ULEB128 fixups");
+  }
+
   /// @}
 
   /// \name Target Relaxation Interfaces

diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -117,7 +117,8 @@ class MCObjectStreamer : public MCStreamer {
                                  const MCExpr *Value) override;
   void emitValueImpl(const MCExpr *Value, unsigned Size,
                      SMLoc Loc = SMLoc()) override;
-  void emitULEB128Value(const MCExpr *Value) override;
+  void emitULEB128Value(const MCExpr *Value,
+                        unsigned PadTo = 0) override;
   void emitSLEB128Value(const MCExpr *Value) override;
   void emitWeakReference(MCSymbol *Alias, const MCSymbol *Target) override;
   void changeSection(MCSection *Section, uint32_t Subsection = 0) override;

diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
@@ -750,7 +750,8 @@ class LLVM_ABI MCStreamer {
     emitIntValue(Value, Size);
   }
 
-  virtual void emitULEB128Value(const MCExpr *Value);
+  virtual void emitULEB128Value(const MCExpr *Value,
+                                unsigned PadTo = 0);
 
   virtual void emitSLEB128Value(const MCExpr *Value);
 

diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -256,7 +256,8 @@ class MCAsmStreamer final : public MCStreamer {
   void emitIntValueInHex(uint64_t Value, unsigned Size) override;
   void emitIntValueInHexWithPadding(uint64_t Value, unsigned Size) override;
 
-  void emitULEB128Value(const MCExpr *Value) override;
+  void emitULEB128Value(const MCExpr *Value,
+                        unsigned PadTo) override;
 
   void emitSLEB128Value(const MCExpr *Value) override;
 
@@ -1402,13 +1403,21 @@ void MCAsmStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
   }
 }
 
-void MCAsmStreamer::emitULEB128Value(const MCExpr *Value) {
+void MCAsmStreamer::emitULEB128Value(const MCExpr *Value,
+                                     unsigned PadTo) {
   int64_t IntValue;
   if (Value->evaluateAsAbsolute(IntValue)) {
-    emitULEB128IntValue(IntValue);
+    emitULEB128IntValue(IntValue, PadTo); // Keep the requested width.
     return;
   }
-  OS << "\t.uleb128 ";
+  if (!PadTo)
+    OS << "\t.uleb128 ";
+  else {
+    // Only a 5-byte LEB (an int32) is supported for now. The directive name
+    // must be the one the WebAssembly assembly parser accepts.
+    assert(PadTo == 5 && "only 5-byte padded ULEB128 values are supported");
+    OS << "\t.uleb128_i32 ";
+  }
   Value->print(OS, MAI);
   EmitEOL();
 }

diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -259,13 +259,30 @@ void MCObjectStreamer::emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc,
   Symbol->setOffset(Offset);
 }
 
-void MCObjectStreamer::emitULEB128Value(const MCExpr *Value) {
+void MCObjectStreamer::emitULEB128Value(const MCExpr *Value,
+                                        unsigned PadTo) {
   int64_t IntValue;
+  // Avoid fixups when possible, but still honor the requested width.
   if (Value->evaluateAsAbsolute(IntValue, getAssemblerPtr())) {
-    emitULEB128IntValue(IntValue);
+    emitULEB128IntValue(IntValue, PadTo);
     return;
   }
-  insert(getContext().allocFragment<MCLEBFragment>(*Value, false));
+
+  if (!PadTo) {
+    // Emit the Value as best we can without padding or a fixup.
+    insert(getContext().allocFragment<MCLEBFragment>(*Value, false));
+    return;
+  }
+
+  // A padded width was requested: ask the target's assembler backend which
+  // fixup kind describes a ULEB128 of that width.
+  const MCAsmBackend &MAB = getAssembler().getBackend();
+  MCFixupKind Fixup = MAB.getULEB128Fixup(PadTo);
+
+  // Record the fixup at the current end of the data fragment, then append
+  // PadTo bytes there for the fixup to refer to.
+  MCDataFragment *DF = getOrCreateDataFragment();
+  DF->getFixups().push_back(
+      MCFixup::create(DF->getContents().size(), Value, Fixup));
+  DF->appendContents(PadTo, 0);
 }
 
 void MCObjectStreamer::emitSLEB128Value(const MCExpr *Value) {

diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
@@ -1323,7 +1323,8 @@ void MCStreamer::emitBinaryData(StringRef Data) { emitBytes(Data); }
 void MCStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) {
   visitUsedExpr(*Value);
 }
-void MCStreamer::emitULEB128Value(const MCExpr *Value) {}
+void MCStreamer::emitULEB128Value(const MCExpr *Value,
+                                  unsigned PadTo) {} // Base streamer: no-op.
 void MCStreamer::emitSLEB128Value(const MCExpr *Value) {}
 void MCStreamer::emitFill(const MCExpr &NumBytes, uint64_t Value, SMLoc Loc) {}
 void MCStreamer::emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,

diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -875,7 +875,7 @@ MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const {
   if (B.succ_empty())
     return nullptr;
 
-  // Select the successor with the larget edge weight.
+  // Select the successor with the largest edge weight.
   auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
   MachineBasicBlock *S =
       *llvm::max_element(B.successors(), [&](const MachineBasicBlock *Dst0,

diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -1116,6 +1116,15 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
       return expect(AsmToken::EndOfStatement, "EOL");
     }
 
+    if (DirectiveID.getString() == ".uleb128_i32") {
+      const MCExpr *Val;
+      SMLoc End;
+      if (Parser.parseExpression(Val, End))
+        return error("Cannot parse .uleb128_i32 expression: ", Lexer.getTok());
+      Out.emitULEB128Value(Val, 5);
+      return expect(AsmToken::EndOfStatement, "EOL");
+    }
+
     if (DirectiveID.getString() == ".asciz") {
       if (checkDataSection())
         return ParseStatus::Failure;

diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_target(WebAssemblyCodeGen
   WebAssemblyAddMissingPrototypes.cpp
   WebAssemblyArgumentMove.cpp
   WebAssemblyAsmPrinter.cpp
+  WebAssemblyBranchHinting.cpp
   WebAssemblyCFGStackify.cpp
   WebAssemblyCleanCodeAfterTrap.cpp
   WebAssemblyCFGSort.cpp