Skip to content

Commit 60434bf

Browse files
author
z1.cciauto
committed
merge main into amd-staging
2 parents f444c81 + 6a030b3 commit 60434bf

File tree

149 files changed

+2471
-742
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

149 files changed

+2471
-742
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,16 @@ class MCPlusBuilder {
637637
return false;
638638
}
639639

640+
/// Target hook: report whether \p Inst is an "AddXri" instruction.
/// This default implementation is an unimplemented stub that triggers
/// llvm_unreachable; a target that needs the query must override it.
virtual bool isAddXri(const MCInst &Inst) const {
641+
llvm_unreachable("not implemented");
642+
// Follows llvm_unreachable; presumably kept so every path returns a value.
return false;
643+
}
644+
645+
/// Target hook: report whether \p Inst is a "MOVW" instruction.
/// This default implementation is an unimplemented stub that triggers
/// llvm_unreachable; a target that needs the query must override it.
virtual bool isMOVW(const MCInst &Inst) const {
646+
llvm_unreachable("not implemented");
647+
// Follows llvm_unreachable; presumably kept so every path returns a value.
return false;
648+
}
649+
640650
virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
641651

642652
virtual bool mayLoad(const MCInst &Inst) const {

bolt/include/bolt/Core/Relocation.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ struct Relocation {
6464
/// Skip relocations that we don't want to handle in BOLT
6565
static bool skipRelocationType(uint32_t Type);
6666

67-
/// Handle special cases when relocation should not be processed by BOLT or
68-
/// change relocation \p Type to proper one before continuing if \p Contents
69-
/// and \p Type mismatch occurred.
70-
static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents);
71-
7267
/// Adjust value depending on relocation type (make it PC relative or not).
7368
static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC);
7469

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
14731473
}
14741474
}
14751475

1476+
uint64_t Addend = Relocation.Addend;
1477+
1478+
// For GOT relocations, create a reference against GOT entry ignoring
1479+
// the relocation symbol.
1480+
if (Relocation::isGOT(Relocation.Type)) {
1481+
assert(Relocation::isPCRelative(Relocation.Type) &&
1482+
"GOT relocation must be PC-relative on RISC-V");
1483+
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
1484+
Addend = Relocation.Value + Relocation.Offset + getAddress();
1485+
}
14761486
int64_t Value = Relocation.Value;
14771487
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1478-
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1479-
Relocation.Type);
1488+
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
14801489
(void)Result;
14811490
assert(Result && "cannot replace immediate with relocation");
14821491
}

bolt/lib/Core/Relocation.cpp

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) {
257257
}
258258
}
259259

260-
static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) {
261-
return false;
262-
}
263-
264-
static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) {
265-
auto IsMov = [](uint64_t Contents) -> bool {
266-
// The bits 28-23 are 0b100101
267-
return (Contents & 0x1f800000) == 0x12800000;
268-
};
269-
270-
auto IsB = [](uint64_t Contents) -> bool {
271-
// The bits 31-26 are 0b000101
272-
return (Contents & 0xfc000000) == 0x14000000;
273-
};
274-
275-
auto IsAddImm = [](uint64_t Contents) -> bool {
276-
// The bits 30-23 are 0b00100010
277-
return (Contents & 0x7F800000) == 0x11000000;
278-
};
279-
280-
// The linker might relax ADRP+LDR instruction sequence for loading symbol
281-
// address from GOT table to ADRP+ADD sequence that would point to the
282-
// binary-local symbol. Change relocation type in order to process it right.
283-
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
284-
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
285-
return false;
286-
}
287-
288-
// The linker might perform TLS relocations relaxations, such as
289-
// changed TLS access model (e.g. changed global dynamic model
290-
// to initial exec), thus changing the instructions. The static
291-
// relocations might be invalid at this point and we might not
292-
// need to process these relocations anymore.
293-
// More information could be found by searching
294-
// elfNN_aarch64_tls_relax in bfd
295-
switch (Type) {
296-
default:
297-
break;
298-
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
299-
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
300-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
301-
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
302-
if (IsMov(Contents))
303-
return true;
304-
}
305-
}
306-
307-
// The linker might replace load/store instruction with jump and
308-
// veneer due to errata 843419
309-
// https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
310-
// Thus load/store relocations for these instructions must be ignored
311-
// NOTE: We only process GOT and TLS relocations this way since the
312-
// addend used in load/store instructions won't change after bolt
313-
// (it is important since the instruction in veneer won't have relocation)
314-
switch (Type) {
315-
default:
316-
break;
317-
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
318-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
319-
case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
320-
if (IsB(Contents))
321-
return true;
322-
}
323-
}
324-
325-
return false;
326-
}
327-
328-
static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) {
329-
return false;
330-
}
331-
332260
static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) {
333261
switch (Type) {
334262
default:
@@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) {
798726
}
799727
}
800728

801-
bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) {
802-
switch (Arch) {
803-
default:
804-
llvm_unreachable("Unsupported architecture");
805-
case Triple::aarch64:
806-
return skipRelocationProcessAArch64(Type, Contents);
807-
case Triple::riscv64:
808-
return skipRelocationProcessRISCV(Type, Contents);
809-
case Triple::x86_64:
810-
return skipRelocationProcessX86(Type, Contents);
811-
}
812-
}
813-
814729
uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) {
815730
switch (Arch) {
816731
default:

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
22292229
ErrorOr<uint64_t> Value =
22302230
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
22312231
assert(Value && "failed to extract relocated value");
2232-
if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
2233-
return true;
22342232

22352233
ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
22362234
Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
22832281
}
22842282
}
22852283

2286-
// If no symbol has been found or if it is a relocation requiring the
2287-
// creation of a GOT entry, do not link against the symbol but against
2288-
// whatever address was extracted from the instruction itself. We are
2289-
// not creating a GOT entry as this was already processed by the linker.
2290-
// For GOT relocs, do not subtract addend as the addend does not refer
2291-
// to this instruction's target, but it refers to the target in the GOT
2292-
// entry.
2293-
if (Relocation::isGOT(RType)) {
2294-
Addend = 0;
2295-
SymbolAddress = ExtractedValue + PCRelOffset;
2296-
} else if (Relocation::isTLS(RType)) {
2284+
// GOT relocation can cause the underlying instruction to be modified by the
2285+
// linker, resulting in the extracted value being different from the actual
2286+
// symbol. It's also possible to have a GOT entry for a symbol defined in the
2287+
// binary. In the latter case, the instruction can be using the GOT version
2288+
// causing the extracted value mismatch. Similar cases can happen for TLS.
2289+
// Pass the relocation information as is to the disassembler and let it decide
2290+
// how to use it for the operand symbolization.
2291+
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
22972292
SkipVerification = true;
22982293
} else if (!SymbolAddress) {
22992294
assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
26662661

26672662
MCSymbol *ReferencedSymbol = nullptr;
26682663
if (!IsSectionRelocation) {
2669-
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2664+
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
26702665
ReferencedSymbol = BD->getSymbol();
2671-
else if (BC->isGOTSymbol(SymbolName))
2666+
} else if (BC->isGOTSymbol(SymbolName)) {
26722667
if (BinaryData *BD = BC->getGOTSymbol())
26732668
ReferencedSymbol = BD->getSymbol();
2669+
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
2670+
ReferencedSymbol = BD->getSymbol();
2671+
}
26742672
}
26752673

26762674
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
27982796
}
27992797
}
28002798

2801-
if (ForceRelocation) {
2802-
std::string Name =
2803-
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
2804-
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2805-
SymbolAddress = 0;
2806-
if (Relocation::isGOT(RType))
2807-
Addend = Address;
2799+
if (ForceRelocation && !ReferencedBF) {
2800+
// Create the relocation symbol if it's not defined in the binary.
2801+
if (SymbolAddress == 0)
2802+
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
2803+
28082804
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2809-
<< SymbolName << " with addend " << Addend << '\n');
2805+
<< ReferencedSymbol->getName() << " with addend "
2806+
<< Addend << '\n');
28102807
} else if (ReferencedBF) {
28112808
ReferencedSymbol = ReferencedBF->getSymbol();
28122809
uint64_t RefFunctionOffset = 0;

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
281281
return Inst.getOpcode() == AArch64::ADR;
282282
}
283283

284-
bool isAddXri(const MCInst &Inst) const {
284+
bool isAddXri(const MCInst &Inst) const override {
285285
return Inst.getOpcode() == AArch64::ADDXri;
286286
}
287287

@@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
318318
Inst.getOpcode() == AArch64::CBZX);
319319
}
320320

321-
bool isMOVW(const MCInst &Inst) const {
321+
bool isMOVW(const MCInst &Inst) const override {
322322
return (Inst.getOpcode() == AArch64::MOVKWi ||
323323
Inst.getOpcode() == AArch64::MOVKXi ||
324324
Inst.getOpcode() == AArch64::MOVNWi ||

bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
4545
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
4646
};
4747

48-
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
49-
// NOP+ADR. After the conversion, the linker might keep the relocations and
50-
// if we try to symbolize ADR's operand using outdated relocations, we might
51-
// get unexpected results. Hence, we check for the conversion/relaxation, and
52-
// ignore the relocation. The symbolization is done based on the PC-relative
53-
// value of the operand instead.
54-
if (Relocation && BC.MIB->isADR(Inst)) {
55-
if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
56-
Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
57-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
58-
<< Twine::utohexstr(InstAddress) << '\n');
59-
Relocation = nullptr;
48+
if (Relocation) {
49+
auto AdjustedRel = adjustRelocation(*Relocation, Inst);
50+
if (AdjustedRel) {
51+
addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
52+
return true;
6053
}
61-
}
6254

63-
if (Relocation) {
64-
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
65-
return true;
55+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
56+
<< Twine::utohexstr(InstAddress) << '\n');
6657
}
6758

6859
if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
8879
return true;
8980
}
9081

82+
// Decide how relocation Rel should be used to symbolize the operand of Inst.
// Checks run in this order:
//   1. Inst is ADR and Rel is ADD_ABS_LO12_NC or LD64_GOT_LO12_NC
//      -> std::nullopt (relocation ignored).
//   2. Inst is MOVW and Rel is one of the listed TLSDESC/TLSIE types
//      -> std::nullopt (relocation ignored).
//   3. Rel is not a GOT relocation -> Rel returned unchanged.
//   4. Rel is LD64_GOT_LO12_NC and Inst is AddXri -> copy of Rel with the
//      type switched to ADD_ABS_LO12_NC.
//   5. Any other GOT relocation -> copy of Rel re-targeted at the
//      "__BOLT_got_zero" symbol with Rel.Value as the addend.
std::optional<Relocation>
83+
AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
84+
const MCInst &Inst) const {
85+
BinaryContext &BC = Function.getBinaryContext();
86+
87+
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
88+
// NOP+ADR. After the conversion, the linker might keep the relocations and
89+
// if we try to symbolize ADR's operand using outdated relocations, we might
90+
// get unexpected results. Hence, we check for the conversion/relaxation, and
91+
// ignore the relocation. The symbolization is done based on the PC-relative
92+
// value of the operand instead.
93+
if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
94+
Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
95+
return std::nullopt;
96+
97+
// The linker might perform TLS relocations relaxations, such as changed TLS
98+
// access model (e.g. changed global dynamic model to initial exec), thus
99+
// changing the instructions. The static relocations might be invalid at this
100+
// point and we don't have to process these relocations anymore. More
101+
// information could be found by searching elfNN_aarch64_tls_relax in bfd.
102+
if (BC.MIB->isMOVW(Inst)) {
103+
switch (Rel.Type) {
104+
default:
105+
break;
106+
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
107+
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
108+
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
109+
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
110+
return std::nullopt;
111+
}
112+
}
113+
114+
// Only GOT relocations are rewritten below; everything else passes through.
if (!Relocation::isGOT(Rel.Type))
115+
return Rel;
116+
117+
// Work on a copy so the caller's relocation is left untouched.
Relocation AdjustedRel = Rel;
118+
if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
119+
// The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
120+
// second instruction and have to use the relocation type for ADD.
121+
AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
122+
} else {
123+
// For instructions that reference GOT, ignore the referenced symbol and
124+
// use value at the relocation site. FixRelaxationPass will look at
125+
// instruction pairs and will perform necessary adjustments.
126+
// SymbolValue is consulted only by the assert below; the (void) cast marks
// it as used when the assert is compiled out.
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
127+
assert(SymbolValue && "Symbol value should be set");
128+
(void)SymbolValue;
129+
130+
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
131+
AdjustedRel.Addend = Rel.Value;
132+
}
133+
134+
return AdjustedRel;
135+
}
136+
91137
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
92138
int64_t Value,
93139
uint64_t Address) {}

bolt/lib/Target/AArch64/AArch64MCSymbolizer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Core/BinaryFunction.h"
1313
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
14+
#include <optional>
1415

1516
namespace llvm {
1617
namespace bolt {
@@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
2021
BinaryFunction &Function;
2122
bool CreateNewSymbols{true};
2223

24+
/// Modify relocation \p Rel based on type of the relocation and the
25+
/// instruction it was applied to. Return the new relocation info, or
26+
/// std::nullopt if the relocation should be ignored, e.g. in the case the
27+
/// instruction was modified by the linker.
28+
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
29+
const MCInst &Inst) const;
30+
2331
public:
2432
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
2533
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

0 commit comments

Comments
 (0)