Skip to content

Commit 8acc3b4

Browse files
[lld][ELF] Support adrp+ldr GOT optimization for AArch64
This diff adds the first bits of support for the AArch64 relocation relaxations discussed in ARM-software/abi-aa#106. In particular, it handles the instruction pair `adrp x0, :got:symbol` followed by `ldr x0, [x0, :got_lo12:symbol]`. Test plan: make check-all. Differential revision: https://reviews.llvm.org/D112063
1 parent 6b8362e commit 8acc3b4

File tree

5 files changed

+305
-6
lines changed

5 files changed

+305
-6
lines changed

lld/ELF/Arch/AArch64.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
568568
llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
569569
}
570570

571+
AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
572+
if (!config->relax || config->emachine != EM_AARCH64) {
573+
safeToRelaxAdrpLdr = false;
574+
return;
575+
}
576+
// Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
577+
// always appear in pairs.
578+
size_t i = 0;
579+
const size_t size = relocs.size();
580+
for (; i != size; ++i) {
581+
if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
582+
if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
583+
++i;
584+
continue;
585+
}
586+
break;
587+
} else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
588+
break;
589+
}
590+
}
591+
safeToRelaxAdrpLdr = i == size;
592+
}
593+
594+
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
595+
const Relocation &ldrRel, uint64_t secAddr,
596+
uint8_t *buf) const {
597+
if (!safeToRelaxAdrpLdr)
598+
return false;
599+
600+
// When the definition of sym is not preemptible then we may
601+
// be able to relax
602+
// ADRP xn, :got: sym
603+
// LDR xn, [xn, :got_lo12: sym]
604+
// to
605+
// ADRP xn, sym
606+
// ADD xn, xn, :lo12: sym
607+
608+
if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
609+
ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
610+
return false;
611+
// Check if the relocations apply to consecutive instructions.
612+
if (adrpRel.offset + 4 != ldrRel.offset)
613+
return false;
614+
// Check if the relocations reference the same symbol and
615+
// skip undefined, preemptible and STT_GNU_IFUNC symbols.
616+
if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
617+
adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
618+
return false;
619+
// Check if the addends of both instructions are zero.
620+
if (adrpRel.addend != 0 || ldrRel.addend != 0)
621+
return false;
622+
uint32_t adrpInstr = read32le(buf + adrpRel.offset);
623+
uint32_t ldrInstr = read32le(buf + ldrRel.offset);
624+
// Check if the first instruction is ADRP and the second instruction is LDR.
625+
if ((adrpInstr & 0x9f000000) != 0x90000000 ||
626+
(ldrInstr & 0x3b000000) != 0x39000000)
627+
return false;
628+
// Check the value of the sf bit.
629+
if (!(ldrInstr >> 31))
630+
return false;
631+
uint32_t adrpDestReg = adrpInstr & 0x1f;
632+
uint32_t ldrDestReg = ldrInstr & 0x1f;
633+
uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
634+
// Check if ADRP and LDR use the same register.
635+
if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
636+
return false;
637+
638+
Symbol &sym = *adrpRel.sym;
639+
// Check if the address difference is within 4GB range.
640+
int64_t val =
641+
getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
642+
if (val != llvm::SignExtend64(val, 33))
643+
return false;
644+
645+
Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
646+
adrpRel.offset, /*addend=*/0, &sym};
647+
Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
648+
/*addend=*/0, &sym};
649+
650+
// adrp x_<dest_reg>
651+
write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
652+
// add x_<dest_reg>, x_<dest_reg>
653+
write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));
654+
655+
target->relocate(buf + adrpSymRel.offset, adrpSymRel,
656+
SignExtend64(getAArch64Page(sym.getVA()) -
657+
getAArch64Page(secAddr + adrpSymRel.offset),
658+
64));
659+
target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
660+
return true;
661+
}
662+
571663
// AArch64 may use security features in variant PLT sequences. These are:
572664
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
573665
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used

lld/ELF/InputSection.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
10101010
const unsigned bits = config->wordsize * 8;
10111011
const TargetInfo &target = *elf::target;
10121012
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
1013-
1014-
for (const Relocation &rel : relocations) {
1013+
AArch64Relaxer aarch64relaxer(relocations);
1014+
for (size_t i = 0, size = relocations.size(); i != size; ++i) {
1015+
const Relocation &rel = relocations[i];
10151016
if (rel.expr == R_NONE)
10161017
continue;
10171018
uint64_t offset = rel.offset;
10181019
uint8_t *bufLoc = buf + offset;
10191020

1020-
uint64_t addrLoc = getOutputSection()->addr + offset;
1021+
uint64_t secAddr = getOutputSection()->addr;
10211022
if (auto *sec = dyn_cast<InputSection>(this))
1022-
addrLoc += sec->outSecOff;
1023+
secAddr += sec->outSecOff;
1024+
const uint64_t addrLoc = secAddr + offset;
10231025
const uint64_t targetVA =
10241026
SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc,
1025-
*rel.sym, rel.expr), bits);
1026-
1027+
*rel.sym, rel.expr),
1028+
bits);
10271029
switch (rel.expr) {
10281030
case R_RELAX_GOT_PC:
10291031
case R_RELAX_GOT_PC_NOPIC:
10301032
target.relaxGot(bufLoc, rel, targetVA);
10311033
break;
1034+
case R_AARCH64_GOT_PAGE_PC:
1035+
if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr(
1036+
rel, relocations[i + 1], secAddr, buf)) {
1037+
++i;
1038+
continue;
1039+
}
1040+
target.relocate(bufLoc, rel, targetVA);
1041+
break;
10321042
case R_PPC64_RELAX_GOT_PC: {
10331043
// The R_PPC64_PCREL_OPT relocation must appear immediately after
10341044
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.

lld/ELF/Target.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,16 @@ void addPPC64SaveRestore();
221221
uint64_t getPPC64TocBase();
222222
uint64_t getAArch64Page(uint64_t expr);
223223

224+
class AArch64Relaxer {
225+
bool safeToRelaxAdrpLdr = true;
226+
227+
public:
228+
explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
229+
230+
bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
231+
uint64_t secAddr, uint8_t *buf) const;
232+
};
233+
224234
extern const TargetInfo *target;
225235
TargetInfo *getTarget();
226236

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
## This test verifies that the pair adrp + ldr is relaxed/not relaxed
2+
## depending on the target symbol properties.
3+
4+
# REQUIRES: aarch64
5+
# RUN: split-file %s %t
6+
7+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o
8+
9+
# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so
10+
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \
11+
# RUN: FileCheck --check-prefix=LIB %s
12+
13+
## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied.
14+
LIB: adrp x0
15+
LIB-NEXT: add x0
16+
17+
## Symbol 'global_sym' is preemptible, no relaxations should be applied.
18+
LIB-NEXT: adrp x1
19+
LIB-NEXT: ldr x1
20+
21+
## Symbol 'undefined_sym' is undefined, no relaxations should be applied.
22+
LIB-NEXT: adrp x2
23+
LIB-NEXT: ldr x2
24+
25+
## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied.
26+
LIB-NEXT: adrp x3
27+
LIB-NEXT: ldr x3
28+
29+
# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols
30+
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \
31+
# RUN: FileCheck --check-prefix=EXE %s
32+
33+
## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied.
34+
EXE: adrp x1
35+
EXE-NEXT: add x1
36+
37+
## The linker script ensures that .rodata and .text are sufficiently (>1MB)
38+
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
39+
#--- linker.t
40+
SECTIONS {
41+
.rodata 0x1000: { *(.rodata) }
42+
.text 0x300100: { *(.text) }
43+
}
44+
45+
#--- symbols.s
46+
.rodata
47+
.hidden hidden_sym
48+
hidden_sym:
49+
.word 10
50+
51+
.global global_sym
52+
global_sym:
53+
.word 10
54+
55+
.text
56+
.type ifunc_sym STT_GNU_IFUNC
57+
.hidden ifunc_sym
58+
ifunc_sym:
59+
nop
60+
61+
.global _start
62+
_start:
63+
adrp x0, :got:hidden_sym
64+
ldr x0, [x0, #:got_lo12:hidden_sym]
65+
adrp x1, :got:global_sym
66+
ldr x1, [x1, #:got_lo12:global_sym]
67+
adrp x2, :got:undefined_sym
68+
ldr x2, [x2, #:got_lo12:undefined_sym]
69+
adrp x3, :got:ifunc_sym
70+
ldr x3, [x3, #:got_lo12:ifunc_sym]

lld/test/ELF/aarch64-adrp-ldr-got.s

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# REQUIRES: aarch64
2+
# RUN: split-file %s %t
3+
4+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
5+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
6+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
7+
8+
# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
9+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
10+
11+
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
12+
## This test verifies the encoding when the register x1 is used.
13+
# CHECK: adrp x1
14+
# CHECK-NEXT: add x1, x1
15+
16+
## ADRP contains a nonzero addend, no relaxations should be applied.
17+
# CHECK-NEXT: adrp x2
18+
# CHECK-NEXT: ldr
19+
20+
## LDR contains a nonzero addend, no relaxations should be applied.
21+
# CHECK-NEXT: adrp x3
22+
# CHECK-NEXT: ldr
23+
24+
## LDR's destination register differs from ADRP's, no relaxations should be applied.
25+
# CHECK-NEXT: adrp x4
26+
# CHECK-NEXT: ldr
27+
28+
## LDR's base register differs from ADRP's destination register, no relaxations should be applied.
29+
# CHECK-NEXT: adrp x6
30+
# CHECK-NEXT: ldr
31+
32+
## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
33+
# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
34+
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
35+
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
36+
37+
# X1-NO-RELAX: adrp x1
38+
# X1-NO-RELAX-NEXT: ldr
39+
40+
## Symbol 'x' is nonpreemptible, but the address is not within adrp range.
41+
# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range
42+
# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \
43+
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
44+
45+
## Relocations do not appear in pairs, no relaxations should be applied.
46+
# RUN: ld.lld %t/unpaired.o -o %t/unpaired
47+
# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \
48+
# RUN: FileCheck --check-prefix=UNPAIRED %s
49+
50+
# UNPAIRED: adrp x0
51+
# UNPAIRED-NEXT: b
52+
# UNPAIRED-NEXT: adrp x0
53+
# UNPAIRED: ldr x0
54+
55+
## Relocations do not appear in pairs, no relaxations should be applied.
56+
# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr
57+
# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \
58+
# RUN: FileCheck --check-prefix=LONE-LDR %s
59+
60+
# LONE-LDR: ldr x0
61+
62+
## This linker script ensures that .rodata and .text are sufficiently (>1MB)
63+
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
64+
#--- linker.t
65+
SECTIONS {
66+
.rodata 0x1000: { *(.rodata) }
67+
.text 0x200100: { *(.text) }
68+
}
69+
70+
## This linker script ensures that .rodata and .text are sufficiently (>4GB)
71+
## far apart so that the adrp + ldr pair cannot be relaxed.
72+
#--- out-of-range.t
73+
SECTIONS {
74+
.rodata 0x1000: { *(.rodata) }
75+
.text 0x100002000: { *(.text) }
76+
}
77+
78+
#--- a.s
79+
.rodata
80+
.hidden x
81+
x:
82+
.word 10
83+
.text
84+
.global _start
85+
_start:
86+
adrp x1, :got:x
87+
ldr x1, [x1, #:got_lo12:x]
88+
adrp x2, :got:x+1
89+
ldr x2, [x2, #:got_lo12:x]
90+
adrp x3, :got:x
91+
ldr x3, [x3, #:got_lo12:x+8]
92+
adrp x4, :got:x
93+
ldr x5, [x4, #:got_lo12:x]
94+
adrp x6, :got:x
95+
ldr x6, [x0, #:got_lo12:x]
96+
97+
#--- unpaired.s
98+
.text
99+
.hidden x
100+
x:
101+
nop
102+
.global _start
103+
_start:
104+
adrp x0, :got:x
105+
b L
106+
adrp x0, :got:x
107+
L:
108+
ldr x0, [x0, #:got_lo12:x]
109+
110+
#--- lone-ldr.s
111+
.text
112+
.hidden x
113+
x:
114+
nop
115+
.global _start
116+
_start:
117+
ldr x0, [x0, #:got_lo12:x]

0 commit comments

Comments
 (0)