Skip to content

Commit 4450a2a

Browse files
[lld][ELF] Add support for ADRP+ADD optimization for AArch64
This diff adds support for the ADRP+ADD optimization for AArch64 described in ARM-software/abi-aa@d2ca58c, i.e. under appropriate constraints the sequence
    ADRP x0, symbol
    ADD x0, x0, :lo12: symbol
can be turned into
    NOP
    ADR x0, symbol
Test plan: make check-all
Differential revision: https://reviews.llvm.org/D117614
1 parent e188aae commit 4450a2a

File tree

7 files changed

+188
-6
lines changed

7 files changed

+188
-6
lines changed

lld/ELF/Arch/AArch64.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
591591
safeToRelaxAdrpLdr = i == size;
592592
}
593593

594+
bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
595+
const Relocation &addRel, uint64_t secAddr,
596+
uint8_t *buf) const {
597+
// When the address of sym is within the range of ADR then
598+
// we may relax
599+
// ADRP xn, sym
600+
// ADD xn, xn, :lo12: sym
601+
// to
602+
// NOP
603+
// ADR xn, sym
604+
if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
605+
addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
606+
return false;
607+
// Check if the relocations apply to consecutive instructions.
608+
if (adrpRel.offset + 4 != addRel.offset)
609+
return false;
610+
if (adrpRel.sym != addRel.sym)
611+
return false;
612+
if (adrpRel.addend != 0 || addRel.addend != 0)
613+
return false;
614+
615+
uint32_t adrpInstr = read32le(buf + adrpRel.offset);
616+
uint32_t addInstr = read32le(buf + addRel.offset);
617+
// Check if the first instruction is ADRP and the second instruction is ADD.
618+
if ((adrpInstr & 0x9f000000) != 0x90000000 ||
619+
(addInstr & 0xffc00000) != 0x91000000)
620+
return false;
621+
uint32_t adrpDestReg = adrpInstr & 0x1f;
622+
uint32_t addDestReg = addInstr & 0x1f;
623+
uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
624+
if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
625+
return false;
626+
627+
Symbol &sym = *adrpRel.sym;
628+
// Check if the address difference is within 1MiB range.
629+
int64_t val = sym.getVA() - (secAddr + addRel.offset);
630+
if (val < -1024 * 1024 || val >= 1024 * 1024)
631+
return false;
632+
633+
Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
634+
/*addend=*/0, &sym};
635+
// nop
636+
write32le(buf + adrpRel.offset, 0xd503201f);
637+
// adr x_<dest_reg>
638+
write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
639+
target->relocate(buf + adrRel.offset, adrRel, val);
640+
return true;
641+
}
642+
594643
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
595644
const Relocation &ldrRel, uint64_t secAddr,
596645
uint8_t *buf) const {
@@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
657706
getAArch64Page(secAddr + adrpSymRel.offset),
658707
64));
659708
target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
709+
tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
660710
return true;
661711
}
662712

lld/ELF/InputSection.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
10251025
}
10261026
target.relocate(bufLoc, rel, targetVA);
10271027
break;
1028+
case R_AARCH64_PAGE_PC:
1029+
if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd(
1030+
rel, relocations[i + 1], secAddr, buf)) {
1031+
++i;
1032+
continue;
1033+
}
1034+
target.relocate(bufLoc, rel, targetVA);
1035+
break;
10281036
case R_PPC64_RELAX_GOT_PC: {
10291037
// The R_PPC64_PCREL_OPT relocation must appear immediately after
10301038
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.

lld/ELF/Target.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ class AArch64Relaxer {
227227
public:
228228
explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
229229

230+
bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
231+
uint64_t secAddr, uint8_t *buf) const;
230232
bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
231233
uint64_t secAddr, uint8_t *buf) const;
232234
};

lld/test/ELF/aarch64-adrp-add.s

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# REQUIRES: aarch64
2+
# RUN: rm -rf %t && split-file %s %t
3+
4+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
5+
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-low.t -o %t/a-low
6+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=OUT-OF-RANGE
7+
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-high.t -o %t/a-high
8+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=OUT-OF-RANGE
9+
10+
# OUT-OF-RANGE: adrp x30
11+
# OUT-OF-RANGE-NEXT: add x30, x30
12+
13+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
14+
# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t -o %t/a-low
15+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=IN-RANGE-LOW
16+
17+
# IN-RANGE-LOW: nop
18+
# IN-RANGE-LOW-NEXT: adr x30
19+
# IN-RANGE-LOW-NEXT: adrp x1
20+
# IN-RANGE-LOW-NEXT: add x1
21+
# IN-RANGE-LOW-NEXT: adrp x15
22+
# IN-RANGE-LOW-NEXT: add x15
23+
24+
## ADRP and ADD use different registers, no relaxations should be applied.
25+
# IN-RANGE-LOW-NEXT: adrp x2
26+
# IN-RANGE-LOW-NEXT: add x3, x2
27+
28+
## ADRP and ADD use different registers, no relaxations should be applied.
29+
# IN-RANGE-LOW-NEXT: adrp x2
30+
# IN-RANGE-LOW-NEXT: add x2, x3
31+
32+
# RUN: ld.lld %t/a.o -T %t/within-adr-range-high.t -o %t/a-high
33+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=IN-RANGE-HIGH
34+
35+
# IN-RANGE-HIGH: nop
36+
# IN-RANGE-HIGH-NEXT: adr x30
37+
# IN-RANGE-HIGH-NEXT: nop
38+
# IN-RANGE-HIGH-NEXT: adr x1
39+
# IN-RANGE-HIGH-NEXT: nop
40+
# IN-RANGE-HIGH-NEXT: adr x15
41+
42+
## ADRP and ADD use different registers, no relaxations should be applied.
43+
# IN-RANGE-HIGH-NEXT: adrp x2
44+
# IN-RANGE-HIGH-NEXT: add x3, x2
45+
46+
## ADRP and ADD use different registers, no relaxations should be applied.
47+
# IN-RANGE-HIGH-NEXT: adrp x2
48+
# IN-RANGE-HIGH-NEXT: add x2, x3
49+
50+
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
51+
# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t --no-relax -o %t/a
52+
## --no-relax disables relaxations.
53+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s --check-prefix=OUT-OF-RANGE
54+
55+
## .rodata and .text are close to each other,
56+
## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
57+
## is equal to the lowest allowed value.
58+
#--- within-adr-range-low.t
59+
SECTIONS {
60+
.rodata 0x1000: { *(.rodata) }
61+
.text 0x100ffc: { *(.text) }
62+
}
63+
64+
## .rodata and .text are far apart,
65+
## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
66+
## is equal to the lowest allowed value minus one.
67+
#--- out-of-adr-range-low.t
68+
SECTIONS {
69+
.rodata 0x1000: { *(.rodata) }
70+
.text 0x100ffd: { *(.text) }
71+
}
72+
73+
## .rodata and .text are close to each other,
74+
## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
75+
## is equal to the highest allowed value.
76+
#--- within-adr-range-high.t
77+
SECTIONS {
78+
.text 0x1000: { *(.text) }
79+
.rodata 0x101003: { *(.rodata) }
80+
}
81+
82+
## .rodata and .text are far apart,
83+
## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
84+
## is equal to the highest allowed value plus one.
85+
#--- out-of-adr-range-high.t
86+
SECTIONS {
87+
.text 0x1000: { *(.text) }
88+
.rodata 0x101004: { *(.rodata) }
89+
}
90+
91+
#--- a.s
92+
.rodata
93+
x:
94+
.word 10
95+
.text
96+
.global _start
97+
_start:
98+
adrp x30, x
99+
add x30, x30, :lo12:x
100+
adrp x1, x
101+
add x1, x1, :lo12:x
102+
adrp x15, x
103+
add x15, x15, :lo12:x
104+
adrp x2, x
105+
add x3, x2, :lo12:x
106+
adrp x2, x
107+
add x2, x3, :lo12:x

lld/test/ELF/aarch64-adrp-ldr-got.s

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
66
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
77

8-
# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
8+
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t -o %t/a
99
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
1010

1111
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
@@ -29,8 +29,15 @@
2929
# CHECK-NEXT: adrp x6
3030
# CHECK-NEXT: ldr
3131

32+
# RUN: ld.lld %t/a.o -T %t/within-adr-range.t -o %t/a
33+
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck --check-prefix=ADR %s
34+
35+
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
36+
# ADR: nop
37+
# ADR-NEXT: adr x1
38+
3239
## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
33-
# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
40+
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t --no-relax -o %t/no-relax
3441
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
3542
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
3643

@@ -61,12 +68,20 @@
6168

6269
## This linker script ensures that .rodata and .text are sufficiently (>1M)
6370
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
64-
#--- linker.t
71+
#--- out-of-adr-range.t
6572
SECTIONS {
6673
.rodata 0x1000: { *(.rodata) }
6774
.text 0x200100: { *(.text) }
6875
}
6976

77+
## This linker script ensures that .rodata and .text are sufficiently (<1M)
78+
## close to each other so that the adrp + ldr pair can be relaxed to nop + adr.
79+
#--- within-adr-range.t
80+
SECTIONS {
81+
.rodata 0x1000: { *(.rodata) }
82+
.text 0x2000: { *(.text) }
83+
}
84+
7085
## This linker script ensures that .rodata and .text are sufficiently (>4GB)
7186
## far apart so that the adrp + ldr pair cannot be relaxed.
7287
#--- out-of-range.t

lld/test/ELF/aarch64-copy.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t.o
33
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %p/Inputs/relocation-copy.s -o %t2.o
44
// RUN: ld.lld -shared %t2.o -soname fixed-length-string.so -o %t2.so
5-
// RUN: ld.lld %t.o %t2.so -o %t
5+
// RUN: ld.lld --no-relax %t.o %t2.so -o %t
66
// RUN: llvm-readobj -S -r --symbols %t | FileCheck %s
77
// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CODE %s
88
// RUN: llvm-objdump -s --section=.rodata %t | FileCheck --check-prefix=RODATA %s

lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# REQUIRES: aarch64
22
# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
33

4-
# RUN: ld.lld %t.o -o %t
4+
# RUN: ld.lld --no-relax %t.o -o %t
55
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE
66
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC
77

8-
# RUN: ld.lld -pie %t.o -o %t
8+
# RUN: ld.lld -pie --no-relax %t.o -o %t
99
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE
1010
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC
1111

0 commit comments

Comments
 (0)