Skip to content

Commit 2c19008

Browse files
authored
[lld][LoongArch] Support TLSDESC GD/LD to IE/LE (#123715)
Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new RelExpr, RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC, and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while using the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE.

In the normal or medium code model, there are two forms of code sequences:
* pcalau12i $a0, %desc_pc_hi20(sym_desc)
* addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
* ld.d $ra, $a0, %desc_ld(sym_desc)
* jirl $ra, $ra, %desc_call(sym_desc)
------
* pcaddi $a0, %desc_pcrel_20(sym_desc)
* ld.d $ra, $a0, %desc_ld(sym_desc)
* jirl $ra, $ra, %desc_call(sym_desc)

Convert to IE:
* pcalau12i $a0, %ie_pc_hi20(sym_ie)
* ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)

Convert to LE:
* lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0, otherwise NOP
* ori $a0, src, %le_lo12(sym_le) # le_hi20 != 0, src = $a0, otherwise src = $zero

For simplicity, both tlsdescToIe and tlsdescToLe always convert the preceding instructions to NOPs, because both forms of code sequence (corresponding to the relocation combinations R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) are then handled the same way.

TODO: When relaxation is enabled, the redundant NOPs can be removed. This will be implemented in a future patch.

Note: The different forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model; compilers do not generate such code and lld does not support it. This is thanks to the guard in PostRASchedulerList.cpp in llvm:
```
Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure.
```
1 parent f1cc0b6 commit 2c19008

File tree

7 files changed

+400
-171
lines changed

7 files changed

+400
-171
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 149 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,13 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
4243
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4344
void finalizeRelax(int passes) const override;
45+
46+
private:
47+
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
48+
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4449
};
4550
} // end anonymous namespace
4651

@@ -58,6 +63,7 @@ enum Op {
5863
LU12I_W = 0x14000000,
5964
PCADDI = 0x18000000,
6065
PCADDU12I = 0x1c000000,
66+
PCALAU12I = 0x1a000000,
6167
LD_W = 0x28800000,
6268
LD_D = 0x28c00000,
6369
JIRL = 0x4c000000,
@@ -69,6 +75,7 @@ enum Reg {
6975
R_ZERO = 0,
7076
R_RA = 1,
7177
R_TP = 2,
78+
R_A0 = 4,
7279
R_T0 = 12,
7380
R_T1 = 13,
7481
R_T2 = 14,
@@ -961,7 +968,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
961968
case R_LARCH_TLS_LD_PC_HI20:
962969
case R_LARCH_TLS_DESC_PC_HI20:
963970
// The overflow check for i+2 will be carried out in isPairRelaxable.
964-
if (isPairRelaxable(relocs, i))
971+
if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC &&
972+
r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i))
965973
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
966974
break;
967975
case R_LARCH_CALL36:
@@ -1046,6 +1054,104 @@ static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
10461054
}
10471055
}
10481056

1057+
// Convert TLSDESC GD/LD to IE.
1058+
// In normal or medium code model, there are two forms of code sequences:
1059+
// * pcalau12i $a0, %desc_pc_hi20(sym_desc)
1060+
// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
1061+
// * ld.d $ra, $a0, %desc_ld(sym_desc)
1062+
// * jirl $ra, $ra, %desc_call(sym_desc)
1063+
// ------
1064+
// * pcaddi $a0, %desc_pcrel_20(sym_desc)
1065+
// * ld.d $ra, $a0, %desc_ld(sym_desc)
1066+
// * jirl $ra, $ra, %desc_call(sym_desc)
1067+
//
1068+
// The code sequence obtained is as follows:
1069+
// * pcalau12i $a0, %ie_pc_hi20(sym_ie)
1070+
// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)
1071+
//
1072+
// For simplicity, both tlsdescToIe and tlsdescToLe always convert the
1073+
// preceding instructions to NOPs, because both forms of code sequence
1074+
// (corresponding to the relocation combinations
1075+
// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and
1076+
// R_LARCH_TLS_DESC_PCREL20_S2) are then handled the same way.
1077+
//
1078+
// TODO: When relaxation is enabled, the redundant NOPs can be removed.
1079+
void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel,
1080+
uint64_t val) const {
1081+
switch (rel.type) {
1082+
case R_LARCH_TLS_DESC_PC_HI20:
1083+
case R_LARCH_TLS_DESC_PC_LO12:
1084+
case R_LARCH_TLS_DESC_PCREL20_S2:
1085+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1086+
break;
1087+
case R_LARCH_TLS_DESC_LD:
1088+
write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
1089+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
1090+
break;
1091+
case R_LARCH_TLS_DESC_CALL:
1092+
write32le(loc, insn(ctx.arg.is64 ? LD_D : LD_W, R_A0, R_A0,
1093+
0)); // ld.[wd] $a0, $a0, %ie_pc_lo12
1094+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
1095+
break;
1096+
default:
1097+
llvm_unreachable("unsupported relocation for TLSDESC to IE");
1098+
}
1099+
}
1100+
1101+
// Convert TLSDESC GD/LD to LE.
1102+
// The code sequence obtained in the normal or medium code model is as follows:
1103+
// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP
1104+
// * ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,
1105+
// # otherwise, src = $zero
1106+
// See the comment in tlsdescToIe for detailed information.
1107+
void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
1108+
uint64_t val) const {
1109+
assert(isInt<32>(val) &&
1110+
"val exceeds the range of medium code model in tlsdescToLe");
1111+
1112+
bool isUInt12 = isUInt<12>(val);
1113+
switch (rel.type) {
1114+
case R_LARCH_TLS_DESC_PC_HI20:
1115+
case R_LARCH_TLS_DESC_PC_LO12:
1116+
case R_LARCH_TLS_DESC_PCREL20_S2:
1117+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1118+
break;
1119+
case R_LARCH_TLS_DESC_LD:
1120+
if (isUInt12)
1121+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1122+
else
1123+
write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12),
1124+
0)); // lu12i.w $a0, %le_hi20
1125+
break;
1126+
case R_LARCH_TLS_DESC_CALL:
1127+
if (isUInt12)
1128+
write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $zero, %le_lo12
1129+
else
1130+
write32le(loc,
1131+
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
1132+
break;
1133+
default:
1134+
llvm_unreachable("unsupported relocation for TLSDESC to LE");
1135+
}
1136+
}
1137+
1138+
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
1139+
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
1140+
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
1141+
// R_RELAX_TLS_GD_TO_IE_ABS, while expr of other instructions related to the
1142+
// Hi20 relocation (pcalau12i) should be adjusted to
1143+
// RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC. Specifically, in the normal or
1144+
// medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL is
1145+
// the candidate of Lo12 relocation.
1146+
RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
1147+
if (expr == R_RELAX_TLS_GD_TO_IE) {
1148+
if (type != R_LARCH_TLS_DESC_CALL)
1149+
return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
1150+
return R_RELAX_TLS_GD_TO_IE_ABS;
1151+
}
1152+
return expr;
1153+
}
1154+
10491155
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10501156
const unsigned bits = ctx.arg.is64 ? 64 : 32;
10511157
uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1074,7 +1180,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10741180
// * i+2 -- R_LARCH_TLS_IE64_PC_LO20
10751181
// * i+3 -- R_LARCH_TLS_IE64_PC_HI12
10761182
isExtreme =
1077-
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20);
1183+
i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20;
10781184
}
10791185
if (isExtreme) {
10801186
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
@@ -1088,6 +1194,47 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10881194
tlsIeToLe(loc, rel, val);
10891195
}
10901196
continue;
1197+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
1198+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1199+
// LoongArch does not support TLSDESC GD/LD to LE/IE optimization in the
1200+
// extreme code model. In these cases, the relocs are as follows:
1201+
//
1202+
// * i -- R_LARCH_TLS_DESC_PC_HI20
1203+
// * i+1 -- R_LARCH_TLS_DESC_PC_LO12
1204+
// * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
1205+
// * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
1206+
isExtreme =
1207+
i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20;
1208+
}
1209+
[[fallthrough]];
1210+
case R_RELAX_TLS_GD_TO_IE_ABS:
1211+
if (isExtreme) {
1212+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1213+
continue;
1214+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1215+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1216+
bits);
1217+
relocateNoSym(loc, rel.type, val);
1218+
} else {
1219+
tlsdescToIe(loc, rel, val);
1220+
}
1221+
continue;
1222+
case R_RELAX_TLS_GD_TO_LE:
1223+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1224+
isExtreme =
1225+
i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20;
1226+
}
1227+
if (isExtreme) {
1228+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1229+
continue;
1230+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1231+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1232+
bits);
1233+
relocateNoSym(loc, rel.type, val);
1234+
} else {
1235+
tlsdescToLe(loc, rel, val);
1236+
}
1237+
continue;
10911238
default:
10921239
break;
10931240
}

lld/ELF/InputSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
830830
case R_GOTPLT_PC:
831831
return r.sym->getGotPltVA(ctx) + a - p;
832832
case RE_LOONGARCH_GOT_PAGE_PC:
833+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
833834
if (r.sym->hasFlag(NEEDS_TLSGD))
834835
return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p,
835836
r.type);

lld/ELF/Relocations.cpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,22 +1340,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13401340
if (ctx.arg.emachine == EM_MIPS)
13411341
return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr);
13421342

1343-
// LoongArch does not yet implement transition from TLSDESC to LE/IE, so
1344-
// generate TLSDESC dynamic relocation for the dynamic linker to handle.
1345-
if (ctx.arg.emachine == EM_LOONGARCH &&
1346-
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1347-
R_TLSDESC_CALL>(expr)) {
1348-
if (expr != R_TLSDESC_CALL) {
1349-
sym.setFlags(NEEDS_TLSDESC);
1350-
sec->addReloc({expr, type, offset, addend, &sym});
1351-
}
1352-
return 1;
1353-
}
1354-
13551343
bool isRISCV = ctx.arg.emachine == EM_RISCV;
13561344

13571345
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
1358-
R_TLSDESC_GOTPLT>(expr) &&
1346+
R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
13591347
ctx.arg.shared) {
13601348
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
13611349
// set NEEDS_TLSDESC on the label.
@@ -1369,10 +1357,14 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13691357
return 1;
13701358
}
13711359

1372-
// LoongArch supports IE to LE optimization in non-extreme code model.
1360+
// LoongArch supports IE to LE, DESC GD/LD to IE/LE optimizations in
1361+
// non-extreme code model.
13731362
bool execOptimizeInLoongArch =
13741363
ctx.arg.emachine == EM_LOONGARCH &&
1375-
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
1364+
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12 ||
1365+
type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 ||
1366+
type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL ||
1367+
type == R_LARCH_TLS_DESC_PCREL20_S2);
13761368

13771369
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
13781370
// optimizations.
@@ -1431,9 +1423,23 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
14311423
return 1;
14321424
}
14331425

1426+
// LoongArch does not support transition from TLSDESC to LE/IE in the extreme
1427+
// code model, where NEEDS_TLSDESC should be set rather than NEEDS_TLSGD, so
1428+
// that case is checked separately here.
1429+
if (ctx.arg.emachine == EM_LOONGARCH &&
1430+
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1431+
R_TLSDESC_CALL>(expr) &&
1432+
!execOptimize) {
1433+
if (expr != R_TLSDESC_CALL) {
1434+
sym.setFlags(NEEDS_TLSDESC);
1435+
sec->addReloc({expr, type, offset, addend, &sym});
1436+
}
1437+
return 1;
1438+
}
1439+
14341440
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
14351441
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
1436-
RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
1442+
RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) {
14371443
if (!execOptimize) {
14381444
sym.setFlags(NEEDS_TLSGD);
14391445
sec->addReloc({expr, type, offset, addend, &sym});

lld/ELF/Relocations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ enum RelExpr {
131131
RE_LOONGARCH_GOT_PAGE_PC,
132132
RE_LOONGARCH_TLSGD_PAGE_PC,
133133
RE_LOONGARCH_TLSDESC_PAGE_PC,
134+
RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
134135
};
135136

136137
// Architecture-neutral representation of relocation.

0 commit comments

Comments
 (0)