Skip to content

Commit 82bb8a5

Browse files
committed
[CSKY] Add codegen support of GlobalTLSAddress lowering
Static and dynamic TLS addresses are lowered at the DAG stage according to the TLS model in use. The lowering needs the PseudoTLSLA32 pseudo-instruction to obtain the address of the TLS-related entry, which resides in the constant pool.
1 parent a7f8aea commit 82bb8a5

File tree

5 files changed

+329
-0
lines changed

5 files changed

+329
-0
lines changed

llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,33 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
5858
// instructions) auto-generated.
5959
#include "CSKYGenMCPseudoLowering.inc"
6060

61+
void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
62+
const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
63+
64+
DebugLoc DL = MI->getDebugLoc();
65+
66+
MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
67+
Twine(MAI->getPrivateGlobalPrefix()) + "PC" + Twine(getFunctionNumber()) +
68+
"_" + Twine(MI->getOperand(3).getImm()));
69+
70+
OutStreamer->emitLabel(PCLabel);
71+
72+
auto Instr = BuildMI(*MF, DL, TII->get(CSKY::LRW32))
73+
.add(MI->getOperand(0))
74+
.add(MI->getOperand(2));
75+
MCInst LRWInst;
76+
MCInstLowering.Lower(Instr, LRWInst);
77+
EmitToStreamer(*OutStreamer, LRWInst);
78+
79+
Instr = BuildMI(*MF, DL, TII->get(CSKY::GRS32))
80+
.add(MI->getOperand(1))
81+
.addSym(PCLabel);
82+
MCInst GRSInst;
83+
MCInstLowering.Lower(Instr, GRSInst);
84+
EmitToStreamer(*OutStreamer, GRSInst);
85+
return;
86+
}
87+
6188
void CSKYAsmPrinter::emitCustomConstantPool(const MachineInstr *MI) {
6289

6390
// This instruction represents a floating constant pool in the function.
@@ -102,6 +129,9 @@ void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
102129
InConstantPool = false;
103130
}
104131

132+
if (MI->getOpcode() == CSKY::PseudoTLSLA32)
133+
return expandTLSLA(MI);
134+
105135
if (MI->getOpcode() == CSKY::CONSTPOOL_ENTRY)
106136
return emitCustomConstantPool(MI);
107137

llvm/lib/Target/CSKY/CSKYAsmPrinter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
2626
/// MachineFunction.
2727
MachineConstantPool *MCP;
2828

29+
void expandTLSLA(const MachineInstr *MI);
2930
void emitCustomConstantPool(const MachineInstr *MI);
3031

3132
public:

llvm/lib/Target/CSKY/CSKYISelLowering.cpp

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ SDValue CSKYTargetLowering::LowerOperation(SDValue Op,
119119
return LowerGlobalAddress(Op, DAG);
120120
case ISD::ExternalSymbol:
121121
return LowerExternalSymbol(Op, DAG);
122+
case ISD::GlobalTLSAddress:
123+
return LowerGlobalTLSAddress(Op, DAG);
122124
case ISD::JumpTable:
123125
return LowerJumpTable(Op, DAG);
124126
case ISD::BlockAddress:
@@ -1005,3 +1007,116 @@ Register CSKYTargetLowering::getExceptionSelectorRegister(
10051007
const Constant *PersonalityFn) const {
10061008
return CSKY::R1;
10071009
}
1010+
1011+
/// Lower an ISD::GlobalTLSAddress node.
///
/// The TLS model reported by the target machine for the referenced global
/// selects the lowering: local-exec and initial-exec go through
/// getStaticTLSAddr (without and with the GOT respectively), while both
/// dynamic models go through getDynamicTLSAddr.
///
/// \param Op  the GlobalTLSAddress node to lower.
/// \param DAG the selection DAG to create nodes in.
/// \returns the address of the thread-local global, with the node's constant
///          offset (if non-zero) applied by a separate ADD.
SDValue CSKYTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                  SelectionDAG &DAG) const {
  auto *GA = cast<GlobalAddressSDNode>(Op);

  SDValue Base;
  switch (getTargetMachine().getTLSModel(GA->getGlobal())) {
  case TLSModel::GeneralDynamic:
  case TLSModel::LocalDynamic:
    Base = getDynamicTLSAddr(GA, DAG);
    break;
  case TLSModel::InitialExec:
    Base = getStaticTLSAddr(GA, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalExec:
    Base = getStaticTLSAddr(GA, DAG, /*UseGOT=*/false);
    break;
  }

  const int64_t Offset = GA->getOffset();
  if (Offset == 0)
    return Base;

  // Keep the offset as its own ADD rather than folding it into the address
  // node: that leaves the base address available for common subexpression
  // elimination, and later peepholes may fold it back in when profitable.
  SDLoc DL(Op);
  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), Base,
                     DAG.getConstant(Offset, DL, MVT::i32));
}
1043+
1044+
/// Build the address of a thread-local global for the static TLS models.
///
/// A constant-pool entry describing the global is created, the thread-pointer
/// offset is obtained from it, and R31 is added to that offset.
///
/// \param N      the global-address node being lowered.
/// \param DAG    the selection DAG to create nodes in.
/// \param UseGOT false for local-exec: the offset is read straight from the
///               constant pool with LRW32 (TLSLE modifier). True for
///               initial-exec: PseudoTLSLA32 produces two values whose sum is
///               the address of the slot to load the offset from (TLSIE
///               modifier).
/// \returns the offset plus the thread pointer register R31.
SDValue CSKYTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                             SelectionDAG &DAG,
                                             bool UseGOT) const {
  MachineFunction &MF = DAG.getMachineFunction();
  CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();

  // A label id is allocated for both models; it is always recorded in the
  // constant-pool value, but only the GOT path passes it to PseudoTLSLA32.
  unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();

  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // The GOT form asks for a label-relative entry (adjustment 4, current
  // address added); the local-exec form is a plain entry.
  const CSKYCP::CSKYCPModifier Flag = UseGOT ? CSKYCP::TLSIE : CSKYCP::TLSLE;
  const bool AddCurrentAddr = UseGOT;
  const unsigned char PCAdjust = UseGOT ? 4 : 0;

  CSKYConstantPoolValue *CPV = CSKYConstantPoolConstant::Create(
      N->getGlobal(), CSKYCP::CPValue, PCAdjust, Flag, AddCurrentAddr,
      CSKYPCLabelIndex);
  SDValue CAddr = DAG.getTargetConstantPool(CPV, Ty);

  SDValue Load;
  if (UseGOT) {
    SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
    auto *LRWGRS = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty},
                                      {CAddr, PICLabel});
    auto LRWADDGRS =
        DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
    // This load reads the GOT slot holding the thread-pointer offset, not the
    // thread-local variable itself, so describe it as a GOT access instead of
    // attaching the global as the memory operand's underlying value.
    Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), LRWADDGRS,
                       MachinePointerInfo::getGOT(MF));
  } else {
    Load = SDValue(DAG.getMachineNode(CSKY::LRW32, DL, Ty, CAddr), 0);
  }

  // Add the thread pointer.
  SDValue TPReg = DAG.getRegister(CSKY::R31, MVT::i32);
  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
}
1081+
1082+
/// Build the address of a thread-local global for the dynamic TLS models.
///
/// A TLSGD constant-pool entry is created for the global, PseudoTLSLA32
/// produces two values that are added together, and the sum is passed as the
/// single argument of a call to __tls_get_addr.
///
/// \param N   the global-address node being lowered.
/// \param DAG the selection DAG to create nodes in.
/// \returns the value returned by the __tls_get_addr call.
SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const unsigned LabelId =
      MF.getInfo<CSKYMachineFunctionInfo>()->createPICLabelUId();

  SDLoc DL(N);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // The call's argument and return value use a pointer-sized integer type.
  IntegerType *PtrIntTy =
      Type::getIntNTy(*DAG.getContext(), PtrVT.getSizeInBits());

  // Constant-pool entry for the global, tied to the label id allocated above.
  CSKYConstantPoolValue *PoolValue = CSKYConstantPoolConstant::Create(
      N->getGlobal(), CSKYCP::CPValue, 4, CSKYCP::TLSGD, true, LabelId);
  SDValue PoolAddr = DAG.getTargetConstantPool(PoolValue, PtrVT);
  SDValue LabelOp = DAG.getTargetConstant(LabelId, DL, MVT::i32);

  // PseudoTLSLA32 yields two results; their sum is the call argument.
  auto *TLSLA = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {PtrVT, PtrVT},
                                   {PoolAddr, LabelOp});
  SDValue CallArg = DAG.getNode(ISD::ADD, DL, PtrVT, SDValue(TLSLA, 0),
                                SDValue(TLSLA, 1));

  // Single-entry argument list for the runtime call.
  ArgListEntry ArgEntry;
  ArgEntry.Node = CallArg;
  ArgEntry.Ty = PtrIntTy;
  ArgListTy CallArgs;
  CallArgs.push_back(ArgEntry);

  // Call __tls_get_addr; its result is the lowered address.
  SDValue Callee = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, PtrIntTy, Callee, std::move(CallArgs));
  return LowerCallTo(CLI).first;
}

llvm/lib/Target/CSKY/CSKYISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ class CSKYTargetLowering : public TargetLowering {
154154
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
155155
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
156156

157+
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
158+
bool UseGOT) const;
159+
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
160+
157161
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
158162
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
159163
};

llvm/test/CodeGen/CSKY/tls-models.ll

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=csky -csky-no-aliases -relocation-model=pic -mattr=+2e3 < %s \
3+
; RUN: | FileCheck -check-prefix=CSKY-PIC %s
4+
; RUN: llc -mtriple=csky -csky-no-aliases -mattr=+2e3 < %s | FileCheck -check-prefix=CSKY-NOPIC %s
5+
6+
; Check that TLS symbols are lowered correctly based on the specified
7+
; model. Make sure they're external to avoid them all being optimised to Local
8+
; Exec for the executable.
9+
10+
@unspecified = external thread_local global i32
11+
@ld = external thread_local(localdynamic) global i32
12+
@ie = external thread_local(initialexec) global i32
13+
@le = external thread_local(localexec) global i32
14+
15+
16+
; No model specified: expect general-dynamic (@TLSGD32) with PIC and initial-exec (@GOTTPOFF) without.
17+
18+
define i32* @f1() nounwind {
19+
; CSKY-PIC-LABEL: f1:
20+
; CSKY-PIC: # %bb.0: # %entry
21+
; CSKY-PIC-NEXT: subi16 sp, sp, 8
22+
; CSKY-PIC-NEXT: st32.w rgb, (sp, 4) # 4-byte Folded Spill
23+
; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill
24+
; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI0_0]
25+
; CSKY-PIC-NEXT: .LPC0_1:
26+
; CSKY-PIC-NEXT: lrw32 a0, [.LCPI0_1]
27+
; CSKY-PIC-NEXT: grs32 a1, .LPC0_1
28+
; CSKY-PIC-NEXT: addu16 a0, a1
29+
; CSKY-PIC-NEXT: lrw32 a1, [.LCPI0_2]
30+
; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0)
31+
; CSKY-PIC-NEXT: jsr16 a1
32+
; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
33+
; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload
34+
; CSKY-PIC-NEXT: addi16 sp, sp, 8
35+
; CSKY-PIC-NEXT: rts16
36+
; CSKY-PIC-NEXT: .p2align 1
37+
; CSKY-PIC-NEXT: # %bb.1:
38+
; CSKY-PIC-NEXT: .p2align 2
39+
; CSKY-PIC-NEXT: .LCPI0_0:
40+
; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_
41+
; CSKY-PIC-NEXT: .LCPI0_1:
42+
; CSKY-PIC-NEXT: .Ltmp0:
43+
; CSKY-PIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@TLSGD32
44+
; CSKY-PIC-NEXT: .LCPI0_2:
45+
; CSKY-PIC-NEXT: .long __tls_get_addr@PLT
46+
;
47+
; CSKY-NOPIC-LABEL: f1:
48+
; CSKY-NOPIC: # %bb.0: # %entry
49+
; CSKY-NOPIC-NEXT: .LPC0_1:
50+
; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI0_0]
51+
; CSKY-NOPIC-NEXT: grs32 a1, .LPC0_1
52+
; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
53+
; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
54+
; CSKY-NOPIC-NEXT: rts16
55+
; CSKY-NOPIC-NEXT: .p2align 1
56+
; CSKY-NOPIC-NEXT: # %bb.1:
57+
; CSKY-NOPIC-NEXT: .p2align 2
58+
; CSKY-NOPIC-NEXT: .LCPI0_0:
59+
; CSKY-NOPIC-NEXT: .Ltmp0:
60+
; CSKY-NOPIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@GOTTPOFF
61+
entry:
62+
ret i32* @unspecified
63+
}
64+
65+
66+
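; localdynamic specified: expect the same @TLSGD32 + __tls_get_addr sequence as general-dynamic with PIC, and initial-exec (@GOTTPOFF) without.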
67+
68+
define i32* @f2() nounwind {
69+
; CSKY-PIC-LABEL: f2:
70+
; CSKY-PIC: # %bb.0: # %entry
71+
; CSKY-PIC-NEXT: subi16 sp, sp, 8
72+
; CSKY-PIC-NEXT: st32.w rgb, (sp, 4) # 4-byte Folded Spill
73+
; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill
74+
; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI1_0]
75+
; CSKY-PIC-NEXT: .LPC1_1:
76+
; CSKY-PIC-NEXT: lrw32 a0, [.LCPI1_1]
77+
; CSKY-PIC-NEXT: grs32 a1, .LPC1_1
78+
; CSKY-PIC-NEXT: addu16 a0, a1
79+
; CSKY-PIC-NEXT: lrw32 a1, [.LCPI1_2]
80+
; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0)
81+
; CSKY-PIC-NEXT: jsr16 a1
82+
; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
83+
; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload
84+
; CSKY-PIC-NEXT: addi16 sp, sp, 8
85+
; CSKY-PIC-NEXT: rts16
86+
; CSKY-PIC-NEXT: .p2align 1
87+
; CSKY-PIC-NEXT: # %bb.1:
88+
; CSKY-PIC-NEXT: .p2align 2
89+
; CSKY-PIC-NEXT: .LCPI1_0:
90+
; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_
91+
; CSKY-PIC-NEXT: .LCPI1_1:
92+
; CSKY-PIC-NEXT: .Ltmp1:
93+
; CSKY-PIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@TLSGD32
94+
; CSKY-PIC-NEXT: .LCPI1_2:
95+
; CSKY-PIC-NEXT: .long __tls_get_addr@PLT
96+
;
97+
; CSKY-NOPIC-LABEL: f2:
98+
; CSKY-NOPIC: # %bb.0: # %entry
99+
; CSKY-NOPIC-NEXT: .LPC1_1:
100+
; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI1_0]
101+
; CSKY-NOPIC-NEXT: grs32 a1, .LPC1_1
102+
; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
103+
; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
104+
; CSKY-NOPIC-NEXT: rts16
105+
; CSKY-NOPIC-NEXT: .p2align 1
106+
; CSKY-NOPIC-NEXT: # %bb.1:
107+
; CSKY-NOPIC-NEXT: .p2align 2
108+
; CSKY-NOPIC-NEXT: .LCPI1_0:
109+
; CSKY-NOPIC-NEXT: .Ltmp1:
110+
; CSKY-NOPIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@GOTTPOFF
111+
entry:
112+
ret i32* @ld
113+
}
114+
115+
116+
; initialexec specified: expect a @GOTTPOFF load plus the thread pointer in both PIC and non-PIC modes.
117+
118+
define i32* @f3() nounwind {
119+
; CSKY-PIC-LABEL: f3:
120+
; CSKY-PIC: # %bb.0: # %entry
121+
; CSKY-PIC-NEXT: .LPC2_1:
122+
; CSKY-PIC-NEXT: lrw32 a0, [.LCPI2_0]
123+
; CSKY-PIC-NEXT: grs32 a1, .LPC2_1
124+
; CSKY-PIC-NEXT: ldr32.w a0, (a0, a1 << 0)
125+
; CSKY-PIC-NEXT: addu32 a0, a0, tls
126+
; CSKY-PIC-NEXT: rts16
127+
; CSKY-PIC-NEXT: .p2align 1
128+
; CSKY-PIC-NEXT: # %bb.1:
129+
; CSKY-PIC-NEXT: .p2align 2
130+
; CSKY-PIC-NEXT: .LCPI2_0:
131+
; CSKY-PIC-NEXT: .Ltmp2:
132+
; CSKY-PIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
133+
;
134+
; CSKY-NOPIC-LABEL: f3:
135+
; CSKY-NOPIC: # %bb.0: # %entry
136+
; CSKY-NOPIC-NEXT: .LPC2_1:
137+
; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI2_0]
138+
; CSKY-NOPIC-NEXT: grs32 a1, .LPC2_1
139+
; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
140+
; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
141+
; CSKY-NOPIC-NEXT: rts16
142+
; CSKY-NOPIC-NEXT: .p2align 1
143+
; CSKY-NOPIC-NEXT: # %bb.1:
144+
; CSKY-NOPIC-NEXT: .p2align 2
145+
; CSKY-NOPIC-NEXT: .LCPI2_0:
146+
; CSKY-NOPIC-NEXT: .Ltmp2:
147+
; CSKY-NOPIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
148+
entry:
149+
ret i32* @ie
150+
}
151+
152+
153+
; localexec specified: expect a direct @TPOFF constant plus the thread pointer in both PIC and non-PIC modes.
154+
155+
define i32* @f4() nounwind {
156+
; CSKY-PIC-LABEL: f4:
157+
; CSKY-PIC: # %bb.0: # %entry
158+
; CSKY-PIC-NEXT: lrw32 a0, [.LCPI3_0]
159+
; CSKY-PIC-NEXT: addu32 a0, a0, tls
160+
; CSKY-PIC-NEXT: rts16
161+
; CSKY-PIC-NEXT: .p2align 1
162+
; CSKY-PIC-NEXT: # %bb.1:
163+
; CSKY-PIC-NEXT: .p2align 2
164+
; CSKY-PIC-NEXT: .LCPI3_0:
165+
; CSKY-PIC-NEXT: .long le@TPOFF
166+
;
167+
; CSKY-NOPIC-LABEL: f4:
168+
; CSKY-NOPIC: # %bb.0: # %entry
169+
; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI3_0]
170+
; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
171+
; CSKY-NOPIC-NEXT: rts16
172+
; CSKY-NOPIC-NEXT: .p2align 1
173+
; CSKY-NOPIC-NEXT: # %bb.1:
174+
; CSKY-NOPIC-NEXT: .p2align 2
175+
; CSKY-NOPIC-NEXT: .LCPI3_0:
176+
; CSKY-NOPIC-NEXT: .long le@TPOFF
177+
entry:
178+
ret i32* @le
179+
}

0 commit comments

Comments
 (0)