Skip to content

Commit ec14c19

Browse files
AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering for divergent operands that must be sgpr.
1 parent 28f0f17 commit ec14c19

19 files changed

+523
-243
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
117117
return LLT::scalar(32);
118118
}
119119

120-
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
121-
const RegisterBankInfo &RBI);
120+
// Callback that emits a single read-lane instruction. In the callers visible
// in this file it is invoked as (Builder, SgprDst, VgprSrc) and the returned
// MachineInstrBuilder is queried for its def with getReg(0).
using ReadLaneFnTy =
121+
function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
122+
123+
// Forward declaration: unmergeReadAnyLane and the Register-returning
// buildReadLane call each other, so one of them must be declared first.
static Register buildReadLane(MachineIRBuilder &, Register,
124+
const RegisterBankInfo &, ReadLaneFnTy);
122125

123126
// Unmerges VgprSrc into pieces of type UnmergeTy on the VGPR register bank,
// reads every piece through buildReadLane, and appends the resulting registers
// to SgprDstParts in piece order.
//
// B            - builder used to emit the unmerge and the per-piece reads.
// SgprDstParts - output list; results are appended, existing entries are kept.
// UnmergeTy    - type of each unmerged piece.
// VgprSrc      - register being split.
// RBI          - used to look up the VGPR register bank.
// BuildRL      - callback forwarded to buildReadLane for each piece.
static void unmergeReadAnyLane(MachineIRBuilder &B,
124127
SmallVectorImpl<Register> &SgprDstParts,
125128
LLT UnmergeTy, Register VgprSrc,
126-
const RegisterBankInfo &RBI) {
129+
const RegisterBankInfo &RBI,
130+
ReadLaneFnTy BuildRL) {
127131
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
128132
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
129133
// The last operand is skipped.
// NOTE(review): presumably that operand is the unmerge source and all earlier
// ones are the piece defs - confirm.
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
130-
SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
134+
SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
131135
}
132136
}
133137

134-
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
135-
const RegisterBankInfo &RBI) {
138+
// Reads VgprSrc into a newly created register on the SGPR bank and returns it.
//
// A 32-bit source is read with a single BuildRL call. Any other size is split
// into getReadAnyLaneSplitTy(Ty) pieces, each piece is read separately (via
// unmergeReadAnyLane, which calls back into this function), and the pieces are
// merged into one register of the original type.
//
// B       - builder used for all emitted instructions.
// VgprSrc - register to read; its type is taken from the builder's MRI.
// RBI     - used to look up the SGPR register bank.
// BuildRL - callback that emits the actual read-lane instruction.
static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
139+
const RegisterBankInfo &RBI,
140+
ReadLaneFnTy BuildRL) {
136141
LLT Ty = B.getMRI()->getType(VgprSrc);
137142
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
138143
if (Ty.getSizeInBits() == 32) {
139-
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
140-
.getReg(0);
144+
// The callback takes an existing destination register, so the destination
// is created up front with the SGPR bank and the source's type.
Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
145+
return BuildRL(B, SgprDst, VgprSrc).getReg(0);
141146
}
142147

143148
SmallVector<Register, 8> SgprDstParts;
144-
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
149+
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
150+
BuildRL);
145151

146152
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
147153
}
148154

149-
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
150-
Register VgprSrc, const RegisterBankInfo &RBI) {
155+
// Reads VgprSrc into the caller-supplied register SgprDst.
//
// A 32-bit source is read with a single BuildReadLane call. Any other size is
// split into getReadAnyLaneSplitTy(Ty) pieces, each piece is read separately,
// and the pieces are merged into SgprDst.
//
// B             - builder used for all emitted instructions.
// SgprDst       - existing register that receives the result.
// VgprSrc       - register to read; its type is taken from the builder's MRI.
// RBI           - forwarded to unmergeReadAnyLane for register bank lookup.
// BuildReadLane - callback that emits the actual read-lane instruction.
static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
156+
Register VgprSrc, const RegisterBankInfo &RBI,
157+
ReadLaneFnTy BuildReadLane) {
151158
LLT Ty = B.getMRI()->getType(VgprSrc);
152159
if (Ty.getSizeInBits() == 32) {
153-
B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
160+
BuildReadLane(B, SgprDst, VgprSrc);
154161
return;
155162
}
156163

157164
SmallVector<Register, 8> SgprDstParts;
158-
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
165+
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
166+
BuildReadLane);
159167

160168
// NOTE(review): the value of the trailing .getReg(0) is discarded here; the
// merge already defines SgprDst.
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
161169
}
170+
171+
// Reads VgprSrc into SgprDst by delegating to buildReadLane with a callback
// that emits G_AMDGPU_READANYLANE for each read.
//
// B       - builder used for all emitted instructions.
// SgprDst - existing register that receives the result.
// VgprSrc - register to read.
// RBI     - register bank info forwarded to buildReadLane.
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
172+
Register VgprSrc, const RegisterBankInfo &RBI) {
173+
// buildReadLane returns void; `return` here just forwards the call.
return buildReadLane(
174+
B, SgprDst, VgprSrc, RBI,
175+
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
176+
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
177+
});
178+
}
179+
180+
// Reads VgprSrc into SgprDst by delegating to buildReadLane with a callback
// that emits the amdgcn_readfirstlane intrinsic for each read.
//
// B       - builder used for all emitted instructions.
// SgprDst - existing register that receives the result.
// VgprSrc - register to read.
// RBI     - register bank info forwarded to buildReadLane.
void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
181+
Register VgprSrc, const RegisterBankInfo &RBI) {
182+
// buildReadLane returns void; `return` here just forwards the call.
return buildReadLane(
183+
B, SgprDst, VgprSrc, RBI,
184+
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
185+
// The intrinsic is built with SgprDst as its result; the source is added
// as a register operand afterwards.
return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst)
186+
.addReg(VgprSrc);
187+
});
188+
}

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ class IntrinsicLaneMaskAnalyzer {
5151

5252
// Emits instructions through B that read VgprSrc into SgprDst using
// G_AMDGPU_READANYLANE. RBI supplies the register banks used when the source
// has to be split into pieces.
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
5353
const RegisterBankInfo &RBI);
54+
// Same contract as buildReadAnyLane, but each read is emitted as the
// amdgcn_readfirstlane intrinsic.
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
55+
const RegisterBankInfo &RBI);
5456
}
5557
}
5658

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,11 @@ class AMDGPURegBankLegalizeCombiner {
182182
if (UnMerge) {
183183
int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
184184
auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
185-
if (Merge) {
186-
auto [RAL, RALSrc] =
187-
tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
185+
if (Merge && UnMerge->getNumDefs() == Merge->getNumSources()) {
186+
Register SrcRegIdx = Merge->getSourceReg(Idx);
187+
if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
188+
return {};
189+
auto [RAL, RALSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
188190
if (RAL)
189191
return RALSrc;
190192
}
@@ -203,7 +205,14 @@ class AMDGPURegBankLegalizeCombiner {
203205
bool tryEliminateReadAnyLane(MachineInstr &Copy) {
204206
Register Dst = Copy.getOperand(0).getReg();
205207
Register Src = Copy.getOperand(1).getReg();
206-
if (!Src.isVirtual())
208+
209+
// Skip non-vgpr Dst
210+
if ((Dst.isVirtual() && MRI.getRegBankOrNull(Dst) != VgprRB) ||
211+
(Dst.isPhysical() && !TRI.isVGPR(MRI, Dst)))
212+
return false;
213+
214+
// Skip physical source registers and source registers with register class
215+
if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
207216
return false;
208217

209218
Register RALDst = Src;

0 commit comments

Comments
 (0)