-
Notifications
You must be signed in to change notification settings - Fork 14.4k
AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize #142790
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) { | |
return LLT::scalar(32); | ||
} | ||
|
||
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc, | ||
const RegisterBankInfo &RBI); | ||
using ReadLaneFnTy = | ||
function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>; | ||
|
||
static Register buildReadLane(MachineIRBuilder &, Register, | ||
const RegisterBankInfo &, ReadLaneFnTy); | ||
|
||
static void unmergeReadAnyLane(MachineIRBuilder &B, | ||
SmallVectorImpl<Register> &SgprDstParts, | ||
LLT UnmergeTy, Register VgprSrc, | ||
const RegisterBankInfo &RBI) { | ||
const RegisterBankInfo &RBI, | ||
ReadLaneFnTy BuildRL) { | ||
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID); | ||
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc); | ||
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { | ||
SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI)); | ||
SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL)); | ||
} | ||
} | ||
|
||
static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc, | ||
const RegisterBankInfo &RBI) { | ||
static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc, | ||
const RegisterBankInfo &RBI, | ||
ReadLaneFnTy BuildRL) { | ||
LLT Ty = B.getMRI()->getType(VgprSrc); | ||
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID); | ||
if (Ty.getSizeInBits() == 32) { | ||
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc}) | ||
.getReg(0); | ||
Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty}); | ||
return BuildRL(B, SgprDst, VgprSrc).getReg(0); | ||
} | ||
|
||
SmallVector<Register, 8> SgprDstParts; | ||
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI); | ||
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI, | ||
BuildRL); | ||
|
||
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0); | ||
} | ||
|
||
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, | ||
Register VgprSrc, const RegisterBankInfo &RBI) { | ||
static void buildReadLane(MachineIRBuilder &B, Register SgprDst, | ||
Register VgprSrc, const RegisterBankInfo &RBI, | ||
ReadLaneFnTy BuildReadLane) { | ||
LLT Ty = B.getMRI()->getType(VgprSrc); | ||
if (Ty.getSizeInBits() == 32) { | ||
B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc}); | ||
BuildReadLane(B, SgprDst, VgprSrc); | ||
return; | ||
} | ||
|
||
SmallVector<Register, 8> SgprDstParts; | ||
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI); | ||
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI, | ||
BuildReadLane); | ||
|
||
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0); | ||
} | ||
|
||
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, | ||
Register VgprSrc, const RegisterBankInfo &RBI) { | ||
return buildReadLane( | ||
B, SgprDst, VgprSrc, RBI, | ||
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) { | ||
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc}); | ||
}); | ||
} | ||
|
||
void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, | ||
Register VgprSrc, const RegisterBankInfo &RBI) { | ||
return buildReadLane( | ||
B, SgprDst, VgprSrc, RBI, | ||
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) { | ||
return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not for this PR, but we should really have an opcode for this too, instead of having one be an intrinsic and the other a generic opcode. |
||
.addReg(VgprSrc); | ||
}); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -184,9 +184,11 @@ class AMDGPURegBankLegalizeCombiner { | |
if (UnMerge) { | ||
int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr); | ||
auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI); | ||
if (Merge) { | ||
auto [RAL, RALSrc] = | ||
tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE); | ||
if (Merge && UnMerge->getNumDefs() == Merge->getNumSources()) { | ||
Register SrcRegIdx = Merge->getSourceReg(Idx); | ||
if (MRI.getType(Src) != MRI.getType(SrcRegIdx)) | ||
return {}; | ||
auto [RAL, RALSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE); | ||
if (RAL) | ||
return RALSrc; | ||
} | ||
|
@@ -205,7 +207,14 @@ class AMDGPURegBankLegalizeCombiner { | |
bool tryEliminateReadAnyLane(MachineInstr &Copy) { | ||
Register Dst = Copy.getOperand(0).getReg(); | ||
Register Src = Copy.getOperand(1).getReg(); | ||
if (!Src.isVirtual()) | ||
|
||
// Skip non-vgpr Dst | ||
if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB) | ||
: !TRI.isVGPR(MRI, Dst)) | ||
return false; | ||
|
||
// Skip physical source registers and source registers with register class | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This shouldn't happen? |
||
if (!Src.isVirtual() || MRI.getRegClassOrNull(Src)) | ||
return false; | ||
|
||
Register RALDst = Src; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make the function a template argument?