Skip to content

AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize #142790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 40 additions & 13 deletions llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
const RegisterBankInfo &RBI);
// Callback that emits a single read-lane instruction. The helpers below
// invoke it as (builder, sgpr destination register, vgpr source register)
// and use the returned MachineInstrBuilder to fetch the defined register.
using ReadLaneFnTy =
function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;

// Forward declaration: unmergeReadAnyLane and the Register-returning
// buildReadLane call each other.
static Register buildReadLane(MachineIRBuilder &, Register,
const RegisterBankInfo &, ReadLaneFnTy);

// Unmerges VgprSrc into pieces of type UnmergeTy on the VGPR bank, reads each
// piece through buildReadLane (which uses BuildRL to emit the read-lane
// instruction) and appends the resulting registers to SgprDstParts in order.
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
const RegisterBankInfo &RBI) {
const RegisterBankInfo &RBI,
ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
// Visit every operand except the final one, which is the unmerge source.
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
const RegisterBankInfo &RBI) {
// Reads VgprSrc into a newly created register on the SGPR bank with the same
// type as VgprSrc and returns that register. A 32-bit value is read with one
// BuildRL call; any other size is split into getReadAnyLaneSplitTy(Ty)
// pieces, read piece by piece and merged back together.
static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
const RegisterBankInfo &RBI,
ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
.getReg(0);
// The callback takes an explicit destination, so create it up front.
Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}

SmallVector<Register, 8> SgprDstParts;
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
BuildRL);

return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}

void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
Register VgprSrc, const RegisterBankInfo &RBI) {
// Reads VgprSrc into the caller-provided SgprDst. A 32-bit value is read
// with one BuildReadLane call that defines SgprDst directly; any other size
// is split into getReadAnyLaneSplitTy(Ty) pieces, read piece by piece, and
// the pieces are merged into SgprDst.
static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
Register VgprSrc, const RegisterBankInfo &RBI,
ReadLaneFnTy BuildReadLane) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make the function a template argument?

LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
BuildReadLane(B, SgprDst, VgprSrc);
return;
}

SmallVector<Register, 8> SgprDstParts;
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
BuildReadLane);

B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}

// Reads VgprSrc into SgprDst through buildReadLane, supplying a callback that
// emits G_AMDGPU_READANYLANE for each piece buildReadLane asks it to read.
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
                              Register VgprSrc, const RegisterBankInfo &RBI) {
  auto EmitReadAnyLane = [](MachineIRBuilder &Builder, Register Dst,
                            Register Src) {
    return Builder.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {Dst}, {Src});
  };
  buildReadLane(B, SgprDst, VgprSrc, RBI, EmitReadAnyLane);
}

// Reads VgprSrc into SgprDst through buildReadLane, supplying a callback that
// emits the amdgcn_readfirstlane intrinsic (rather than
// G_AMDGPU_READANYLANE) for each piece buildReadLane asks it to read.
void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
Register VgprSrc, const RegisterBankInfo &RBI) {
return buildReadLane(
B, SgprDst, VgprSrc, RBI,
[](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR, but we should really have an opcode for this too instead of having one being an intrinsic and one being a generic opcode

.addReg(VgprSrc);
});
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class IntrinsicLaneMaskAnalyzer {

/// Read \p VgprSrc into \p SgprDst using G_AMDGPU_READANYLANE. Values that are
/// not 32 bits wide are unmerged, read piece by piece and merged into
/// \p SgprDst.
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
const RegisterBankInfo &RBI);
/// Read \p VgprSrc into \p SgprDst using the amdgcn_readfirstlane intrinsic.
/// Values that are not 32 bits wide are unmerged, read piece by piece and
/// merged into \p SgprDst.
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
const RegisterBankInfo &RBI);
}
}

Expand Down
17 changes: 13 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,11 @@ class AMDGPURegBankLegalizeCombiner {
if (UnMerge) {
int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
if (Merge) {
auto [RAL, RALSrc] =
tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
if (Merge && UnMerge->getNumDefs() == Merge->getNumSources()) {
Register SrcRegIdx = Merge->getSourceReg(Idx);
if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
return {};
auto [RAL, RALSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
if (RAL)
return RALSrc;
}
Expand All @@ -205,7 +207,14 @@ class AMDGPURegBankLegalizeCombiner {
bool tryEliminateReadAnyLane(MachineInstr &Copy) {
Register Dst = Copy.getOperand(0).getReg();
Register Src = Copy.getOperand(1).getReg();
if (!Src.isVirtual())

// Skip non-vgpr Dst
if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
: !TRI.isVGPR(MRI, Dst))
return false;

// Skip physical source registers and source registers with register class
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't happen?

if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
return false;

Register RALDst = Src;
Expand Down
Loading
Loading