[AMDGPU] VOPD/VOPD3 changes for gfx1250 #147602
Open
rampitec wants to merge 1 commit into main from users/rampitec/07-08-_amdgpu_vopd_vopd3_mc_changes_for_gfx1250
+66,391 −229
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 4.24 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147602.diff

24 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3af140461afdb..41d06b42c32a8 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -335,6 +335,22 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}
+ bool isRegOrInlineImmWithFP64InputMods() const {
+ return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
+ }
+
+ bool isVRegWithInputMods(unsigned RCID) const {
+ return isRegClass(RCID);
+ }
+
+ bool isVRegWithFP32InputMods() const {
+ return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
+ }
+
+ bool isVRegWithFP64InputMods() const {
+ return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
+ }
+
bool isPackedFP16InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
}
@@ -527,7 +543,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
- bool isVCSrcB64() const {
+ bool isVCSrc_b64() const {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
@@ -553,7 +569,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
- bool isVCSrcF64() const {
+ bool isVCSrc_f64() const {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
@@ -601,7 +617,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
}
- bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
+ bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
@@ -617,15 +633,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
- bool isVCSrcV2FP32() const {
- return isVCSrcF64();
- }
+ bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
- bool isVCSrcV2INT32() const {
- return isVCSrcB64();
- }
+ bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
@@ -633,7 +645,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
}
- bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
+ bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
@@ -1527,6 +1539,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
+ bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
+
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
bool isGFX10_BEncoding() const {
@@ -1774,8 +1788,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
- bool validateVOPDRegBankConstraints(const MCInst &Inst,
- const OperandVector &Operands);
+ std::optional<unsigned>
+ checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3);
+ bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
+ bool tryVOPD(const MCInst &Inst);
+ bool tryVOPD3(const MCInst &Inst);
+ bool tryAnotherVOPDEncoding(const MCInst &Inst);
+
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3524,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}
+ // Asm can first try to match VOPD or VOPD3. By failing early here with
+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
+ // Checking later during validateInstruction does not give a chance to retry
+ // parsing as a different encoding.
+ if (tryAnotherVOPDEncoding(Inst))
+ return Match_InvalidOperand;
+
return Match_Success;
}
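For context: this early rejection works because the generated matcher keeps walking the match table when checkTargetMatchPredicate returns Match_InvalidOperand, so the other dual-issue flavor of the same mnemonic still gets a chance to match. A minimal standalone sketch of that control flow; the types and names below are illustrative stand-ins, not LLVM's:

```cpp
#include <cstdio>
#include <vector>

// Illustrative stand-ins for the MC matcher's result codes.
enum MatchResult { Match_Success, Match_InvalidOperand };

struct Candidate {
  const char *Name;     // e.g. the VOPD and VOPD3 flavors of one mnemonic
  bool FitsConstraints; // outcome of the register-bank/modifier checks
};

// Models checkTargetMatchPredicate(): failing early here lets the table
// walk continue, unlike a late failure in validateInstruction().
MatchResult checkTargetMatchPredicate(const Candidate &C) {
  return C.FitsConstraints ? Match_Success : Match_InvalidOperand;
}

const Candidate *matchInstruction(const std::vector<Candidate> &Table) {
  for (const Candidate &C : Table) // the generated matcher walks the table
    if (checkTargetMatchPredicate(C) == Match_Success)
      return &C;                   // first encoding that fits wins
  return nullptr;
}

int main() {
  // The VOPD entry comes first, so it is preferred when its constraints hold.
  std::vector<Candidate> Table = {{"VOPD", false}, {"VOPD3", true}};
  if (const Candidate *C = matchInstruction(Table))
    std::printf("matched as %s\n", C->Name);
}
```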
@@ -3685,8 +3711,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
return {getNamedOperandIdx(Opcode, OpName::src0X),
getNamedOperandIdx(Opcode, OpName::vsrc1X),
+ getNamedOperandIdx(Opcode, OpName::vsrc2X),
getNamedOperandIdx(Opcode, OpName::src0Y),
getNamedOperandIdx(Opcode, OpName::vsrc1Y),
+ getNamedOperandIdx(Opcode, OpName::vsrc2Y),
ImmXIdx,
ImmIdx};
}
@@ -3816,12 +3844,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
return false;
}
-bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
- const MCInst &Inst, const OperandVector &Operands) {
+std::optional<unsigned> AMDGPUAsmParser::checkVOPDRegBankConstraints(
+ const MCInst &Inst, bool AsVOPD3) {
const unsigned Opcode = Inst.getOpcode();
if (!isVOPD(Opcode))
- return true;
+ return {};
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
@@ -3833,15 +3861,64 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
};
// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
+ bool AllowSameVGPR = isGFX1250();
+
+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ const MCOperand &Op = Inst.getOperand(I);
+ if (!Op.isImm())
+ continue;
+ int64_t Imm = Op.getImm();
+ if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
+ !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
+ return I;
+ }
+
+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y,
+ OpName::vsrc2X, OpName::vsrc2Y,
+ OpName::imm}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ if (I == -1)
+ continue;
+ const MCOperand &Op = Inst.getOperand(I);
+ if (Op.isImm())
+ return I;
+ }
+ }
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
auto InvalidCompOprIdx =
- InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
- if (!InvalidCompOprIdx)
+ InstInfo.getInvalidCompOperandIndex(getVRegIdx, *TRI, SkipSrc,
+ AllowSameVGPR, AsVOPD3);
+
+ return InvalidCompOprIdx;
+}
+
+bool AMDGPUAsmParser::validateVOPD(
+ const MCInst &Inst, const OperandVector &Operands) {
+
+ unsigned Opcode = Inst.getOpcode();
+ bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
+
+ if (AsVOPD3) {
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
+ (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
+ Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
+ }
+ }
+
+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
+ if (!InvalidCompOprIdx.has_value())
return true;
auto CompOprIdx = *InvalidCompOprIdx;
+ const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
auto ParsedIdx =
std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
@@ -3849,7 +3926,10 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
if (CompOprIdx == VOPD::Component::DST) {
- Error(Loc, "one dst register must be even and the other odd");
+ if (AsVOPD3)
+ Error(Loc, "dst registers must be distinct");
+ else
+ Error(Loc, "one dst register must be even and the other odd");
} else {
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3859,6 +3939,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
return false;
}
+// \returns true if \p Inst does not satisfy VOPD constraints, but can
+// potentially be used as VOPD3 with the same operands.
+bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
+ // First check if it fits VOPD
+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
+ if (!InvalidCompOprIdx.has_value())
+ return false;
+
+ // Then if it fits VOPD3
+ InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
+ if (InvalidCompOprIdx.has_value()) {
+ // If failed operand is dst it is better to show error about VOPD3
+ // instruction as it has more capabilities and error message will be
+ // more informative. If the dst is not legal for VOPD3, then it is not
+ // legal for VOPD either.
+ if (*InvalidCompOprIdx == VOPD::Component::DST)
+ return true;
+
+ // Otherwise prefer VOPD as we may find ourselves in an awkward situation
+ // with a conflict in tied implicit src2 of fmac and no asm operand to
+ // point to.
+ return false;
+ }
+ return true;
+}
+
+// \returns true if a VOPD3 instruction can also be represented as a shorter
+// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ const auto &II = getVOPDInstInfo(Opcode, &MII);
+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
+ if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
+ !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
+ return false;
+
+ // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
+ // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
+ // be parsed as VOPD which does not accept src2.
+ if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
+ II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
+ return false;
+
+ // If any modifiers are set this cannot be VOPD.
+ for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
+ OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
+ OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
+ int I = getNamedOperandIdx(Opcode, OpName);
+ if (I == -1)
+ continue;
+ if (Inst.getOperand(I).getImm())
+ return false;
+ }
+
+ return !tryVOPD3(Inst);
+}
+
+// VOPD3 has more relaxed register constraints than VOPD. We prefer the
+// shorter VOPD form but switch to VOPD3 otherwise.
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ if (!isGFX1250() || !isVOPD(Opcode))
+ return false;
+
+ if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
+ return tryVOPD(Inst);
+ return tryVOPD3(Inst);
+}
+
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -5179,7 +5328,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateConstantBusLimitations(Inst, Operands)) {
return false;
}
- if (!validateVOPDRegBankConstraints(Inst, Operands)) {
+ if (!validateVOPD(Inst, Operands)) {
return false;
}
if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9329,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ return;
+ }
if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
return;
@@ -9210,6 +9365,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
if (CInfo.hasSrc2Acc())
addOp(CInfo.getIndexOfDstInParsedOperands());
}
+
+ int BitOp3Idx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::bitop3);
+ if (BitOp3Idx != -1) {
+ OptionalImmIndexMap OptIdx;
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
+ if (Op.isImm())
+ OptIdx[Op.getImmTy()] = Operands.size() - 1;
+
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+ }
}
//===----------------------------------------------------------------------===//
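Taken together, tryVOPD/tryVOPD3 above implement a simple preference: use the shorter VOPD encoding when it fits, and fall back to VOPD3, which relaxes the register-bank rules but, per the checks above, forbids literal constants and the ABS modifier. A rough standalone model of that decision; the struct fields are hypothetical summaries of the real checks in checkVOPDRegBankConstraints():

```cpp
#include <optional>

// Hypothetical per-instruction summary; the real checks live in
// checkVOPDRegBankConstraints() and the modifier scans in the patch.
struct DualInstSummary {
  bool FitsVOPDBanks;   // bank constraints of the short VOPD encoding
  bool FitsVOPD3Banks;  // relaxed constraints of the VOPD3 encoding
  bool HasSrcModifiers; // any source modifiers rule out plain VOPD
  bool HasLiteral;      // literal constants are only encodable in VOPD
};

enum class Encoding { VOPD, VOPD3 };

// Prefer the shorter VOPD form; switch to VOPD3 only when needed.
// (The patch additionally picks which form to diagnose against when
// both fail; that detail is omitted here.)
std::optional<Encoding> pickEncoding(const DualInstSummary &I) {
  if (I.FitsVOPDBanks && !I.HasSrcModifiers)
    return Encoding::VOPD;
  if (I.FitsVOPD3Banks && !I.HasLiteral)
    return Encoding::VOPD3;
  return std::nullopt;
}
```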
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index ccc711a0bcc4e..26322a4de54fc 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -42,11 +42,13 @@ class GCNCreateVOPD {
class VOPDCombineInfo {
public:
VOPDCombineInfo() = default;
- VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
- : FirstMI(First), SecondMI(Second) {}
+ VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
+ bool VOPD3 = false)
+ : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
MachineInstr *FirstMI;
MachineInstr *SecondMI;
+ bool IsVOPD3;
};
public:
@@ -59,9 +61,9 @@ class GCNCreateVOPD {
unsigned Opc2 = SecondMI->getOpcode();
unsigned EncodingFamily =
AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
- int NewOpcode =
- AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+ int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1, CI.IsVOPD3),
+ AMDGPU::getVOPDOpcode(Opc2, CI.IsVOPD3),
+ EncodingFamily, CI.IsVOPD3);
assert(NewOpcode != -1 &&
"Should have previously determined this as a possible VOPD\n");
@@ -79,12 +81,36 @@ class GCNCreateVOPD {
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
+ const AMDGPU::OpName Mods[2][3] = {
+ {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
+ AMDGPU::OpName::vsrc2X_modifiers},
+ {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
+ AMDGPU::OpName::vsrc2Y_modifiers}};
+ const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ const unsigned VOPDOpc = VOPDInst->getOpcode();
+
for (auto CompIdx : VOPD::COMPONENTS) {
auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+ bool IsVOP3 = SII->isVOP3(*MI[CompIdx]);
for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ if (AMDGPU::hasNamedOperand(VOPDOpc, Mods[CompIdx][CompSrcIdx])) {
+ const MachineOperand *Mod =
+ SII->getNamedOperand(*MI[CompIdx], SrcMods[CompSrcIdx]);
+ VOPDInst.addImm(Mod ? Mod->getImm() : 0);
+ }
+ auto MCOprIdx =
+ InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, IsVOP3);
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
+ if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
+ VOPDInst.addReg(AMDGPU::VCC_LO);
+ }
+
+ if (CI.IsVOPD3) {
+ if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc2))
+ VOPDInst.addImm(BitOp2);
}
SII->fixImplicitOperands(*VOPDInst);
@@ -109,6 +135,8 @@ class GCNCreateVOPD {
const SIInstrInfo *SII = ST->getInstrInfo();
bool Changed = false;
+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(*ST);
+ bool HasVOPD3 = ST->hasVOPD3();
SmallVector<VOPDCombineInfo> ReplaceCandidates;
@@ -124,19 +152,26 @@ class GCNCreateVOPD {
auto *SecondMI = &*MII;
unsigned Opc = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
- llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
- llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
VOPDCombineInfo CI;
- if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- CI = VOPDCombineInfo(FirstMI, SecondMI);
- else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- CI = VOPDCombineInfo(SecondMI, FirstMI);
- else
- continue;
- // checkVOPDRegConstraints cares about program order, but doReplace
- // cares about X-Y order in the constituted VOPD
- if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ const auto checkVOPD = [&](bool VOPD3) -> bool {
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
+ AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
+ AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
+ else
+ return false;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about X-Y order in the constituted VOPD
+ return llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI, VOPD3);
+ };
+
+ if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
ReplaceCandidates.push_back(CI);
++MII;
}
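The driver loop above tries each candidate pair as plain VOPD first and only then, on subtargets with VOPD3, as VOPD3. A simplified standalone sketch of that scan; it ignores the X/Y ordering logic and uses placeholder types rather than MachineInstr:

```cpp
#include <functional>
#include <vector>

struct MI { unsigned Opcode; }; // placeholder for MachineInstr
struct Pair { const MI *X; const MI *Y; bool IsVOPD3; };

// Models getCanBeVOPD() + checkVOPDRegConstraints() for one encoding.
using CanPairFn = std::function<bool(const MI &, const MI &, bool IsVOPD3)>;

std::vector<Pair> collectPairs(const std::vector<MI> &Block, bool HasVOPD3,
                               const CanPairFn &CanPair) {
  std::vector<Pair> Candidates;
  for (size_t I = 0; I + 1 < Block.size(); ++I) {
    const MI &First = Block[I], &Second = Block[I + 1];
    bool AsVOPD3;
    if (CanPair(First, Second, /*IsVOPD3=*/false))
      AsVOPD3 = false; // the shorter encoding wins when both would work
    else if (HasVOPD3 && CanPair(First, Second, /*IsVOPD3=*/true))
      AsVOPD3 = true;
    else
      continue;
    Candidates.push_back({&First, &Second, AsVOPD3});
    ++I; // the new VOPD consumes both instructions
  }
  return Candidates;
}
```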
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fa1209db2fa07..80fd830d10aa4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1478,6 +1478,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasGFX1250Insts() const { return GFX1250Insts; }
+ bool hasVOPD3() const { return GFX1250Insts; }
+
// \returns true if target has S_SETPRIO_INC_WG instruction.
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 33c208495c500..ae5db01f6e119 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -36,11 +36,20 @@ using namespace llvm;
bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
const MachineInstr &FirstMI,
- const MachineInstr &SecondMI) {
+ const MachineInstr &SecondMI,
+ bool IsVOPD3) {
namespace VOPD = AMDGPU::VOPD;
const MachineFunction *MF = FirstMI.getMF();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ if (IsVOPD3 && !ST.hasVOPD3())
+ return false;
+ if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
+ return false;
+ if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
+ return false;
+
const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
const MachineRegisterInfo &MRI = MF->getRegInfo();
// Literals also count against scalar bus limit
@@ -80,23 +89,61 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
for (auto CompIdx : VOPD::COMPONENTS) {
const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
- const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
+ const MachineOperand &Src0 = *TII.getNam...
[truncated]
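One detail worth calling out from the diagnostics earlier in the patch: the destination rule differs between the two encodings. VOPD requires one even and one odd vdst, while VOPD3 only requires the two destinations to be distinct. A tiny illustration, using a hypothetical helper with VGPR indices as plain integers:

```cpp
// Destination rule implied by the new error messages:
//   VOPD:  "one dst register must be even and the other odd"
//   VOPD3: "dst registers must be distinct"
bool isValidDstPair(unsigned DstX, unsigned DstY, bool AsVOPD3) {
  if (AsVOPD3)
    return DstX != DstY;
  return (DstX & 1) != (DstY & 1);
}
```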
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 09ea436 to 63aae4b
Sorry for the size of the patch, but splitting it is very challenging for no apparent reason.