Skip to content

Commit 09ea436

Browse files
committed
[AMDGPU] VOPD/VOPD3 changes for gfx1250
1 parent d0a4af7 commit 09ea436

24 files changed

+66395
-228
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 186 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,22 @@ class AMDGPUOperand : public MCParsedAsmOperand {
335335
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
336336
}
337337

338+
bool isRegOrInlineImmWithFP64InputMods() const {
339+
return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
340+
}
341+
342+
bool isVRegWithInputMods(unsigned RCID) const {
343+
return isRegClass(RCID);
344+
}
345+
346+
bool isVRegWithFP32InputMods() const {
347+
return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
348+
}
349+
350+
bool isVRegWithFP64InputMods() const {
351+
return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
352+
}
353+
338354
bool isPackedFP16InputMods() const {
339355
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
340356
}
@@ -527,7 +543,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
527543
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
528544
}
529545

530-
bool isVCSrcB64() const {
546+
bool isVCSrc_b64() const {
531547
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532548
}
533549

@@ -553,7 +569,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
553569
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554570
}
555571

556-
bool isVCSrcF64() const {
572+
bool isVCSrc_f64() const {
557573
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558574
}
559575

@@ -601,7 +617,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
601617
return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602618
}
603619

604-
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
620+
bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605621

606622
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607623

@@ -617,23 +633,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
617633

618634
bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619635

620-
bool isVCSrcV2FP32() const {
621-
return isVCSrcF64();
622-
}
636+
bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
623637

624638
bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
625639

626-
bool isVCSrcV2INT32() const {
627-
return isVCSrcB64();
628-
}
640+
bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
629641

630642
bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
631643

632644
bool isVSrc_f32() const {
633645
return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
634646
}
635647

636-
bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
648+
bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
637649

638650
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
639651

@@ -1527,6 +1539,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15271539

15281540
bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
15291541

1542+
bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1543+
15301544
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
15311545

15321546
bool isGFX10_BEncoding() const {
@@ -1774,8 +1788,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17741788
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
17751789
bool validateSOPLiteral(const MCInst &Inst) const;
17761790
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1777-
bool validateVOPDRegBankConstraints(const MCInst &Inst,
1778-
const OperandVector &Operands);
1791+
std::optional<unsigned>
1792+
checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3);
1793+
bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1794+
bool tryVOPD(const MCInst &Inst);
1795+
bool tryVOPD3(const MCInst &Inst);
1796+
bool tryAnotherVOPDEncoding(const MCInst &Inst);
1797+
17791798
bool validateIntClampSupported(const MCInst &Inst);
17801799
bool validateMIMGAtomicDMask(const MCInst &Inst);
17811800
bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3524,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
35053524
}
35063525
}
35073526

3527+
// Asm can first try to match VOPD or VOPD3. By failing early here with
3528+
// Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3529+
// Checking later during validateInstruction does not give a chance to retry
3530+
// parsing as a different encoding.
3531+
if (tryAnotherVOPDEncoding(Inst))
3532+
return Match_InvalidOperand;
3533+
35083534
return Match_Success;
35093535
}
35103536

@@ -3685,8 +3711,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
36853711

36863712
return {getNamedOperandIdx(Opcode, OpName::src0X),
36873713
getNamedOperandIdx(Opcode, OpName::vsrc1X),
3714+
getNamedOperandIdx(Opcode, OpName::vsrc2X),
36883715
getNamedOperandIdx(Opcode, OpName::src0Y),
36893716
getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3717+
getNamedOperandIdx(Opcode, OpName::vsrc2Y),
36903718
ImmXIdx,
36913719
ImmIdx};
36923720
}
@@ -3816,12 +3844,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
38163844
return false;
38173845
}
38183846

3819-
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3820-
const MCInst &Inst, const OperandVector &Operands) {
3847+
std::optional<unsigned> AMDGPUAsmParser::checkVOPDRegBankConstraints(
3848+
const MCInst &Inst, bool AsVOPD3) {
38213849

38223850
const unsigned Opcode = Inst.getOpcode();
38233851
if (!isVOPD(Opcode))
3824-
return true;
3852+
return {};
38253853

38263854
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
38273855

@@ -3833,23 +3861,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38333861
};
38343862

38353863
// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3836-
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3864+
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3865+
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3866+
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3867+
bool AllowSameVGPR = isGFX1250();
3868+
3869+
if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3870+
for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3871+
int I = getNamedOperandIdx(Opcode, OpName);
3872+
const MCOperand &Op = Inst.getOperand(I);
3873+
if (!Op.isImm())
3874+
continue;
3875+
int64_t Imm = Op.getImm();
3876+
if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3877+
!AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3878+
return I;
3879+
}
3880+
3881+
for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y,
3882+
OpName::vsrc2X, OpName::vsrc2Y,
3883+
OpName::imm}) {
3884+
int I = getNamedOperandIdx(Opcode, OpName);
3885+
if (I == -1)
3886+
continue;
3887+
const MCOperand &Op = Inst.getOperand(I);
3888+
if (Op.isImm())
3889+
return I;
3890+
}
3891+
}
38373892

38383893
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
38393894
auto InvalidCompOprIdx =
3840-
InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3841-
if (!InvalidCompOprIdx)
3895+
InstInfo.getInvalidCompOperandIndex(getVRegIdx, *TRI, SkipSrc,
3896+
AllowSameVGPR, AsVOPD3);
3897+
3898+
return InvalidCompOprIdx;
3899+
}
3900+
3901+
bool AMDGPUAsmParser::validateVOPD(
3902+
const MCInst &Inst, const OperandVector &Operands) {
3903+
3904+
unsigned Opcode = Inst.getOpcode();
3905+
bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3906+
3907+
if (AsVOPD3) {
3908+
for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
3909+
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3910+
if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3911+
(Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3912+
Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3913+
}
3914+
}
3915+
3916+
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3917+
if (!InvalidCompOprIdx.has_value())
38423918
return true;
38433919

38443920
auto CompOprIdx = *InvalidCompOprIdx;
3921+
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
38453922
auto ParsedIdx =
38463923
std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
38473924
InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
38483925
assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
38493926

38503927
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
38513928
if (CompOprIdx == VOPD::Component::DST) {
3852-
Error(Loc, "one dst register must be even and the other odd");
3929+
if (AsVOPD3)
3930+
Error(Loc, "dst registers must be distinct");
3931+
else
3932+
Error(Loc, "one dst register must be even and the other odd");
38533933
} else {
38543934
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
38553935
Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3859,6 +3939,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38593939
return false;
38603940
}
38613941

3942+
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
3943+
// potentially used as VOPD3 with the same operands.
3944+
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
3945+
// First check if it fits VOPD
3946+
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
3947+
if (!InvalidCompOprIdx.has_value())
3948+
return false;
3949+
3950+
// Then if it fits VOPD3
3951+
InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
3952+
if (InvalidCompOprIdx.has_value()) {
3953+
// If failed operand is dst it is better to show error about VOPD3
3954+
// instruction as it has more capabilities and error message will be
3955+
// more informative. If the dst is not legal for VOPD3, then it is not
3956+
// legal for VOPD either.
3957+
if (*InvalidCompOprIdx == VOPD::Component::DST)
3958+
return true;
3959+
3960+
// Otherwise prefer VOPD as we may find ourselves in an awkward situation
3961+
// with a conflict in tied implicit src2 of fmac and no asm operand to
3962+
// point to.
3963+
return false;
3964+
}
3965+
return true;
3966+
}
3967+
3968+
// \returns true if a VOPD3 instruction can also be represented as a shorter
3969+
// VOPD encoding.
3970+
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
3971+
const unsigned Opcode = Inst.getOpcode();
3972+
const auto &II = getVOPDInstInfo(Opcode, &MII);
3973+
unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
3974+
if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
3975+
!getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
3976+
return false;
3977+
3978+
// This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
3979+
// explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
3980+
// be parsed as VOPD which does not accept src2.
3981+
if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
3982+
II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
3983+
return false;
3984+
3985+
// If any modifiers are set this cannot be VOPD.
3986+
for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
3987+
OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
3988+
OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
3989+
int I = getNamedOperandIdx(Opcode, OpName);
3990+
if (I == -1)
3991+
continue;
3992+
if (Inst.getOperand(I).getImm())
3993+
return false;
3994+
}
3995+
3996+
return !tryVOPD3(Inst);
3997+
}
3998+
3999+
// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4000+
// form but switch to VOPD3 otherwise.
4001+
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4002+
const unsigned Opcode = Inst.getOpcode();
4003+
if (!isGFX1250() || !isVOPD(Opcode))
4004+
return false;
4005+
4006+
if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4007+
return tryVOPD(Inst);
4008+
return tryVOPD3(Inst);
4009+
}
4010+
38624011
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
38634012

38644013
const unsigned Opc = Inst.getOpcode();
@@ -5179,7 +5328,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
51795328
if (!validateConstantBusLimitations(Inst, Operands)) {
51805329
return false;
51815330
}
5182-
if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5331+
if (!validateVOPD(Inst, Operands)) {
51835332
return false;
51845333
}
51855334
if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9329,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
91809329

91819330
// Create VOPD MCInst operands using parsed assembler operands.
91829331
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9332+
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9333+
91839334
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
91849335
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9336+
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9337+
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9338+
return;
9339+
}
91859340
if (Op.isReg()) {
91869341
Op.addRegOperands(Inst, 1);
91879342
return;
@@ -9210,6 +9365,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
92109365
if (CInfo.hasSrc2Acc())
92119366
addOp(CInfo.getIndexOfDstInParsedOperands());
92129367
}
9368+
9369+
int BitOp3Idx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9370+
AMDGPU::OpName::bitop3);
9371+
if (BitOp3Idx != -1) {
9372+
OptionalImmIndexMap OptIdx;
9373+
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9374+
if (Op.isImm())
9375+
OptIdx[Op.getImmTy()] = Operands.size() - 1;
9376+
9377+
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9378+
}
92139379
}
92149380

92159381
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)