Skip to content

Commit 7920dff

Browse files
authored
[AMDGPU] VOPD/VOPD3 changes for gfx1250 (#147602)
1 parent 88ba06d commit 7920dff

24 files changed

+50945
-231
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 185 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
336336
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
337337
}
338338

339+
// Operand predicate: accepts whatever isRegOrInline() accepts for the VS_64
// register class with an f64 operand type.
bool isRegOrInlineImmWithFP64InputMods() const {
340+
return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
341+
}
342+
343+
// Operand predicate parameterised by register class: forwards \p RCID to
// isRegClass() and performs no further check in this wrapper.
bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
344+
345+
// isVRegWithInputMods() specialised to the VGPR_32 register class.
bool isVRegWithFP32InputMods() const {
346+
return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
347+
}
348+
349+
// isVRegWithInputMods() specialised to the VReg_64 register class.
bool isVRegWithFP64InputMods() const {
350+
return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
351+
}
352+
339353
bool isPackedFP16InputMods() const {
340354
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
341355
}
@@ -531,7 +545,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
531545
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
532546
}
533547

534-
bool isVCSrcB64() const {
548+
// Operand predicate: accepts whatever isRegOrInlineNoMods() accepts for the
// VS_64 register class with an i64 operand type.
bool isVCSrc_b64() const {
535549
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
536550
}
537551

@@ -557,7 +571,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
557571
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
558572
}
559573

560-
bool isVCSrcF64() const {
574+
// Operand predicate: accepts whatever isRegOrInlineNoMods() accepts for the
// VS_64 register class with an f64 operand type.
bool isVCSrc_f64() const {
561575
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
562576
}
563577

@@ -605,7 +619,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
605619
return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
606620
}
607621

608-
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
622+
// True for an isVCSrc_f64() operand or a literal immediate of type i64.
// NOTE(review): this b64 predicate delegates to isVCSrc_f64() rather than
// isVCSrc_b64(); both use VS_64 but pass different MVTs (f64 vs i64) -
// confirm this is intended.
bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
609623

610624
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
611625

@@ -621,23 +635,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
621635

622636
bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
623637

624-
bool isVCSrcV2FP32() const {
625-
return isVCSrcF64();
626-
}
638+
// Packed v2f32 source predicate; simply forwards to isVCSrc_f64().
bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
627639

628640
bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
629641

630-
bool isVCSrcV2INT32() const {
631-
return isVCSrcB64();
632-
}
642+
// Packed v2b32 source predicate; simply forwards to isVCSrc_b64().
bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
633643

634644
bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
635645

636646
bool isVSrc_f32() const {
637647
return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
638648
}
639649

640-
bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
650+
// True for an isVCSrc_f64() operand or a literal immediate of type f64.
bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
641651

642652
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
643653

@@ -1531,6 +1541,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15311541

15321542
bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
15331543

1544+
// Subtarget query: forwards the parser's subtarget info (getSTI()) to
// AMDGPU::isGFX1250().
bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1545+
15341546
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
15351547

15361548
bool isGFX10_BEncoding() const {
@@ -1782,8 +1794,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17821794
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
17831795
bool validateSOPLiteral(const MCInst &Inst) const;
17841796
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1785-
bool validateVOPDRegBankConstraints(const MCInst &Inst,
1786-
const OperandVector &Operands);
1797+
std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1798+
bool AsVOPD3);
1799+
bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1800+
bool tryVOPD(const MCInst &Inst);
1801+
bool tryVOPD3(const MCInst &Inst);
1802+
bool tryAnotherVOPDEncoding(const MCInst &Inst);
1803+
17871804
bool validateIntClampSupported(const MCInst &Inst);
17881805
bool validateMIMGAtomicDMask(const MCInst &Inst);
17891806
bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3569,6 +3586,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
35693586
}
35703587
}
35713588

3589+
// Asm can first try to match VOPD or VOPD3. By failing early here with
3590+
// Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3591+
// Checking later during validateInstruction does not give a chance to retry
3592+
// parsing as a different encoding.
3593+
if (tryAnotherVOPDEncoding(Inst))
3594+
return Match_InvalidOperand;
3595+
35723596
return Match_Success;
35733597
}
35743598

@@ -3749,8 +3773,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
37493773

37503774
return {getNamedOperandIdx(Opcode, OpName::src0X),
37513775
getNamedOperandIdx(Opcode, OpName::vsrc1X),
3776+
getNamedOperandIdx(Opcode, OpName::vsrc2X),
37523777
getNamedOperandIdx(Opcode, OpName::src0Y),
37533778
getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3779+
getNamedOperandIdx(Opcode, OpName::vsrc2Y),
37543780
ImmXIdx,
37553781
ImmIdx};
37563782
}
@@ -3880,12 +3906,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
38803906
return false;
38813907
}
38823908

3883-
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3884-
const MCInst &Inst, const OperandVector &Operands) {
3909+
std::optional<unsigned>
3910+
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
38853911

38863912
const unsigned Opcode = Inst.getOpcode();
38873913
if (!isVOPD(Opcode))
3888-
return true;
3914+
return {};
38893915

38903916
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
38913917

@@ -3896,24 +3922,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38963922
: MCRegister();
38973923
};
38983924

3899-
// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3900-
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3925+
// On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3926+
// source-cache.
3927+
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3928+
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3929+
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3930+
bool AllowSameVGPR = isGFX1250();
3931+
3932+
if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3933+
for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3934+
int I = getNamedOperandIdx(Opcode, OpName);
3935+
const MCOperand &Op = Inst.getOperand(I);
3936+
if (!Op.isImm())
3937+
continue;
3938+
int64_t Imm = Op.getImm();
3939+
if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3940+
!AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3941+
return (unsigned)I;
3942+
}
3943+
3944+
for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3945+
OpName::vsrc2Y, OpName::imm}) {
3946+
int I = getNamedOperandIdx(Opcode, OpName);
3947+
if (I == -1)
3948+
continue;
3949+
const MCOperand &Op = Inst.getOperand(I);
3950+
if (Op.isImm())
3951+
return (unsigned)I;
3952+
}
3953+
}
39013954

39023955
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3903-
auto InvalidCompOprIdx =
3904-
InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3905-
if (!InvalidCompOprIdx)
3956+
auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3957+
getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3958+
3959+
return InvalidCompOprIdx;
3960+
}
3961+
3962+
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3963+
const OperandVector &Operands) {
3964+
3965+
unsigned Opcode = Inst.getOpcode();
3966+
bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3967+
3968+
if (AsVOPD3) {
3969+
for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
3970+
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3971+
if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3972+
(Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3973+
Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3974+
}
3975+
}
3976+
3977+
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3978+
if (!InvalidCompOprIdx.has_value())
39063979
return true;
39073980

39083981
auto CompOprIdx = *InvalidCompOprIdx;
3982+
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
39093983
auto ParsedIdx =
39103984
std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
39113985
InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
39123986
assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
39133987

39143988
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
39153989
if (CompOprIdx == VOPD::Component::DST) {
3916-
Error(Loc, "one dst register must be even and the other odd");
3990+
if (AsVOPD3)
3991+
Error(Loc, "dst registers must be distinct");
3992+
else
3993+
Error(Loc, "one dst register must be even and the other odd");
39173994
} else {
39183995
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
39193996
Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3923,6 +4000,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
39234000
return false;
39244001
}
39254002

4003+
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4004+
// potentially used as VOPD3 with the same operands. Also returns true when
// both forms are rejected and the offending operand under the VOPD3 rules is
// the dst, so that the error is reported against the VOPD3 form.
4005+
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4006+
// First check if it fits VOPD. An empty optional means the check reported
// no invalid operand, so there is no reason to switch encodings.
4007+
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4008+
if (!InvalidCompOprIdx.has_value())
4009+
return false;
4010+
4011+
// Then check if it fits VOPD3 (same check, with AsVOPD3 = true).
4012+
InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4013+
if (InvalidCompOprIdx.has_value()) {
4014+
// If the failed operand is dst it is better to show the error about the
4015+
// VOPD3 instruction as it has more capabilities and the error message will
4016+
// be more informative. If the dst is not legal for VOPD3, then it is not
4017+
// legal for VOPD either.
4018+
if (*InvalidCompOprIdx == VOPD::Component::DST)
4019+
return true;
4020+
4021+
// Otherwise prefer VOPD as we may find ourselves in an awkward situation
4022+
// with a conflict in tied implicit src2 of fmac and no asm operand
4023+
// to point to.
4024+
return false;
4025+
}
4026+
// VOPD constraints failed but VOPD3 constraints hold: ask for VOPD3.
return true;
4027+
}
4028+
4029+
// \returns true if a VOPD3 instruction can also be represented as a shorter
4030+
// VOPD encoding.
4031+
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4032+
const unsigned Opcode = Inst.getOpcode();
4033+
const auto &II = getVOPDInstInfo(Opcode, &MII);
4034+
unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4035+
// Both component opcodes must be usable in their slot (X resp. Y) for this
// encoding family. NOTE(review): the trailing `false` presumably selects the
// non-VOPD3 query - confirm against getCanBeVOPD().
if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4036+
!getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4037+
return false;
4038+
4039+
// This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
4040+
// explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4041+
// be parsed as VOPD which does not accept src2.
4042+
if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4043+
II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4044+
return false;
4045+
4046+
// If any modifiers are set this cannot be VOPD. Modifier operands that the
// opcode does not have (index -1) are skipped.
4047+
for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4048+
OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4049+
OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4050+
int I = getNamedOperandIdx(Opcode, OpName);
4051+
if (I == -1)
4052+
continue;
4053+
if (Inst.getOperand(I).getImm())
4054+
return false;
4055+
}
4056+
4057+
// Finally, VOPD is chosen only when tryVOPD3() does not ask for VOPD3.
return !tryVOPD3(Inst);
4058+
}
4059+
4060+
// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4061+
// form but switch to VOPD3 otherwise.
// \returns true if the currently matched opcode should be given up in favour
// of the other encoding; always false unless the subtarget is GFX1250 and
// \p Inst is a VOPD opcode.
4062+
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4063+
const unsigned Opcode = Inst.getOpcode();
4064+
if (!isGFX1250() || !isVOPD(Opcode))
4065+
return false;
4066+
4067+
// Matched as VOPD3: see whether the VOPD form can be used instead.
if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4068+
return tryVOPD(Inst);
4069+
// Matched as VOPD: see whether VOPD3 is required.
return tryVOPD3(Inst);
4070+
}
4071+
39264072
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
39274073

39284074
const unsigned Opc = Inst.getOpcode();
@@ -5243,7 +5389,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
52435389
if (!validateConstantBusLimitations(Inst, Operands)) {
52445390
return false;
52455391
}
5246-
if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5392+
if (!validateVOPD(Inst, Operands)) {
52475393
return false;
52485394
}
52495395
if (!validateIntClampSupported(Inst)) {
@@ -9244,8 +9390,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
92449390

92459391
// Create VOPD MCInst operands using parsed assembler operands.
92469392
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9393+
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9394+
92479395
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
92489396
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9397+
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9398+
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9399+
return;
9400+
}
92499401
if (Op.isReg()) {
92509402
Op.addRegOperands(Inst, 1);
92519403
return;
@@ -9274,6 +9426,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
92749426
if (CInfo.hasSrc2Acc())
92759427
addOp(CInfo.getIndexOfDstInParsedOperands());
92769428
}
9429+
9430+
int BitOp3Idx =
9431+
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9432+
if (BitOp3Idx != -1) {
9433+
OptionalImmIndexMap OptIdx;
9434+
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9435+
if (Op.isImm())
9436+
OptIdx[Op.getImmTy()] = Operands.size() - 1;
9437+
9438+
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9439+
}
92779440
}
92789441

92799442
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)