@@ -336,6 +336,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
336
336
return isRegOrInline (AMDGPU::VS_32RegClassID, MVT::f32 );
337
337
}
338
338
339
  // Matcher predicate for an operand that may carry FP64 input modifiers:
  // a VS_64-class register or an inline constant matched as f64.
  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }
342
+
343
+ bool isVRegWithInputMods (unsigned RCID) const { return isRegClass (RCID); }
344
+
345
  // VGPR_32 register operand (matcher predicate for FP32 sources that accept
  // input modifiers but must be a VGPR, not an SGPR or immediate).
  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }
348
+
349
  // VReg_64 register operand (matcher predicate for FP64 sources that accept
  // input modifiers but must be a VGPR pair, not an SGPR or immediate).
  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }
352
+
339
353
  // Packed v2f16 source: a VS_32-class register or an immediate, with input
  // modifiers permitted.
  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }
@@ -531,7 +545,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
531
545
return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::i32 );
532
546
}
533
547
534
  // Constant-bus-capable 64-bit integer source: VS_64-class register or an
  // inline constant; no source modifiers allowed.
  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }
537
551
@@ -557,7 +571,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
557
571
return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::f32 );
558
572
}
559
573
560
  // Constant-bus-capable 64-bit FP source: VS_64-class register or an inline
  // constant; no source modifiers allowed.
  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }
563
577
@@ -605,7 +619,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
605
619
return isVCSrc_f32 () || isLiteralImm (MVT::i32 ) || isExpr ();
606
620
}
607
621
608
- bool isVSrc_b64 () const { return isVCSrcF64 () || isLiteralImm (MVT::i64 ); }
622
+ bool isVSrc_b64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::i64 ); }
609
623
610
624
bool isVSrcT_b16 () const { return isVCSrcT_b16 () || isLiteralImm (MVT::i16 ); }
611
625
@@ -621,23 +635,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
621
635
622
636
bool isVSrc_v2b16 () const { return isVSrc_b16 () || isLiteralImm (MVT::v2i16); }
623
637
624
  // Packed v2f32 constant-bus-capable source; encoded the same way as a
  // 64-bit FP source.
  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
627
639
628
640
bool isVSrc_v2f32 () const { return isVSrc_f64 () || isLiteralImm (MVT::v2f32); }
629
641
630
  // Packed v2i32 constant-bus-capable source; encoded the same way as a
  // 64-bit integer source.
  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
633
643
634
644
bool isVSrc_v2b32 () const { return isVSrc_b64 () || isLiteralImm (MVT::v2i32); }
635
645
636
646
  // 32-bit FP VALU source: constant-bus-capable source, a 32-bit FP literal,
  // or a relocatable expression.
  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }
639
649
640
- bool isVSrc_f64 () const { return isVCSrcF64 () || isLiteralImm (MVT::f64 ); }
650
+ bool isVSrc_f64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::f64 ); }
641
651
642
652
bool isVSrcT_bf16 () const { return isVCSrcTBF16 () || isLiteralImm (MVT::bf16 ); }
643
653
@@ -1531,6 +1541,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
1531
1541
1532
1542
bool isGFX12Plus () const { return AMDGPU::isGFX12Plus (getSTI ()); }
1533
1543
1544
+ bool isGFX1250 () const { return AMDGPU::isGFX1250 (getSTI ()); }
1545
+
1534
1546
bool isGFX10_AEncoding () const { return AMDGPU::isGFX10_AEncoding (getSTI ()); }
1535
1547
1536
1548
bool isGFX10_BEncoding () const {
@@ -1782,8 +1794,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
1782
1794
bool validateSMEMOffset (const MCInst &Inst, const OperandVector &Operands);
1783
1795
bool validateSOPLiteral (const MCInst &Inst) const ;
1784
1796
bool validateConstantBusLimitations (const MCInst &Inst, const OperandVector &Operands);
1785
- bool validateVOPDRegBankConstraints (const MCInst &Inst,
1786
- const OperandVector &Operands);
1797
+ std::optional<unsigned > checkVOPDRegBankConstraints (const MCInst &Inst,
1798
+ bool AsVOPD3);
1799
+ bool validateVOPD (const MCInst &Inst, const OperandVector &Operands);
1800
+ bool tryVOPD (const MCInst &Inst);
1801
+ bool tryVOPD3 (const MCInst &Inst);
1802
+ bool tryAnotherVOPDEncoding (const MCInst &Inst);
1803
+
1787
1804
bool validateIntClampSupported (const MCInst &Inst);
1788
1805
bool validateMIMGAtomicDMask (const MCInst &Inst);
1789
1806
bool validateMIMGGatherDMask (const MCInst &Inst);
@@ -3569,6 +3586,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3569
3586
}
3570
3587
}
3571
3588
3589
+ // Asm can first try to match VOPD or VOPD3. By failing early here with
3590
+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3591
+ // Checking later during validateInstruction does not give a chance to retry
3592
+ // parsing as a different encoding.
3593
+ if (tryAnotherVOPDEncoding (Inst))
3594
+ return Match_InvalidOperand;
3595
+
3572
3596
return Match_Success;
3573
3597
}
3574
3598
@@ -3749,8 +3773,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
3749
3773
3750
3774
return {getNamedOperandIdx (Opcode, OpName::src0X),
3751
3775
getNamedOperandIdx (Opcode, OpName::vsrc1X),
3776
+ getNamedOperandIdx (Opcode, OpName::vsrc2X),
3752
3777
getNamedOperandIdx (Opcode, OpName::src0Y),
3753
3778
getNamedOperandIdx (Opcode, OpName::vsrc1Y),
3779
+ getNamedOperandIdx (Opcode, OpName::vsrc2Y),
3754
3780
ImmXIdx,
3755
3781
ImmIdx};
3756
3782
}
@@ -3880,12 +3906,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
3880
3906
return false ;
3881
3907
}
3882
3908
3883
- bool AMDGPUAsmParser::validateVOPDRegBankConstraints (
3884
- const MCInst &Inst, const OperandVector &Operands ) {
3909
+ std::optional< unsigned >
3910
+ AMDGPUAsmParser::checkVOPDRegBankConstraints ( const MCInst &Inst, bool AsVOPD3 ) {
3885
3911
3886
3912
const unsigned Opcode = Inst.getOpcode ();
3887
3913
if (!isVOPD (Opcode))
3888
- return true ;
3914
+ return {} ;
3889
3915
3890
3916
const MCRegisterInfo *TRI = getContext ().getRegisterInfo ();
3891
3917
@@ -3896,24 +3922,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3896
3922
: MCRegister ();
3897
3923
};
3898
3924
3899
- // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3900
- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3925
+ // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3926
+ // source-cache.
3927
+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3928
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3929
+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3930
+ bool AllowSameVGPR = isGFX1250 ();
3931
+
3932
+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3933
+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3934
+ int I = getNamedOperandIdx (Opcode, OpName);
3935
+ const MCOperand &Op = Inst.getOperand (I);
3936
+ if (!Op.isImm ())
3937
+ continue ;
3938
+ int64_t Imm = Op.getImm ();
3939
+ if (!AMDGPU::isInlinableLiteral32 (Imm, hasInv2PiInlineImm ()) &&
3940
+ !AMDGPU::isInlinableLiteral64 (Imm, hasInv2PiInlineImm ()))
3941
+ return (unsigned )I;
3942
+ }
3943
+
3944
+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3945
+ OpName::vsrc2Y, OpName::imm}) {
3946
+ int I = getNamedOperandIdx (Opcode, OpName);
3947
+ if (I == -1 )
3948
+ continue ;
3949
+ const MCOperand &Op = Inst.getOperand (I);
3950
+ if (Op.isImm ())
3951
+ return (unsigned )I;
3952
+ }
3953
+ }
3901
3954
3902
3955
const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
3903
- auto InvalidCompOprIdx =
3904
- InstInfo.getInvalidCompOperandIndex (getVRegIdx, SkipSrc);
3905
- if (!InvalidCompOprIdx)
3956
+ auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex (
3957
+ getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3958
+
3959
+ return InvalidCompOprIdx;
3960
+ }
3961
+
3962
+ bool AMDGPUAsmParser::validateVOPD (const MCInst &Inst,
3963
+ const OperandVector &Operands) {
3964
+
3965
+ unsigned Opcode = Inst.getOpcode ();
3966
+ bool AsVOPD3 = MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3;
3967
+
3968
+ if (AsVOPD3) {
3969
+ for (unsigned I = 0 , E = Operands.size (); I != E; ++I) {
3970
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3971
+ if ((Op.isRegKind () || Op.isImmTy (AMDGPUOperand::ImmTyNone)) &&
3972
+ (Op.getModifiers ().getFPModifiersOperand () & SISrcMods::ABS))
3973
+ Error (Op.getStartLoc (), " ABS not allowed in VOPD3 instructions" );
3974
+ }
3975
+ }
3976
+
3977
+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, AsVOPD3);
3978
+ if (!InvalidCompOprIdx.has_value ())
3906
3979
return true ;
3907
3980
3908
3981
auto CompOprIdx = *InvalidCompOprIdx;
3982
+ const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
3909
3983
auto ParsedIdx =
3910
3984
std::max (InstInfo[VOPD::X].getIndexInParsedOperands (CompOprIdx),
3911
3985
InstInfo[VOPD::Y].getIndexInParsedOperands (CompOprIdx));
3912
3986
assert (ParsedIdx > 0 && ParsedIdx < Operands.size ());
3913
3987
3914
3988
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc ();
3915
3989
if (CompOprIdx == VOPD::Component::DST) {
3916
- Error (Loc, " one dst register must be even and the other odd" );
3990
+ if (AsVOPD3)
3991
+ Error (Loc, " dst registers must be distinct" );
3992
+ else
3993
+ Error (Loc, " one dst register must be even and the other odd" );
3917
3994
} else {
3918
3995
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3919
3996
Error (Loc, Twine (" src" ) + Twine (CompSrcIdx) +
@@ -3923,6 +4000,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3923
4000
return false ;
3924
4001
}
3925
4002
4003
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
// potentially used as VOPD3 with the same operands.
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // First check if it fits VOPD.
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    return false; // Satisfies VOPD as-is, no need to switch encodings.

  // Then check if it fits VOPD3.
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // If failed operand is dst it is better to show error about VOPD3
    // instruction as it has more capabilities and error message will be
    // more informative. If the dst is not legal for VOPD3, then it is not
    // legal for VOPD either.
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      return true;

    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
    // with a conflict in tied implicit src2 of fmac and no asm operand to
    // point to.
    return false;
  }
  return true;
}
4028
+
4029
// \returns true if a VOPD3 instruction can be also represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(Opcode, &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
  // Both component opcodes must be encodable as plain VOPD (not VOPD3-only).
  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
    return false;

  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    if (I == -1)
      continue; // This VOPD3 variant has no such modifier operand.
    if (Inst.getOperand(I).getImm())
      return false;
  }

  // Only prefer the shorter VOPD form if it also satisfies VOPD register
  // constraints (tryVOPD3 returns false when plain VOPD fits).
  return !tryVOPD3(Inst);
}
4059
+
4060
+ // VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4061
+ // form but switch to VOPD3 otherwise.
4062
+ bool AMDGPUAsmParser::tryAnotherVOPDEncoding (const MCInst &Inst) {
4063
+ const unsigned Opcode = Inst.getOpcode ();
4064
+ if (!isGFX1250 () || !isVOPD (Opcode))
4065
+ return false ;
4066
+
4067
+ if (MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3)
4068
+ return tryVOPD (Inst);
4069
+ return tryVOPD3 (Inst);
4070
+ }
4071
+
3926
4072
bool AMDGPUAsmParser::validateIntClampSupported (const MCInst &Inst) {
3927
4073
3928
4074
const unsigned Opc = Inst.getOpcode ();
@@ -5243,7 +5389,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5243
5389
if (!validateConstantBusLimitations (Inst, Operands)) {
5244
5390
return false ;
5245
5391
}
5246
- if (!validateVOPDRegBankConstraints (Inst, Operands)) {
5392
+ if (!validateVOPD (Inst, Operands)) {
5247
5393
return false ;
5248
5394
}
5249
5395
if (!validateIntClampSupported (Inst)) {
@@ -9244,8 +9390,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9244
9390
9245
9391
// Create VOPD MCInst operands using parsed assembler operands.
9246
9392
void AMDGPUAsmParser::cvtVOPD (MCInst &Inst, const OperandVector &Operands) {
9393
+ const MCInstrDesc &Desc = MII.get (Inst.getOpcode ());
9394
+
9247
9395
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9248
9396
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9397
+ if (isRegOrImmWithInputMods (Desc, Inst.getNumOperands ())) {
9398
+ Op.addRegOrImmWithFPInputModsOperands (Inst, 2 );
9399
+ return ;
9400
+ }
9249
9401
if (Op.isReg ()) {
9250
9402
Op.addRegOperands (Inst, 1 );
9251
9403
return ;
@@ -9274,6 +9426,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9274
9426
if (CInfo.hasSrc2Acc ())
9275
9427
addOp (CInfo.getIndexOfDstInParsedOperands ());
9276
9428
}
9429
+
9430
+ int BitOp3Idx =
9431
+ AMDGPU::getNamedOperandIdx (Inst.getOpcode (), AMDGPU::OpName::bitop3);
9432
+ if (BitOp3Idx != -1 ) {
9433
+ OptionalImmIndexMap OptIdx;
9434
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back ());
9435
+ if (Op.isImm ())
9436
+ OptIdx[Op.getImmTy ()] = Operands.size () - 1 ;
9437
+
9438
+ addOptionalImmOperand (Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9439
+ }
9277
9440
}
9278
9441
9279
9442
// ===----------------------------------------------------------------------===//
0 commit comments