@@ -335,6 +335,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
+  bool isRegOrInlineImmWithFP64InputMods() const {
+    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
+  }
+
+  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
+
+  bool isVRegWithFP32InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
+  }
+
+  bool isVRegWithFP64InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
+  }
+
   bool isPackedFP16InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
   }
@@ -527,7 +541,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
 
-  bool isVCSrcB64() const {
+  bool isVCSrc_b64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
@@ -553,7 +567,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
-  bool isVCSrcF64() const {
+  bool isVCSrc_f64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
@@ -601,7 +615,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
   }
 
-  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
+  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
 
   bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
 
@@ -617,23 +631,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
 
-  bool isVCSrcV2FP32() const {
-    return isVCSrcF64();
-  }
+  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
 
   bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
 
-  bool isVCSrcV2INT32() const {
-    return isVCSrcB64();
-  }
+  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
 
   bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
 
   bool isVSrc_f32() const {
     return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
   }
 
-  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
+  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
 
   bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
 
@@ -1527,6 +1537,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
 
+  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
+
   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
 
   bool isGFX10_BEncoding() const {
@@ -1774,8 +1786,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
   bool validateSOPLiteral(const MCInst &Inst) const;
   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
-  bool validateVOPDRegBankConstraints(const MCInst &Inst,
-                                      const OperandVector &Operands);
+  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
+                                                      bool AsVOPD3);
+  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
+  bool tryVOPD(const MCInst &Inst);
+  bool tryVOPD3(const MCInst &Inst);
+  bool tryAnotherVOPDEncoding(const MCInst &Inst);
+
   bool validateIntClampSupported(const MCInst &Inst);
   bool validateMIMGAtomicDMask(const MCInst &Inst);
   bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3522,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
+  // Asm can first try to match VOPD or VOPD3. By failing early here with
+  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
+  // Checking later during validateInstruction does not give a chance to retry
+  // parsing as a different encoding.
+  if (tryAnotherVOPDEncoding(Inst))
+    return Match_InvalidOperand;
+
   return Match_Success;
 }
 
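The early bail-out above leans on how the generated matcher works: candidate encodings for a mnemonic are tried in order, and a soft failure such as Match_InvalidOperand simply moves matching on to the next candidate, so text that parsed as VOPD can be re-matched as VOPD3 and vice versa. A minimal standalone sketch of that fall-through pattern, assuming nothing beyond the idea itself (this is not LLVM code):

#include <vector>

// Candidates for one mnemonic are matched in order; one that reports an
// operand mismatch yields to the next. checkTargetMatchPredicate() feeds into
// the same loop by downgrading an otherwise-successful match.
enum MatchKind { MatchSuccess, MatchInvalidOperand };

struct Candidate {
  MatchKind (*Match)(unsigned InstId); // stand-in for a per-encoding matcher
};

MatchKind matchFirstViable(const std::vector<Candidate> &Cands, unsigned Id) {
  MatchKind Last = MatchInvalidOperand;
  for (const Candidate &C : Cands) {
    Last = C.Match(Id);
    if (Last == MatchSuccess) // first encoding that fits wins
      return Last;
  }
  return Last; // otherwise the caller diagnoses the last failure
}
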
@@ -3685,8 +3709,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
 
   return {getNamedOperandIdx(Opcode, OpName::src0X),
           getNamedOperandIdx(Opcode, OpName::vsrc1X),
+          getNamedOperandIdx(Opcode, OpName::vsrc2X),
           getNamedOperandIdx(Opcode, OpName::src0Y),
           getNamedOperandIdx(Opcode, OpName::vsrc1Y),
+          getNamedOperandIdx(Opcode, OpName::vsrc2Y),
           ImmXIdx,
           ImmIdx};
 }
@@ -3816,12 +3842,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
   return false;
 }
 
-bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
-    const MCInst &Inst, const OperandVector &Operands) {
+std::optional<unsigned>
+AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
 
   const unsigned Opcode = Inst.getOpcode();
   if (!isVOPD(Opcode))
-    return true;
+    return {};
 
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
 
@@ -3833,23 +3859,73 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
   };
 
   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
-  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
+  bool AllowSameVGPR = isGFX1250();
+
+  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
+    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      const MCOperand &Op = Inst.getOperand(I);
+      if (!Op.isImm())
+        continue;
+      int64_t Imm = Op.getImm();
+      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
+          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
+        return I;
+    }
+
+    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
+                        OpName::vsrc2Y, OpName::imm}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      if (I == -1)
+        continue;
+      const MCOperand &Op = Inst.getOperand(I);
+      if (Op.isImm())
+        return I;
+    }
+  }
 
   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
-  auto InvalidCompOprIdx =
-      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
-  if (!InvalidCompOprIdx)
+  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
+      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
+
+  return InvalidCompOprIdx;
+}
+
+bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
+                                   const OperandVector &Operands) {
+
+  unsigned Opcode = Inst.getOpcode();
+  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
+
+  if (AsVOPD3) {
+    for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
+          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
+        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
+    }
+  }
+
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
+  if (!InvalidCompOprIdx.has_value())
     return true;
 
   auto CompOprIdx = *InvalidCompOprIdx;
+  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
   auto ParsedIdx =
       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
 
   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
   if (CompOprIdx == VOPD::Component::DST) {
-    Error(Loc, "one dst register must be even and the other odd");
+    if (AsVOPD3)
+      Error(Loc, "dst registers must be distinct");
+    else
+      Error(Loc, "one dst register must be even and the other odd");
   } else {
     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
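For context on the literal check above: only immediates that can be encoded as inline constants survive the src0X/src0Y test, and anything that would need a literal dword is rejected for VOPD3. A rough, self-contained sketch of what "inlinable" means here; the real predicates are AMDGPU::isInlinableLiteral32/64 in AMDGPUBaseInfo, and the float list below is quoted from memory, so treat it as an assumption:

#include <cstdint>

// Simplified stand-ins for the inline-constant tests used above.
static bool isInlineInt(int64_t Imm) {
  return Imm >= -16 && Imm <= 64; // small integers encode directly in the operand
}

static bool isInlineF32Bits(uint32_t Bits) {
  // A few float values also encode inline: 0.0, +/-0.5, +/-1.0, +/-2.0,
  // +/-4.0 (plus 1/(2*pi) on targets where hasInv2PiInlineImm() is true).
  switch (Bits) {
  case 0x00000000:                  // 0.0
  case 0x3f000000: case 0xbf000000: // +/-0.5
  case 0x3f800000: case 0xbf800000: // +/-1.0
  case 0x40000000: case 0xc0000000: // +/-2.0
  case 0x40800000: case 0xc0800000: // +/-4.0
    return true;
  default:
    return false;
  }
}
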
@@ -3859,6 +3935,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
   return false;
 }
 
+// \returns true if \p Inst does not satisfy VOPD constraints, but can
+// potentially be used as VOPD3 with the same operands.
+bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
+  // First check if it fits VOPD
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
+  if (!InvalidCompOprIdx.has_value())
+    return false;
+
+  // Then if it fits VOPD3
+  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
+  if (InvalidCompOprIdx.has_value()) {
+    // If the failed operand is dst it is better to show an error about the
+    // VOPD3 instruction as it has more capabilities and the error message
+    // will be more informative. If the dst is not legal for VOPD3, then it
+    // is not legal for VOPD either.
+    if (*InvalidCompOprIdx == VOPD::Component::DST)
+      return true;
+
+    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
+    // with a conflict in tied implicit src2 of fmac and no asm operand to
+    // point to.
+    return false;
+  }
+  return true;
+}
+
+// \returns true if a VOPD3 instruction can also be represented as the shorter
+// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  const auto &II = getVOPDInstInfo(Opcode, &MII);
+  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
+  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
+      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
+    return false;
+
+  // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
+  // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
+  // be parsed as VOPD, which does not accept src2.
+  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
+      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
+    return false;
+
+  // If any modifiers are set this cannot be VOPD.
+  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
+                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
+                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
+    int I = getNamedOperandIdx(Opcode, OpName);
+    if (I == -1)
+      continue;
+    if (Inst.getOperand(I).getImm())
+      return false;
+  }
+
+  return !tryVOPD3(Inst);
+}
+
+// VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
+// VOPD form but switch to VOPD3 otherwise.
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  if (!isGFX1250() || !isVOPD(Opcode))
+    return false;
+
+  if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
+    return tryVOPD(Inst);
+  return tryVOPD3(Inst);
+}
+
 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
 
   const unsigned Opc = Inst.getOpcode();
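Taken together, tryVOPD3(), tryVOPD() and tryAnotherVOPDEncoding() implement a two-way fallback: a pair parsed as VOPD that only violates the bank rules is re-tried as VOPD3, while a pair parsed as VOPD3 that needs none of its extra capabilities is demoted to the shorter VOPD form. A compressed restatement of that policy, purely illustrative, with the individual checks collapsed into booleans and the dst-operand special case dropped (all names here are made up):

// Returns true when the instruction should be rejected so the matcher can
// retry it under the other VOPD encoding.
struct VOPDFitness {
  bool FitsVOPD;       // passes the VOPD bank/parity constraints
  bool FitsVOPD3;      // passes the relaxed VOPD3 constraints
  bool NeedsVOPD3Only; // uses src2, modifiers, or another VOPD3-only feature
};

bool shouldRetryOtherEncoding(bool CurrentlyVOPD3, const VOPDFitness &F) {
  if (CurrentlyVOPD3) // prefer the shorter VOPD form when it is sufficient
    return F.FitsVOPD && !F.NeedsVOPD3Only;
  return !F.FitsVOPD && F.FitsVOPD3; // otherwise escalate VOPD -> VOPD3
}
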
@@ -5179,7 +5324,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateConstantBusLimitations(Inst, Operands)) {
     return false;
   }
-  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
+  if (!validateVOPD(Inst, Operands)) {
     return false;
   }
   if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9325,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
 
 // Create VOPD MCInst operands using parsed assembler operands.
 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+      return;
+    }
     if (Op.isReg()) {
       Op.addRegOperands(Inst, 1);
       return;
@@ -9210,6 +9361,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
     if (CInfo.hasSrc2Acc())
       addOp(CInfo.getIndexOfDstInParsedOperands());
   }
+
+  int BitOp3Idx =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
+  if (BitOp3Idx != -1) {
+    OptionalImmIndexMap OptIdx;
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
+    if (Op.isImm())
+      OptIdx[Op.getImmTy()] = Operands.size() - 1;
+
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+  }
 }
 
 //===----------------------------------------------------------------------===//