@@ -335,6 +335,22 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
+  bool isRegOrInlineImmWithFP64InputMods() const {
+    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
+  }
+
+  bool isVRegWithInputMods(unsigned RCID) const {
+    return isRegClass(RCID);
+  }
+
+  bool isVRegWithFP32InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
+  }
+
+  bool isVRegWithFP64InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
+  }
+
   bool isPackedFP16InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
   }
@@ -527,7 +543,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
 
-  bool isVCSrcB64() const {
+  bool isVCSrc_b64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
@@ -553,7 +569,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
-  bool isVCSrcF64() const {
+  bool isVCSrc_f64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
@@ -601,7 +617,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
   }
 
-  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
+  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
 
   bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
 
@@ -617,23 +633,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
 
-  bool isVCSrcV2FP32() const {
-    return isVCSrcF64();
-  }
+  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
 
   bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
 
-  bool isVCSrcV2INT32() const {
-    return isVCSrcB64();
-  }
+  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
 
   bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
 
   bool isVSrc_f32() const {
     return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
   }
 
-  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
+  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
 
   bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
 
@@ -1527,6 +1539,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
 
+  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
+
   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
 
   bool isGFX10_BEncoding() const {
@@ -1774,8 +1788,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
   bool validateSOPLiteral(const MCInst &Inst) const;
   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
-  bool validateVOPDRegBankConstraints(const MCInst &Inst,
-                                      const OperandVector &Operands);
+  std::optional<unsigned>
+  checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3);
+  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
+  bool tryVOPD(const MCInst &Inst);
+  bool tryVOPD3(const MCInst &Inst);
+  bool tryAnotherVOPDEncoding(const MCInst &Inst);
+
   bool validateIntClampSupported(const MCInst &Inst);
   bool validateMIMGAtomicDMask(const MCInst &Inst);
   bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3524,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
+  // Asm can first try to match VOPD or VOPD3. By failing early here with
+  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
+  // Checking later during validateInstruction does not give a chance to retry
+  // parsing as a different encoding.
+  if (tryAnotherVOPDEncoding(Inst))
+    return Match_InvalidOperand;
+
   return Match_Success;
 }
 
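The early rejection above relies on how the generated assembly matcher behaves: when checkTargetMatchPredicate returns Match_InvalidOperand, the matcher skips this opcode and tries the next candidate encoding for the same mnemonic, which is how an instruction parsed as VOPD can be retried as VOPD3 (and vice versa). A minimal, hypothetical sketch of that retry loop follows; Encoding, OperandsMatch, and PreferOtherVOPDForm are illustrative stand-ins, not LLVM APIs.

#include <optional>
#include <vector>

enum MatchResult { Match_Success, Match_InvalidOperand };

struct Encoding {
  unsigned Opcode;
  bool OperandsMatch;       // the parsed operands fit this encoding
  bool PreferOtherVOPDForm; // analogue of tryAnotherVOPDEncoding() above
};

// The first candidate that matches and is not deliberately rejected wins; a
// rejection behaves like Match_InvalidOperand and simply moves on.
static std::optional<unsigned>
matchFirstEncoding(const std::vector<Encoding> &Candidates) {
  for (const Encoding &E : Candidates) {
    if (!E.OperandsMatch || E.PreferOtherVOPDForm)
      continue;
    return E.Opcode;
  }
  return std::nullopt;
}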
@@ -3685,8 +3711,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
 
     return {getNamedOperandIdx(Opcode, OpName::src0X),
             getNamedOperandIdx(Opcode, OpName::vsrc1X),
+            getNamedOperandIdx(Opcode, OpName::vsrc2X),
             getNamedOperandIdx(Opcode, OpName::src0Y),
             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
+            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
             ImmXIdx,
             ImmIdx};
   }
@@ -3816,12 +3844,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
   return false;
 }
 
-bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
-    const MCInst &Inst, const OperandVector &Operands) {
+std::optional<unsigned> AMDGPUAsmParser::checkVOPDRegBankConstraints(
+    const MCInst &Inst, bool AsVOPD3) {
 
   const unsigned Opcode = Inst.getOpcode();
   if (!isVOPD(Opcode))
-    return true;
+    return {};
 
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
 
@@ -3833,23 +3861,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
   };
 
   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
-  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
+  bool AllowSameVGPR = isGFX1250();
+
+  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
+    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      const MCOperand &Op = Inst.getOperand(I);
+      if (!Op.isImm())
+        continue;
+      int64_t Imm = Op.getImm();
+      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
+          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
+        return I;
+    }
+
+    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y,
+                        OpName::vsrc2X, OpName::vsrc2Y,
+                        OpName::imm}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      if (I == -1)
+        continue;
+      const MCOperand &Op = Inst.getOperand(I);
+      if (Op.isImm())
+        return I;
+    }
+  }
 
   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
   auto InvalidCompOprIdx =
-      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
-  if (!InvalidCompOprIdx)
+      InstInfo.getInvalidCompOperandIndex(getVRegIdx, *TRI, SkipSrc,
+                                          AllowSameVGPR, AsVOPD3);
+
+  return InvalidCompOprIdx;
+}
+
+bool AMDGPUAsmParser::validateVOPD(
+    const MCInst &Inst, const OperandVector &Operands) {
+
+  unsigned Opcode = Inst.getOpcode();
+  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
+
+  if (AsVOPD3) {
+    for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
+          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
+        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
+    }
+  }
+
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
+  if (!InvalidCompOprIdx.has_value())
     return true;
 
   auto CompOprIdx = *InvalidCompOprIdx;
+  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
   auto ParsedIdx =
       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
 
   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
   if (CompOprIdx == VOPD::Component::DST) {
-    Error(Loc, "one dst register must be even and the other odd");
+    if (AsVOPD3)
+      Error(Loc, "dst registers must be distinct");
+    else
+      Error(Loc, "one dst register must be even and the other odd");
   } else {
     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3859,6 +3939,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
   return false;
 }
 
+// \returns true if \p Inst does not satisfy VOPD constraints, but can
+// potentially be used as VOPD3 with the same operands.
+bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
+  // First check if it fits VOPD
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
+  if (!InvalidCompOprIdx.has_value())
+    return false;
+
+  // Then if it fits VOPD3
+  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
+  if (InvalidCompOprIdx.has_value()) {
+    // If the failed operand is a dst it is better to report the error against
+    // the VOPD3 instruction, as it has more capabilities and the error message
+    // will be more informative. If the dst is not legal for VOPD3, then it is
+    // not legal for VOPD either.
+    if (*InvalidCompOprIdx == VOPD::Component::DST)
+      return true;
+
+    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
+    // with a conflict in tied implicit src2 of fmac and no asm operand to
+    // point to.
+    return false;
+  }
+  return true;
+}
+
+// \returns true if a VOPD3 instruction can also be represented as a shorter
+// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  const auto &II = getVOPDInstInfo(Opcode, &MII);
+  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
+  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
+      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
+    return false;
+
+  // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
+  // an explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
+  // be parsed as VOPD, which does not accept src2.
+  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
+      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
+    return false;
+
+  // If any modifiers are set this cannot be VOPD.
+  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
+                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
+                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
+    int I = getNamedOperandIdx(Opcode, OpName);
+    if (I == -1)
+      continue;
+    if (Inst.getOperand(I).getImm())
+      return false;
+  }
+
+  return !tryVOPD3(Inst);
+}
+
+// VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
+// VOPD form but switch to VOPD3 otherwise.
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  if (!isGFX1250() || !isVOPD(Opcode))
+    return false;
+
+  if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
+    return tryVOPD(Inst);
+  return tryVOPD3(Inst);
+}
+
 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
 
   const unsigned Opc = Inst.getOpcode();
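The two diagnostics emitted by validateVOPD above capture the key difference the tryVOPD/tryVOPD3 pair is arbitrating: plain VOPD requires the X and Y destination VGPRs to land in opposite even/odd banks, while VOPD3 only requires them to be distinct. A rough, self-contained illustration of that distinction, with made-up helper names rather than the real getInvalidCompOperandIndex logic:

#include <cassert>

// Hypothetical helpers mirroring the two error strings; DstX/DstY are VGPR
// indices of the X and Y component destinations.
static bool vopdDstsLegal(unsigned DstX, unsigned DstY) {
  // "one dst register must be even and the other odd"
  return (DstX & 1) != (DstY & 1);
}

static bool vopd3DstsLegal(unsigned DstX, unsigned DstY) {
  // "dst registers must be distinct"
  return DstX != DstY;
}

int main() {
  // v2 and v4: both even, so rejected as VOPD but acceptable as VOPD3.
  assert(!vopdDstsLegal(2, 4) && vopd3DstsLegal(2, 4));
  // Identical destinations are rejected either way.
  assert(!vopdDstsLegal(5, 5) && !vopd3DstsLegal(5, 5));
  return 0;
}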
@@ -5179,7 +5328,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateConstantBusLimitations(Inst, Operands)) {
     return false;
   }
-  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
+  if (!validateVOPD(Inst, Operands)) {
     return false;
   }
   if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9329,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
 
 // Create VOPD MCInst operands using parsed assembler operands.
 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+      return;
+    }
     if (Op.isReg()) {
       Op.addRegOperands(Inst, 1);
       return;
@@ -9210,6 +9365,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
     if (CInfo.hasSrc2Acc())
       addOp(CInfo.getIndexOfDstInParsedOperands());
   }
+
+  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                             AMDGPU::OpName::bitop3);
+  if (BitOp3Idx != -1) {
+    OptionalImmIndexMap OptIdx;
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
+    if (Op.isImm())
+      OptIdx[Op.getImmTy()] = Operands.size() - 1;
+
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+  }
 }
 
 //===----------------------------------------------------------------------===//