Skip to content

Commit 2ae76d5

Browse files
authored
[AMDGPU] Use different values for SISrcMods::NEG and SISrcMods::SEXT (#147964)
The SISrcMods::NEG and SISrcMods::SEXT enumerators share the same enum value. When they were introduced, it was assumed that the floating-point "neg"/"abs" source modifiers and the integer "sext" source modifier are mutually exclusive. This can lead to miscompilation, because a "sext" modifier may currently be accepted erroneously on some instructions that are encoded as floating-point instructions (see the test case modified by this PR); the encoding will then use the "neg" modifier. Furthermore, the "neg"/"abs" and "sext" modifiers are not necessarily mutually exclusive, i.e. the hardware may support both, and this cannot be handled correctly with the current representation. This patch changes the SISrcMods enum to use different values for NEG and SEXT, as a first step towards allowing their coexistence on the same instruction.
1 parent eb0d61a commit 2ae76d5

File tree

5 files changed

+39
-35
lines changed

5 files changed

+39
-35
lines changed

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -262,16 +262,16 @@ enum OperandType : unsigned {
262262
// Input operand modifiers bit-masks
263263
// NEG and SEXT use distinct bit-masks (they used to share one), so that both can be represented on the same operand.
264264
namespace SISrcMods {
265-
enum : unsigned {
266-
NONE = 0,
267-
NEG = 1 << 0, // Floating-point negate modifier
268-
ABS = 1 << 1, // Floating-point absolute modifier
269-
SEXT = 1 << 0, // Integer sign-extend modifier
270-
NEG_HI = ABS, // Floating-point negate high packed component modifier.
271-
OP_SEL_0 = 1 << 2,
272-
OP_SEL_1 = 1 << 3,
273-
DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
274-
};
265+
enum : unsigned {
266+
NONE = 0,
267+
NEG = 1 << 0, // Floating-point negate modifier
268+
ABS = 1 << 1, // Floating-point absolute modifier
269+
SEXT = 1 << 4, // Integer sign-extend modifier
270+
NEG_HI = ABS, // Floating-point negate high packed component modifier.
271+
OP_SEL_0 = 1 << 2,
272+
OP_SEL_1 = 1 << 3,
273+
DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
274+
};
275275
}
276276

277277
namespace SIOutMods {

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -632,9 +632,9 @@ def SDWA {
632632
class VOP_SDWAe<VOPProfile P> : Enc64 {
633633
bits<8> src0;
634634
bits<3> src0_sel;
635-
bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
635+
bits<5> src0_modifiers; // float: {abs,neg}, int {sext}
636636
bits<3> src1_sel;
637-
bits<2> src1_modifiers;
637+
bits<5> src1_modifiers;
638638
bits<3> dst_sel;
639639
bits<2> dst_unused;
640640
bits<1> clamp;
@@ -644,10 +644,10 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
644644
let Inst{44-43} = !if(P.EmitDstSel, dst_unused{1-0}, ?);
645645
let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
646646
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
647-
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
647+
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0);
648648
let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
649649
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
650-
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
650+
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0);
651651
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
652652
}
653653

@@ -668,18 +668,18 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
668668
class VOP_SDWA9e<VOPProfile P> : Enc64 {
669669
bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
670670
bits<3> src0_sel;
671-
bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
671+
bits<5> src0_modifiers; // float: {abs,neg}, int {sext}
672672
bits<3> src1_sel;
673-
bits<2> src1_modifiers;
673+
bits<5> src1_modifiers;
674674
bits<1> src1_sgpr;
675675

676676
let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
677677
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
678-
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
678+
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0);
679679
let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
680680
let Inst{55} = !if(P.HasSrc0, src0{8}, 0);
681681
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
682-
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
682+
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0);
683683
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
684684
let Inst{63} = 0; // src1_sgpr - should be specified in subclass
685685
}

llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ body: |
484484
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
485485
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
486486
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec
487-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
487+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
488488
; CHECK-NEXT: S_ENDPGM 0
489489
%1:vgpr_32 = COPY $vgpr0
490490
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -572,7 +572,7 @@ body: |
572572
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
573573
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
574574
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec
575-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
575+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
576576
; CHECK-NEXT: S_ENDPGM 0
577577
%1:vgpr_32 = COPY $vgpr0
578578
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -638,7 +638,7 @@ body: |
638638
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
639639
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
640640
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec
641-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
641+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
642642
; CHECK-NEXT: S_ENDPGM 0
643643
%1:vgpr_32 = COPY $vgpr0
644644
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -704,7 +704,7 @@ body: |
704704
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
705705
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
706706
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec
707-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
707+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
708708
; CHECK-NEXT: S_ENDPGM 0
709709
%1:vgpr_32 = COPY $vgpr0
710710
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -792,7 +792,7 @@ body: |
792792
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
793793
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
794794
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
795-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
795+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
796796
; CHECK-NEXT: S_ENDPGM 0
797797
%1:vgpr_32 = COPY $vgpr0
798798
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -814,7 +814,7 @@ body: |
814814
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
815815
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
816816
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
817-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
817+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
818818
; CHECK-NEXT: S_ENDPGM 0
819819
%1:vgpr_32 = COPY $vgpr0
820820
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -836,7 +836,7 @@ body: |
836836
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
837837
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
838838
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
839-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
839+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
840840
; CHECK-NEXT: S_ENDPGM 0
841841
%1:vgpr_32 = COPY $vgpr0
842842
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -902,7 +902,7 @@ body: |
902902
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
903903
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
904904
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
905-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
905+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
906906
; CHECK-NEXT: S_ENDPGM 0
907907
%1:vgpr_32 = COPY $vgpr0
908908
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -924,7 +924,7 @@ body: |
924924
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
925925
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
926926
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
927-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
927+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
928928
; CHECK-NEXT: S_ENDPGM 0
929929
%1:vgpr_32 = COPY $vgpr0
930930
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -946,7 +946,7 @@ body: |
946946
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
947947
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
948948
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
949-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec
949+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec
950950
; CHECK-NEXT: S_ENDPGM 0
951951
%1:vgpr_32 = COPY $vgpr0
952952
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -968,7 +968,7 @@ body: |
968968
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
969969
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
970970
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
971-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
971+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
972972
; CHECK-NEXT: S_ENDPGM 0
973973
%1:vgpr_32 = COPY $vgpr0
974974
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -990,7 +990,7 @@ body: |
990990
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
991991
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
992992
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
993-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec
993+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec
994994
; CHECK-NEXT: S_ENDPGM 0
995995
%1:vgpr_32 = COPY $vgpr0
996996
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1012,7 +1012,7 @@ body: |
10121012
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
10131013
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
10141014
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
1015-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
1015+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
10161016
; CHECK-NEXT: S_ENDPGM 0
10171017
%1:vgpr_32 = COPY $vgpr0
10181018
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1034,7 +1034,7 @@ body: |
10341034
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
10351035
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
10361036
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
1037-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
1037+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
10381038
; CHECK-NEXT: S_ENDPGM 0
10391039
%1:vgpr_32 = COPY $vgpr0
10401040
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1056,7 +1056,7 @@ body: |
10561056
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
10571057
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
10581058
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
1059-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec
1059+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec
10601060
; CHECK-NEXT: S_ENDPGM 0
10611061
%1:vgpr_32 = COPY $vgpr0
10621062
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1078,7 +1078,7 @@ body: |
10781078
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
10791079
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
10801080
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
1081-
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec
1081+
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec
10821082
; CHECK-NEXT: S_ENDPGM 0
10831083
%1:vgpr_32 = COPY $vgpr0
10841084
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec

llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,7 @@ v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
8888
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05]
8989

9090
v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
91+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
92+
93+
v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
9194
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16]

llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
8888
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05
8989

90-
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
90+
# FIXME: The instruction gets printed using the wrong function (AMDGPUInstPrinter::printOperandAndIntInputMods) and hence the "-" modifier is not printed.
91+
# COM: v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
9192
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16
9293

0 commit comments

Comments
 (0)