Skip to content

Commit ce7851f

Browse files
authored
[AMDGPU][MC] Allow op_sel in v_alignbit_b32 etc in GFX9 and GFX10 (#142188)
In GFX9 and GFX10, the op_sel modifier should be allowed in the instructions v_alignbit_b32 and v_alignbyte_b32.
1 parent 55e1e9c commit ce7851f

File tree

11 files changed

+230
-12
lines changed

11 files changed

+230
-12
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,7 @@ def : AMDGPUPat <
24652465
>;
24662466

24672467
let True16Predicate = NotHasTrue16BitInsts in {
2468+
let SubtargetPredicate = isNotGFX9Plus in {
24682469
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
24692470

24702471
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2474,6 +2475,35 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24742475
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24752476
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
24762477
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2478+
} // isNotGFX9Plus
2479+
2480+
let SubtargetPredicate = isGFX9GFX10 in {
2481+
def : GCNPat <
2482+
(rotr i32:$src0, i32:$src1),
2483+
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
2484+
/* src1_modifiers */ 0, $src0,
2485+
/* src2_modifiers */ 0,
2486+
$src1, /* clamp */ 0, /* op_sel */ 0)
2487+
>;
2488+
2489+
foreach pat = [(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
2490+
(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
2491+
def : GCNPat<pat,
2492+
(V_ALIGNBIT_B32_opsel_e64 0, /* src0_modifiers */
2493+
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2494+
0, /* src1_modifiers */
2495+
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2496+
0, /* src2_modifiers */
2497+
$src1, /* clamp */ 0, /* op_sel */ 0)
2498+
>;
2499+
2500+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2501+
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
2502+
/* src1_modifiers */ 0, $src1,
2503+
/* src2_modifiers */ 0,
2504+
$src2, /* clamp */ 0, /* op_sel */ 0)
2505+
>;
2506+
} // isGFX9GFX10
24772507
} // end True16Predicate = NotHasTrue16BitInsts
24782508

24792509
let True16Predicate = UseRealTrue16Insts in {
@@ -3074,6 +3104,8 @@ def : GCNPat <
30743104
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
30753105
>;
30763106

3107+
// This pattern for bswap is used for pre-GFX8. For GFX8+, bswap is mapped
3108+
// to V_PERM_B32.
30773109
let True16Predicate = NotHasTrue16BitInsts in
30783110
def : GCNPat <
30793111
(i32 (bswap i32:$a)),
@@ -3549,15 +3581,20 @@ def : GCNPat <
35493581

35503582
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
35513583
// Special case, can use V_ALIGNBIT (always uses encoded literal)
3552-
let True16Predicate = NotHasTrue16BitInsts in
3553-
def : GCNPat <
3584+
let True16Predicate = NotHasTrue16BitInsts in {
3585+
defvar BuildVectorToAlignBitPat =
35543586
(vecTy (DivergentBinFrag<build_vector>
35553587
(Ty !if(!eq(Ty, i16),
35563588
(Ty (trunc (srl VGPR_32:$a, (i32 16)))),
35573589
(Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
3558-
(Ty VGPR_32:$b))),
3559-
(V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
3560-
>;
3590+
(Ty VGPR_32:$b)));
3591+
3592+
let SubtargetPredicate = isNotGFX9Plus in
3593+
def : GCNPat<BuildVectorToAlignBitPat, (V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))>;
3594+
3595+
let SubtargetPredicate = isGFX9GFX10 in
3596+
def : GCNPat<BuildVectorToAlignBitPat, (V_ALIGNBIT_B32_opsel_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i32 16), 0, 0)>;
3597+
} //True16Predicate = NotHasTrue16BitInsts
35613598

35623599
let True16Predicate = UseFakeTrue16Insts in
35633600
def : GCNPat <

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32",
222222
fshr, null_frag>;
223223

224224
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
225+
226+
// In gfx9 and gfx10, opsel is allowed for V_ALIGNBIT_B32 and V_ALIGNBYTE_B32.
227+
// Hardware uses opsel[1:0] to byte-select src2. Other opsel bits are ignored.
228+
defm V_ALIGNBIT_B32_opsel : VOP3Inst <"v_alignbit_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
229+
defm V_ALIGNBYTE_B32_opsel : VOP3Inst <"v_alignbyte_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
230+
225231
let True16Predicate = UseRealTrue16Insts in
226232
defm V_ALIGNBYTE_B32_t16 : VOP3Inst <"v_alignbyte_b32_t16", VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>>;
227233
let True16Predicate = UseFakeTrue16Insts in
@@ -1947,6 +1953,9 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
19471953
}
19481954
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
19491955

1956+
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14e, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
1957+
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14f, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
1958+
19501959
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
19511960

19521961
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
@@ -2097,8 +2106,8 @@ defm V_BFI_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
20972106
defm V_FMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
20982107
defm V_FMA_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
20992108
defm V_LERP_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
2100-
defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
2101-
defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
2109+
defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7<0x14e>;
2110+
defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7<0x14f>;
21022111
defm V_MULLIT_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
21032112
defm V_MIN3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
21042113
defm V_MIN3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
@@ -2241,6 +2250,17 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
22412250
}
22422251
}
22432252

2253+
// Instructions such as v_alignbyte_b32 allow op_sel in gfx9, but not in vi.
2254+
// The following is created to support that.
2255+
multiclass VOP3OpSel_Real_gfx9_with_name<bits<10> op, string opName, string AsmName> {
2256+
defvar psName = opName#"_e64";
2257+
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(psName), SIEncodingFamily.VI>, // note: encoding family is VI
2258+
VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(psName).Pfl> {
2259+
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(psName);
2260+
let AsmString = AsmName # ps.AsmOperands;
2261+
}
2262+
}
2263+
22442264
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
22452265

22462266
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
@@ -2260,8 +2280,10 @@ defm V_BFI_B32 : VOP3_Real_vi <0x1ca>;
22602280
defm V_FMA_F32 : VOP3_Real_vi <0x1cb>;
22612281
defm V_FMA_F64 : VOP3_Real_vi <0x1cc>;
22622282
defm V_LERP_U8 : VOP3_Real_vi <0x1cd>;
2283+
let SubtargetPredicate = isGFX8Only in {
22632284
defm V_ALIGNBIT_B32 : VOP3_Real_vi <0x1ce>;
22642285
defm V_ALIGNBYTE_B32 : VOP3_Real_vi <0x1cf>;
2286+
}
22652287
defm V_MIN3_F32 : VOP3_Real_vi <0x1d0>;
22662288
defm V_MIN3_I32 : VOP3_Real_vi <0x1d1>;
22672289
defm V_MIN3_U32 : VOP3_Real_vi <0x1d2>;
@@ -2306,6 +2328,9 @@ defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16"
23062328
defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
23072329
defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
23082330

2331+
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
2332+
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
2333+
23092334
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
23102335
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
23112336
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
33
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
46

57
---
68
name: bswap_i32_vv
@@ -19,13 +21,30 @@ body: |
1921
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935
2022
; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec
2123
; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]]
24+
;
2225
; GFX8-LABEL: name: bswap_i32_vv
2326
; GFX8: liveins: $vgpr0
2427
; GFX8-NEXT: {{ $}}
2528
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2629
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
2730
; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
2831
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
32+
;
33+
; GFX9-LABEL: name: bswap_i32_vv
34+
; GFX9: liveins: $vgpr0
35+
; GFX9-NEXT: {{ $}}
36+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
37+
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
38+
; GFX9-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
39+
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
40+
;
41+
; GFX10-LABEL: name: bswap_i32_vv
42+
; GFX10: liveins: $vgpr0
43+
; GFX10-NEXT: {{ $}}
44+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45+
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
46+
; GFX10-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
47+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
2948
%0:vgpr(s32) = COPY $vgpr0
3049
%1:vgpr(s32) = G_BSWAP %0
3150
S_ENDPGM 0, implicit %1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
33
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
4-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
5-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
66
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11 %s
77

88
---
@@ -24,6 +24,24 @@ body: |
2424
; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
2525
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_e64_]]
2626
;
27+
; GFX9-LABEL: name: fshr_s32
28+
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
29+
; GFX9-NEXT: {{ $}}
30+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
31+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
32+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
33+
; GFX9-NEXT: [[V_ALIGNBIT_B32_opsel_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
34+
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_opsel_e64_]]
35+
;
36+
; GFX10-LABEL: name: fshr_s32
37+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
38+
; GFX10-NEXT: {{ $}}
39+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
40+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
41+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
42+
; GFX10-NEXT: [[V_ALIGNBIT_B32_opsel_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
43+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_opsel_e64_]]
44+
;
2745
; GFX11-LABEL: name: fshr_s32
2846
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
2947
; GFX11-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -766,10 +766,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
766766
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr22, implicit $exec
767767
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
768768
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr46, implicit $exec
769-
; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr47, killed $vgpr10, 1, implicit $exec
770-
; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec
769+
; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_opsel_e64 0, killed $sgpr47, 0, killed $vgpr10, 0, 1, 0, 0, implicit $exec
770+
; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr17, 0, $vgpr16, 0, 1, 0, 0, implicit $exec
771771
; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
772-
; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_e64 $vgpr15, $vgpr14, 1, implicit $exec
772+
; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr15, 0, $vgpr14, 0, 1, 0, 0, implicit $exec
773773
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
774774
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
775775
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec

llvm/test/MC/AMDGPU/gfx10_asm_vop3.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,6 +3628,18 @@ v_alignbit_b32 v5, v1, v2, exec_lo
36283628
v_alignbit_b32 v5, v1, v2, exec_hi
36293629
// GFX10: encoding: [0x05,0x00,0x4e,0xd5,0x01,0x05,0xfe,0x01]
36303630

3631+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1]
3632+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3633+
3634+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1]
3635+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3636+
3637+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3638+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3639+
3640+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3641+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3642+
36313643
v_alignbyte_b32 v5, v1, v2, v3
36323644
// GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0x0e,0x04]
36333645

@@ -3715,6 +3727,18 @@ v_alignbyte_b32 v5, v1, v2, exec_lo
37153727
v_alignbyte_b32 v5, v1, v2, exec_hi
37163728
// GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0xfe,0x01]
37173729

3730+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
3731+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3732+
3733+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
3734+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3735+
3736+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3737+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3738+
3739+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3740+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3741+
37183742
v_mullit_f32 v5, v1, v2, v3
37193743
// GFX10: encoding: [0x05,0x00,0x50,0xd5,0x01,0x05,0x0e,0x04]
37203744

llvm/test/MC/AMDGPU/gfx7_err_pos.s

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,16 @@ s_load_dword s5, s[2:3], glc
4444
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: cache policy is not supported for SMRD instructions
4545
// CHECK-NEXT:{{^}}s_load_dword s5, s[2:3], glc
4646
// CHECK-NEXT:{{^}} ^
47+
48+
//==============================================================================
49+
// not a valid operand
50+
51+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
52+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
53+
// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
54+
// CHECK-NEXT:{{^}} ^
55+
56+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
57+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
58+
// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
59+
// CHECK-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx8_err_pos.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,13 @@ v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERV
4949
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
5050
// CHECK-NEXT:{{^}}v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 src1_sel:WORD_0
5151
// CHECK-NEXT:{{^}} ^
52+
53+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
54+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
55+
// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
56+
// CHECK-NEXT:{{^}} ^
57+
58+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
59+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
60+
// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
61+
// CHECK-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,6 +2829,18 @@ v_alignbit_b32 v5, v1, v2, src_execz
28292829
v_alignbit_b32 v5, v1, v2, src_scc
28302830
// CHECK: [0x05,0x00,0xce,0xd1,0x01,0x05,0xf6,0x03]
28312831

2832+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xce,0xd1,0x01,0x05,0x0e,0x04]
2833+
// CHECK: [0x05,0x08,0xce,0xd1,0x01,0x05,0x0e,0x04]
2834+
2835+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0xce,0xd1,0x01,0x05,0x0e,0x04]
2836+
// CHECK: [0x05,0x18,0xce,0xd1,0x01,0x05,0x0e,0x04]
2837+
2838+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0xce,0xd1,0x01,0x05,0x0e,0x04]
2839+
// CHECK: [0x05,0x38,0xce,0xd1,0x01,0x05,0x0e,0x04]
2840+
2841+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xce,0xd1,0x01,0x05,0x0e,0x04]
2842+
// CHECK: [0x05,0x78,0xce,0xd1,0x01,0x05,0x0e,0x04]
2843+
28322844
v_alignbyte_b32 v5, v1, v2, v3
28332845
// CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0x0e,0x04]
28342846

@@ -3000,6 +3012,18 @@ v_alignbyte_b32 v5, v1, v2, src_execz
30003012
v_alignbyte_b32 v5, v1, v2, src_scc
30013013
// CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0xf6,0x03]
30023014

3015+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
3016+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3017+
3018+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
3019+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3020+
3021+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3022+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3023+
3024+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3025+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3026+
30033027
v_min3_f32 v5, v1, v2, v3
30043028
// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x05,0x0e,0x04]
30053029

0 commit comments

Comments
 (0)