Skip to content

Commit 641ad52

Browse files
authored
[AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 (#146600)
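When targeting GFX950, disassembly of v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes the problem by declaring the $vdst_in operand of VOP_PERMLANE_SWAP with the destination register classes (DstRC in Ins32, DstRC64 in Ins64) instead of the src0 register classes, and adds assembler and disassembler tests that use v218/v219.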
1 parent 2fe0feb commit 641ad52

File tree

3 files changed

+62
-2
lines changed

3 files changed

+62
-2
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,8 @@ def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
401401
let HasExtDPP = 0;
402402
let HasExtSDWA = 0;
403403

404-
let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
405-
let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
404+
let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
405+
let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
406406
let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
407407
let Asm64 = "$vdst, $src0$bound_ctrl$fi";
408408
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,26 @@ global_load_lds_dwordx4 v2, s[4:5] offset:4
4040
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4141
v_permlane16_swap_b32 v1, v2
4242

43+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
44+
// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
45+
v_permlane16_swap_b32 v218, v219
46+
4347
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
4448
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4549
v_permlane16_swap_b32_e32 v1, v2
4650

51+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
52+
// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
53+
v_permlane16_swap_b32_e32 v218, v219
54+
4755
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
4856
// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
4957
v_permlane16_swap_b32_e64 v1, v2
5058

59+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
60+
// GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
61+
v_permlane16_swap_b32_e64 v218, v219
62+
5163
// FIXME: Parsed as bound_ctrl:1?
5264
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
5365
// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
@@ -81,14 +93,26 @@ v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1
8193
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
8294
v_permlane32_swap_b32 v1, v2
8395

96+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
97+
// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
98+
v_permlane32_swap_b32 v218, v219
99+
84100
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
85101
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
86102
v_permlane32_swap_b32_e32 v1, v2
87103

104+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
105+
// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
106+
v_permlane32_swap_b32_e32 v218, v219
107+
88108
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
89109
// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
90110
v_permlane32_swap_b32_e64 v1, v2
91111

112+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
113+
// GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
114+
v_permlane32_swap_b32_e64 v218, v219
115+
92116
// FIXME: Parsed as bound_ctrl:1?
93117
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
94118
// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]

llvm/test/MC/Disassembler/AMDGPU/gfx950.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,27 @@
4747
# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4848
0x02,0xb3,0x02,0x7e
4949

50+
# GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
51+
0xdb,0xb3,0xb4,0x7f
52+
53+
# GFX950: v_permlane16_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb3,0xb4,0x7f]
54+
0x02,0xb3,0xb4,0x7f
55+
56+
# GFX950: v_permlane16_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb3,0x04,0x7e]
57+
0xdb,0xb3,0x04,0x7e
58+
5059
# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
5160
0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
5261

62+
# GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
63+
0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
64+
65+
# GFX950: v_permlane16_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
66+
0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
67+
68+
# GFX950: v_permlane16_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
69+
0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
70+
5371
# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
5472
0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00
5573

@@ -63,9 +81,27 @@
6381
# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
6482
0x02,0xb5,0x02,0x7e
6583

84+
# GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
85+
0xdb,0xb5,0xb4,0x7f
86+
87+
# GFX950: v_permlane32_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb5,0xb4,0x7f]
88+
0x02,0xb5,0xb4,0x7f
89+
90+
# GFX950: v_permlane32_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb5,0x04,0x7e]
91+
0xdb,0xb5,0x04,0x7e
92+
6693
# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
6794
0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
6895

96+
# GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
97+
0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
98+
99+
# GFX950: v_permlane32_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
100+
0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
101+
102+
# GFX950: v_permlane32_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
103+
0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
104+
69105
# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
70106
0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00
71107

0 commit comments

Comments
 (0)