Skip to content

Commit d64faec

Browse files
shiltian and rampitec authored
[AMDGPU] Add support for v_cvt_f32_bf8 on gfx1250 (#147600)
This PR only adds tests: the instruction is already supported on gfx1250, but it was not previously tested.

Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
1 parent 0263079 commit d64faec

File tree

8 files changed

+199
-0
lines changed

8 files changed

+199
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,17 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
34

45
define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) {
56
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
67
; GFX12: ; %bb.0:
78
; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
89
; GFX12-NEXT: ; return to shader part epilog
10+
;
11+
; GFX1250-LABEL: test_cvt_f32_bf8_byte0:
12+
; GFX1250: ; %bb.0:
13+
; GFX1250-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
14+
; GFX1250-NEXT: ; return to shader part epilog
915
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
1016
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 0)
1117
ret float %ret
@@ -16,6 +22,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
1622
; GFX12: ; %bb.0:
1723
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
1824
; GFX12-NEXT: ; return to shader part epilog
25+
;
26+
; GFX1250-LABEL: test_cvt_f32_bf8_byte1:
27+
; GFX1250: ; %bb.0:
28+
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
29+
; GFX1250-NEXT: ; return to shader part epilog
1930
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
2031
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1)
2132
ret float %ret
@@ -26,6 +37,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
2637
; GFX12: ; %bb.0:
2738
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
2839
; GFX12-NEXT: ; return to shader part epilog
40+
;
41+
; GFX1250-LABEL: test_cvt_f32_bf8_byte2:
42+
; GFX1250: ; %bb.0:
43+
; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
44+
; GFX1250-NEXT: ; return to shader part epilog
2945
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
3046
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2)
3147
ret float %ret
@@ -36,6 +52,11 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
3652
; GFX12: ; %bb.0:
3753
; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
3854
; GFX12-NEXT: ; return to shader part epilog
55+
;
56+
; GFX1250-LABEL: test_cvt_f32_fp8_byte3:
57+
; GFX1250: ; %bb.0:
58+
; GFX1250-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
59+
; GFX1250-NEXT: ; return to shader part epilog
3960
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
4061
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %tmp0, i32 3)
4162
ret float %ret
@@ -47,6 +68,13 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr
4768
; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
4869
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
4970
; GFX12-NEXT: s_endpgm
71+
;
72+
; GFX1250-LABEL: test_cvt_pk_bf8_f32_word0:
73+
; GFX1250: ; %bb.0:
74+
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
75+
; GFX1250-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
76+
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
77+
; GFX1250-NEXT: s_endpgm
5078
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
5179
%tmp1 = bitcast i32 %tmp0 to float
5280
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %tmp1, float %y, i32 %old, i1 false)
@@ -62,6 +90,15 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr
6290
; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
6391
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
6492
; GFX12-NEXT: s_endpgm
93+
;
94+
; GFX1250-LABEL: test_cvt_pk_fp8_f32_word1:
95+
; GFX1250: ; %bb.0:
96+
; GFX1250-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
97+
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
98+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
99+
; GFX1250-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
100+
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
101+
; GFX1250-NEXT: s_endpgm
65102
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
66103
%tmp1 = bitcast i32 %tmp0 to float
67104
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %tmp1, float %y, i32 %old, i1 true)
@@ -75,6 +112,13 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a
75112
; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
76113
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
77114
; GFX12-NEXT: s_endpgm
115+
;
116+
; GFX1250-LABEL: test_cvt_sr_bf8_f32_byte0:
117+
; GFX1250: ; %bb.0:
118+
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
119+
; GFX1250-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
120+
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
121+
; GFX1250-NEXT: s_endpgm
78122
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
79123
%tmp1 = bitcast i32 %tmp0 to float
80124
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %tmp1, i32 %r, i32 %old, i32 0)
@@ -88,6 +132,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a
88132
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
89133
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
90134
; GFX12-NEXT: s_endpgm
135+
;
136+
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1:
137+
; GFX1250: ; %bb.0:
138+
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
139+
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
140+
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
141+
; GFX1250-NEXT: s_endpgm
91142
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
92143
%tmp1 = bitcast i32 %tmp0 to float
93144
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 1)
@@ -101,6 +152,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a
101152
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
102153
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
103154
; GFX12-NEXT: s_endpgm
155+
;
156+
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2:
157+
; GFX1250: ; %bb.0:
158+
; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
159+
; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
160+
; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
161+
; GFX1250-NEXT: s_endpgm
104162
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
105163
%tmp1 = bitcast i32 %tmp0 to float
106164
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %tmp1, i32 %r, i32 %old, i32 2)

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ v_cvt_pk_f16_fp8 v1, s2
8888
v_cvt_pk_f16_fp8 v1, 100
8989
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
9090

91+
v_cvt_f32_bf8_e32 v1, s3
92+
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
93+
94+
v_cvt_f32_bf8_e32 v1, 3
95+
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
96+
97+
v_cvt_f32_bf8_e32 v1, v3
98+
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
99+
91100
v_cvt_f32_fp8_e32 v1, s3
92101
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
93102

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,15 @@ v_cvt_pk_f16_fp8 v1, s2
9797
v_cvt_pk_f16_fp8 v1, 100
9898
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
9999

100+
v_cvt_f32_bf8_e32 v1, s3
101+
// GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
102+
103+
v_cvt_f32_bf8_e32 v1, 3
104+
// GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
105+
106+
v_cvt_f32_bf8_e32 v1, v3
107+
// GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
108+
100109
v_cvt_f32_fp8_e32 v1, s3
101110
// GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
102111

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,42 @@
11
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
33

4+
v_cvt_f32_bf8_e64 v1, s3
5+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
6+
7+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
8+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
9+
10+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
11+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
12+
13+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
14+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
15+
16+
v_cvt_f32_bf8_e64 v1, 3
17+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
18+
19+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
20+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
21+
22+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
23+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
24+
25+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
26+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
27+
28+
v_cvt_f32_bf8_e64 v1, v3
29+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
30+
31+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
32+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
33+
34+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
35+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
36+
37+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
38+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
39+
440
v_cvt_f32_fp8_e64 v1, s3
541
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]
642

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,42 @@
11
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
33

4+
v_cvt_f32_bf8_e64 v1, s3
5+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
6+
7+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:1
8+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
9+
10+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:2
11+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
12+
13+
v_cvt_f32_bf8_e64 v1, s3 byte_sel:3
14+
// GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
15+
16+
v_cvt_f32_bf8_e64 v1, 3
17+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
18+
19+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:1
20+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
21+
22+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:2
23+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
24+
25+
v_cvt_f32_bf8_e64 v1, 3 byte_sel:3
26+
// GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
27+
28+
v_cvt_f32_bf8_e64 v1, v3
29+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
30+
31+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:1
32+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
33+
34+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:2
35+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
36+
37+
v_cvt_f32_bf8_e64 v1, v3 byte_sel:3
38+
// GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
39+
440
v_cvt_f32_fp8_e64 v1, s3
541
// GFX1250: v_cvt_f32_fp8_e64 v1, s3 ; encoding: [0x01,0x00,0xec,0xd5,0x03,0x00,0x00,0x00]
642

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,15 @@
108108
# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
109109
# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
110110

111+
0x03,0xda,0x02,0x7e
112+
# GFX1250: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xda,0x02,0x7e]
113+
114+
0x83,0xda,0x02,0x7e
115+
# GFX1250: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xda,0x02,0x7e]
116+
117+
0x03,0xdb,0x02,0x7e
118+
# GFX1250: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xdb,0x02,0x7e]
119+
111120
0x03,0xd8,0x02,0x7e
112121
# GFX1250: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xd8,0x02,0x7e]
113122

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@
7575
0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e
7676
# GFX1250: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e]
7777

78+
0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac
79+
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac]
80+
81+
0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e
82+
# GFX1250: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e]
83+
7884
0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff
7985
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
8086
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,42 @@
22
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
33
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
44

5+
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
6+
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
7+
8+
0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00
9+
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x83,0x00,0x00,0x00]
10+
11+
0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00
12+
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x83,0x00,0x00,0x00]
13+
14+
0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00
15+
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x83,0x00,0x00,0x00]
16+
17+
0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00
18+
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x00,0x00,0x00]
19+
20+
0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00
21+
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x00,0x00,0x00]
22+
23+
0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00
24+
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x00,0x00,0x00]
25+
26+
0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00
27+
# GFX1250: v_cvt_f32_bf8_e64 v1, s3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x00,0x00,0x00]
28+
29+
0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00
30+
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 ; encoding: [0x01,0x00,0xed,0xd5,0x03,0x01,0x00,0x00]
31+
32+
0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00
33+
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:1 ; encoding: [0x01,0x10,0xed,0xd5,0x03,0x01,0x00,0x00]
34+
35+
0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00
36+
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:2 ; encoding: [0x01,0x08,0xed,0xd5,0x03,0x01,0x00,0x00]
37+
38+
0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00
39+
# GFX1250: v_cvt_f32_bf8_e64 v1, v3 byte_sel:3 ; encoding: [0x01,0x18,0xed,0xd5,0x03,0x01,0x00,0x00]
40+
541
0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00
642
# GFX1250: v_cvt_f32_fp8_e64 v1, 3 ; encoding: [0x01,0x00,0xec,0xd5,0x83,0x00,0x00,0x00]
743

0 commit comments

Comments
 (0)