1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
3
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
3
4
4
5
define amdgpu_cs float @test_cvt_f32_bf8_byte0 (i32 %a ) {
5
6
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
6
7
; GFX12: ; %bb.0:
7
8
; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
8
9
; GFX12-NEXT: ; return to shader part epilog
10
+ ;
11
+ ; GFX1250-LABEL: test_cvt_f32_bf8_byte0:
12
+ ; GFX1250: ; %bb.0:
13
+ ; GFX1250-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
14
+ ; GFX1250-NEXT: ; return to shader part epilog
9
15
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
10
16
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8 (i32 %tmp0 , i32 0 )
11
17
ret float %ret
@@ -16,6 +22,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
16
22
; GFX12: ; %bb.0:
17
23
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
18
24
; GFX12-NEXT: ; return to shader part epilog
25
+ ;
26
+ ; GFX1250-LABEL: test_cvt_f32_bf8_byte1:
27
+ ; GFX1250: ; %bb.0:
28
+ ; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
29
+ ; GFX1250-NEXT: ; return to shader part epilog
19
30
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
20
31
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8 (i32 %tmp0 , i32 1 )
21
32
ret float %ret
@@ -26,6 +37,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
26
37
; GFX12: ; %bb.0:
27
38
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
28
39
; GFX12-NEXT: ; return to shader part epilog
40
+ ;
41
+ ; GFX1250-LABEL: test_cvt_f32_bf8_byte2:
42
+ ; GFX1250: ; %bb.0:
43
+ ; GFX1250-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
44
+ ; GFX1250-NEXT: ; return to shader part epilog
29
45
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
30
46
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8 (i32 %tmp0 , i32 2 )
31
47
ret float %ret
@@ -36,6 +52,11 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
36
52
; GFX12: ; %bb.0:
37
53
; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
38
54
; GFX12-NEXT: ; return to shader part epilog
55
+ ;
56
+ ; GFX1250-LABEL: test_cvt_f32_fp8_byte3:
57
+ ; GFX1250: ; %bb.0:
58
+ ; GFX1250-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
59
+ ; GFX1250-NEXT: ; return to shader part epilog
39
60
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
40
61
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8 (i32 %tmp0 , i32 3 )
41
62
ret float %ret
@@ -47,6 +68,13 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr
47
68
; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
48
69
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
49
70
; GFX12-NEXT: s_endpgm
71
+ ;
72
+ ; GFX1250-LABEL: test_cvt_pk_bf8_f32_word0:
73
+ ; GFX1250: ; %bb.0:
74
+ ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
75
+ ; GFX1250-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
76
+ ; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
77
+ ; GFX1250-NEXT: s_endpgm
50
78
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
51
79
%tmp1 = bitcast i32 %tmp0 to float
52
80
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32 (float %tmp1 , float %y , i32 %old , i1 false )
@@ -62,6 +90,15 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr
62
90
; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
63
91
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
64
92
; GFX12-NEXT: s_endpgm
93
+ ;
94
+ ; GFX1250-LABEL: test_cvt_pk_fp8_f32_word1:
95
+ ; GFX1250: ; %bb.0:
96
+ ; GFX1250-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
97
+ ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
98
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
99
+ ; GFX1250-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
100
+ ; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
101
+ ; GFX1250-NEXT: s_endpgm
65
102
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
66
103
%tmp1 = bitcast i32 %tmp0 to float
67
104
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32 (float %tmp1 , float %y , i32 %old , i1 true )
@@ -75,6 +112,13 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a
75
112
; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
76
113
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
77
114
; GFX12-NEXT: s_endpgm
115
+ ;
116
+ ; GFX1250-LABEL: test_cvt_sr_bf8_f32_byte0:
117
+ ; GFX1250: ; %bb.0:
118
+ ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
119
+ ; GFX1250-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
120
+ ; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
121
+ ; GFX1250-NEXT: s_endpgm
78
122
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
79
123
%tmp1 = bitcast i32 %tmp0 to float
80
124
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32 (float %tmp1 , i32 %r , i32 %old , i32 0 )
@@ -88,6 +132,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a
88
132
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
89
133
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
90
134
; GFX12-NEXT: s_endpgm
135
+ ;
136
+ ; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1:
137
+ ; GFX1250: ; %bb.0:
138
+ ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
139
+ ; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
140
+ ; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
141
+ ; GFX1250-NEXT: s_endpgm
91
142
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
92
143
%tmp1 = bitcast i32 %tmp0 to float
93
144
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32 (float %tmp1 , i32 %r , i32 %old , i32 1 )
@@ -101,6 +152,13 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a
101
152
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
102
153
; GFX12-NEXT: global_store_b32 v[3:4], v2, off
103
154
; GFX12-NEXT: s_endpgm
155
+ ;
156
+ ; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2:
157
+ ; GFX1250: ; %bb.0:
158
+ ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
159
+ ; GFX1250-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
160
+ ; GFX1250-NEXT: global_store_b32 v[4:5], v2, off
161
+ ; GFX1250-NEXT: s_endpgm
104
162
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32 (i32 %a , i32 228 , i32 15 , i32 15 , i1 1 )
105
163
%tmp1 = bitcast i32 %tmp0 to float
106
164
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32 (float %tmp1 , i32 %r , i32 %old , i32 2 )
0 commit comments