2
2
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
3
3
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
4
4
5
+ ; TODO: GlobalISel should avoid generating v_ldexp_f32.
5
6
define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
6
- ; GFX12-LABEL: v_s_exp_f32:
7
- ; GFX12: ; %bb.0:
8
- ; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
9
- ; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0
10
- ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
11
- ; GFX12-NEXT: s_add_f32 s0, s0, s1
12
- ; GFX12-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
13
- ; GFX12-NEXT: v_s_exp_f32 s0, s0
14
- ; GFX12-NEXT: s_wait_alu 0xf1ff
15
- ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
16
- ; GFX12-NEXT: v_ldexp_f32 v0, s0, s1
17
- ; GFX12-NEXT: ; return to shader part epilog
7
+ ; GFX12-SDAG-LABEL: v_s_exp_f32:
8
+ ; GFX12-SDAG: ; %bb.0:
9
+ ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
10
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42800000, 0
11
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
12
+ ; GFX12-SDAG-NEXT: s_add_f32 s0, s0, s1
13
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0
14
+ ; GFX12-SDAG-NEXT: v_s_exp_f32 s0, s0
15
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
16
+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
17
+ ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
18
+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
19
+ ; GFX12-SDAG-NEXT: ; return to shader part epilog
20
+ ;
21
+ ; GFX12-GISEL-LABEL: v_s_exp_f32:
22
+ ; GFX12-GISEL: ; %bb.0:
23
+ ; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
24
+ ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42800000, 0
25
+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
26
+ ; GFX12-GISEL-NEXT: s_add_f32 s0, s0, s1
27
+ ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
28
+ ; GFX12-GISEL-NEXT: v_s_exp_f32 s0, s0
29
+ ; GFX12-GISEL-NEXT: s_wait_alu 0xf1ff
30
+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
31
+ ; GFX12-GISEL-NEXT: v_ldexp_f32 v0, s0, s1
32
+ ; GFX12-GISEL-NEXT: ; return to shader part epilog
18
33
%result = call float @llvm.exp2.f32(float %src)
19
34
ret float %result
20
35
}
@@ -59,14 +74,15 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) {
59
74
; GFX12-SDAG-LABEL: v_s_log_f32:
60
75
; GFX12-SDAG: ; %bb.0:
61
76
; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0x800000
62
- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
63
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
64
- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, s1
65
- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
66
- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
77
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
78
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
79
+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
80
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
81
+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
82
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
83
+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
67
84
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
68
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
69
- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
85
+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
70
86
; GFX12-SDAG-NEXT: ; return to shader part epilog
71
87
;
72
88
; GFX12-GISEL-LABEL: v_s_log_f32:
@@ -147,7 +163,7 @@ define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
147
163
ret half %result
148
164
}
149
165
150
- ; TODO-GFX12: GlobalISel should generate v_s_rsq.
166
+ ; TODO: GlobalISel should generate v_s_rsq.
151
167
define amdgpu_cs float @v_s_rsq_f32(float inreg %src) {
152
168
; GFX12-SDAG-LABEL: v_s_rsq_f32:
153
169
; GFX12-SDAG: ; %bb.0:
@@ -184,7 +200,7 @@ define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
184
200
ret half %result
185
201
}
186
202
187
- ; TODO-GFX12: Should not use any VALU instructions.
203
+ ; TODO: Should avoid generating v_cmp_class_f32.
188
204
define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
189
205
; GFX12-SDAG-LABEL: v_s_sqrt_f32:
190
206
; GFX12-SDAG: ; %bb.0:
@@ -298,16 +314,18 @@ define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) {
298
314
define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
299
315
; GFX12-SDAG-LABEL: srcmods_abs_f32:
300
316
; GFX12-SDAG: ; %bb.0:
301
- ; GFX12-SDAG-NEXT: s_and_b32 s1, s0, 0x7fffffff
317
+ ; GFX12-SDAG-NEXT: s_bitset0_b32 s0, 31
302
318
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
303
- ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s1, 0x800000
304
- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
305
- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, |s0|, s1
306
- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
307
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
308
- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
319
+ ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0x800000
320
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
321
+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
322
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
323
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
324
+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
325
+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
309
326
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
310
- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
327
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
328
+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
311
329
; GFX12-SDAG-NEXT: ; return to shader part epilog
312
330
;
313
331
; GFX12-GISEL-LABEL: srcmods_abs_f32:
@@ -333,15 +351,17 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
333
351
define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
334
352
; GFX12-SDAG-LABEL: srcmods_neg_f32:
335
353
; GFX12-SDAG: ; %bb.0:
354
+ ; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000
336
355
; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000
337
- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
338
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
339
- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, -s0, s1
340
- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
341
- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
356
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0
357
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
358
+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0
359
+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
360
+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
361
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
362
+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
342
363
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
343
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
344
- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
364
+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
345
365
; GFX12-SDAG-NEXT: ; return to shader part epilog
346
366
;
347
367
; GFX12-GISEL-LABEL: srcmods_neg_f32:
0 commit comments