Skip to content

Commit a44d740

Browse files
committed
[AMDGPU][NFC] Pre-commit test for PR #94133
1 parent 145815c commit a44d740

File tree

1 file changed

+87
-4
lines changed
  • llvm/test/CodeGen/AMDGPU

1 file changed

+87
-4
lines changed

llvm/test/CodeGen/AMDGPU/wqm.ll

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2936,6 +2936,89 @@ ENDIF:
29362936
ret float %r
29372937
}
29382938

2939+
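; The WQM -> StrictWQM transition must be preserved because the kill breaks the WQM mask: after the kill, exec is narrowed by the kill condition, so it is no longer guaranteed to cover whole quads.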
2940+
; Shader body: two dependent image samples, a kill on (%z == 0), then a ds_swizzle of %wqm_data consumed through strict.wqm, with the final sum wrapped in wqm. The %data argument is not referenced in the body.
define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data, i32 %wqm_data) {
2941+
; GFX9-W64-LABEL: test_strict_wqm_within_wqm_with_kill:
2942+
; GFX9-W64: ; %bb.0: ; %main_body
2943+
; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec
2944+
; GFX9-W64-NEXT: s_mov_b64 s[14:15], exec
2945+
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
2946+
; GFX9-W64-NEXT: v_mov_b32_e32 v3, v2
2947+
; GFX9-W64-NEXT: s_mov_b64 exec, s[14:15]
2948+
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
2949+
; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
2950+
; GFX9-W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
2951+
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
2952+
; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
2953+
; GFX9-W64-NEXT: s_xor_b64 s[0:1], vcc, exec
2954+
; GFX9-W64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[0:1]
2955+
; GFX9-W64-NEXT: s_cbranch_scc0 .LBB51_2
2956+
; GFX9-W64-NEXT: ; %bb.1: ; %main_body
2957+
; GFX9-W64-NEXT: s_and_b64 exec, exec, vcc
2958+
; GFX9-W64-NEXT: ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
2959+
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
2960+
; GFX9-W64-NEXT: v_mov_b32_e32 v1, v3
2961+
; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v1
2962+
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
2963+
; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1
2964+
; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
2965+
; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
2966+
; GFX9-W64-NEXT: s_branch .LBB51_3
2967+
; GFX9-W64-NEXT: .LBB51_2:
2968+
; GFX9-W64-NEXT: s_mov_b64 exec, 0
2969+
; GFX9-W64-NEXT: exp null off, off, off, off done vm
2970+
; GFX9-W64-NEXT: s_endpgm
2971+
; GFX9-W64-NEXT: .LBB51_3:
2972+
;
2973+
; GFX10-W32-LABEL: test_strict_wqm_within_wqm_with_kill:
2974+
; GFX10-W32: ; %bb.0: ; %main_body
2975+
; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo
2976+
; GFX10-W32-NEXT: s_mov_b32 s13, exec_lo
2977+
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
2978+
; GFX10-W32-NEXT: v_mov_b32_e32 v3, v2
2979+
; GFX10-W32-NEXT: s_mov_b32 exec_lo, s13
2980+
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
2981+
; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
2982+
; GFX10-W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2983+
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
2984+
; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
2985+
; GFX10-W32-NEXT: s_xor_b32 s0, vcc_lo, exec_lo
2986+
; GFX10-W32-NEXT: s_andn2_b32 s12, s12, s0
2987+
; GFX10-W32-NEXT: s_cbranch_scc0 .LBB51_2
2988+
; GFX10-W32-NEXT: ; %bb.1: ; %main_body
2989+
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, vcc_lo
2990+
; GFX10-W32-NEXT: ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
2991+
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
2992+
; GFX10-W32-NEXT: v_mov_b32_e32 v1, v3
2993+
; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v1
2994+
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
2995+
; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1
2996+
; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
2997+
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
2998+
; GFX10-W32-NEXT: s_branch .LBB51_3
2999+
; GFX10-W32-NEXT: .LBB51_2:
3000+
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
3001+
; GFX10-W32-NEXT: exp null off, off, off, off done vm
3002+
; GFX10-W32-NEXT: s_endpgm
3003+
; GFX10-W32-NEXT: .LBB51_3:
3004+
main_body:
3005+
%c.bc = bitcast i32 %c to float ; reinterpret the integer argument as the float coordinate of the first sample
3006+
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
3007+
%tex0 = extractelement <4 x float> %tex, i32 0 ; only element 0 of the first sample is used
3008+
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 ; dependent sample: its coordinate is the first sample's result
3009+
%cmp = icmp eq i32 %z, 0
3010+
call void @llvm.amdgcn.kill(i1 %cmp) ; the kill this test is about: in the checked assembly, lanes where %cmp is false are cleared from the saved live mask and exec is ANDed with the compare result
3011+
%dataf = extractelement <4 x float> %dtex, i32 0 ; only element 0 of the second sample is used
3012+
%data2 = call i32 @llvm.amdgcn.ds.swizzle(i32 %wqm_data, i32 2079) ; pattern 2079 is printed as swizzle(SWAP,2) in the checked assembly
3013+
%data3 = call i32 @llvm.amdgcn.strict.wqm.i32(i32 %data2) ; strict-WQM use of the swizzled value, located after the kill
3014+
%data3f = sitofp i32 %data3 to float
3015+
%result.f = fadd float %dataf, %data3f ; combine the sampled value with the swizzled value
3016+
%result.i = bitcast float %result.f to i32 ; llvm.amdgcn.wqm.i32 takes an i32, so round-trip through a bitcast
3017+
%result.wqm = call i32 @llvm.amdgcn.wqm.i32(i32 %result.i) ; presumably forces the whole computation above into WQM; confirm against the intrinsic documentation
3018+
%result = bitcast i32 %result.wqm to float
3019+
ret float %result
3020+
}
3021+
29393022
;TODO: StrictWQM -> WQM transition could be improved. WQM could use the exec from the previous state instead of calling s_wqm again.
29403023
define amdgpu_ps float @test_strict_wqm_strict_wwm_wqm(i32 inreg %idx0, i32 inreg %idx1, ptr addrspace(8) inreg %res, ptr addrspace(8) inreg %res2, float %inp, <8 x i32> inreg %res3) {
29413024
; GFX9-W64-LABEL: test_strict_wqm_strict_wwm_wqm:
@@ -3281,9 +3364,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
32813364
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
32823365
; GFX9-W64-NEXT: v_cmp_le_f32_e64 vcc, s0, 0
32833366
; GFX9-W64-NEXT: s_andn2_b64 s[4:5], exec, vcc
3284-
; GFX9-W64-NEXT: s_cbranch_scc0 .LBB54_1
3367+
; GFX9-W64-NEXT: s_cbranch_scc0 .LBB55_1
32853368
; GFX9-W64-NEXT: s_endpgm
3286-
; GFX9-W64-NEXT: .LBB54_1:
3369+
; GFX9-W64-NEXT: .LBB55_1:
32873370
; GFX9-W64-NEXT: s_mov_b64 exec, 0
32883371
; GFX9-W64-NEXT: exp null off, off, off, off done vm
32893372
; GFX9-W64-NEXT: s_endpgm
@@ -3297,9 +3380,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
32973380
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
32983381
; GFX10-W32-NEXT: v_cmp_le_f32_e64 vcc_lo, s0, 0
32993382
; GFX10-W32-NEXT: s_andn2_b32 s4, exec_lo, vcc_lo
3300-
; GFX10-W32-NEXT: s_cbranch_scc0 .LBB54_1
3383+
; GFX10-W32-NEXT: s_cbranch_scc0 .LBB55_1
33013384
; GFX10-W32-NEXT: s_endpgm
3302-
; GFX10-W32-NEXT: .LBB54_1:
3385+
; GFX10-W32-NEXT: .LBB55_1:
33033386
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
33043387
; GFX10-W32-NEXT: exp null off, off, off, off done vm
33053388
; GFX10-W32-NEXT: s_endpgm

0 commit comments

Comments
 (0)