Skip to content

Commit 520e045

Browse files
authored
[AMDGPU] Handle llvm.amdgcn.pops.exiting.wave.id with calls (#98614)
1 parent 038c48c commit 520e045

File tree

2 files changed

+99
-35
lines changed

2 files changed

+99
-35
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -290,14 +290,12 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
290290
case AMDGPU::SRC_PRIVATE_BASE:
291291
case AMDGPU::SRC_PRIVATE_LIMIT_LO:
292292
case AMDGPU::SRC_PRIVATE_LIMIT:
293+
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
293294
case AMDGPU::SGPR_NULL:
294295
case AMDGPU::SGPR_NULL64:
295296
case AMDGPU::MODE:
296297
continue;
297298

298-
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
299-
llvm_unreachable("src_pops_exiting_wave_id should not be used");
300-
301299
case AMDGPU::NoRegister:
302300
assert(MI.isDebugInstr() &&
303301
"Instruction uses invalid noreg register");

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll

Lines changed: 98 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
4-
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
5-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,SDAG,GFX9-SDAG
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,SDAG,GFX10-SDAG
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
6+
7+
declare void @foo(i32)
68

79
define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
810
; SDAG-LABEL: test:
@@ -34,35 +36,25 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
3436
}
3537

3638
define amdgpu_ps void @test_loop() {
37-
; SDAG-LABEL: test_loop:
38-
; SDAG: ; %bb.0:
39-
; SDAG-NEXT: .LBB1_1: ; %loop
40-
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
41-
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
42-
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
43-
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
44-
; SDAG-NEXT: ; %bb.2: ; %exit
45-
; SDAG-NEXT: s_endpgm
39+
; GFX9-LABEL: test_loop:
40+
; GFX9: ; %bb.0:
41+
; GFX9-NEXT: .LBB1_1: ; %loop
42+
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
43+
; GFX9-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
44+
; GFX9-NEXT: s_cmp_eq_u32 s0, 0
45+
; GFX9-NEXT: s_cbranch_scc1 .LBB1_1
46+
; GFX9-NEXT: ; %bb.2: ; %exit
47+
; GFX9-NEXT: s_endpgm
4648
;
47-
; GFX9-GISEL-LABEL: test_loop:
48-
; GFX9-GISEL: ; %bb.0:
49-
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
50-
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
51-
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
52-
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
53-
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
54-
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
55-
; GFX9-GISEL-NEXT: s_endpgm
56-
;
57-
; GFX10-GISEL-LABEL: test_loop:
58-
; GFX10-GISEL: ; %bb.0:
59-
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
60-
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
61-
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
62-
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
63-
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
64-
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
65-
; GFX10-GISEL-NEXT: s_endpgm
49+
; GFX10-LABEL: test_loop:
50+
; GFX10: ; %bb.0:
51+
; GFX10-NEXT: .LBB1_1: ; %loop
52+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
53+
; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
54+
; GFX10-NEXT: s_cmp_eq_u32 s0, 0
55+
; GFX10-NEXT: s_cbranch_scc1 .LBB1_1
56+
; GFX10-NEXT: ; %bb.2: ; %exit
57+
; GFX10-NEXT: s_endpgm
6658
br label %loop
6759
loop:
6860
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
@@ -117,3 +109,77 @@ exit:
117109
%id = phi i32 [ %id1, %entry ], [ %id2, %body ]
118110
ret i32 %id
119111
}
112+
113+
define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
114+
; GFX9-SDAG-LABEL: test_call:
115+
; GFX9-SDAG: ; %bb.0:
116+
; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
117+
; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
118+
; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
119+
; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
120+
; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2
121+
; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
122+
; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1]
123+
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
124+
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
125+
; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
126+
; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id
127+
; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
128+
; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36
129+
; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
130+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6
131+
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
132+
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
133+
; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
134+
; GFX9-SDAG-NEXT: s_endpgm
135+
;
136+
; GFX9-GISEL-LABEL: test_call:
137+
; GFX9-GISEL: ; %bb.0:
138+
; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
139+
; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
140+
; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
141+
; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
142+
; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2
143+
; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
144+
; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1]
145+
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
146+
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
147+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
148+
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
149+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
150+
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
151+
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
152+
; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36
153+
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
154+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
155+
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
156+
; GFX9-GISEL-NEXT: s_endpgm
157+
;
158+
; GFX10-LABEL: test_call:
159+
; GFX10: ; %bb.0:
160+
; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
161+
; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
162+
; GFX10-NEXT: s_mov_b32 s38, -1
163+
; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
164+
; GFX10-NEXT: s_add_u32 s36, s36, s2
165+
; GFX10-NEXT: s_addc_u32 s37, s37, 0
166+
; GFX10-NEXT: s_getpc_b64 s[0:1]
167+
; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
168+
; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
169+
; GFX10-NEXT: s_mov_b64 s[8:9], 36
170+
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
171+
; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
172+
; GFX10-NEXT: s_mov_b32 s32, 0
173+
; GFX10-NEXT: v_mov_b32_e32 v0, s0
174+
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
175+
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
176+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
177+
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
178+
; GFX10-NEXT: s_endpgm
179+
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
180+
call void @foo(i32 %id)
181+
ret void
182+
}
183+
184+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
185+
; GFX10-SDAG: {{.*}}

0 commit comments

Comments
 (0)