Skip to content

Commit 7895c87

Browse files
committed
[AMDGPU] Split the lit test spill-vgpr-to-agpr.ll to different tests
[1]. Move the test which rejects the usage of agpr before gfx908 into a separate file - reject-agpr-usage-before-gfx908.ll. [2]. Move those tests which are applicable to both gfx900 and gfx908 into a separate file - spill-vgpr.ll. [3]. Keep those tests which are specific only to gfx908 in the file spill-vgpr-to-agpr.ll. The above split is required to properly update the tests in D123525. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D123973
1 parent 72315d0 commit 7895c87

File tree

3 files changed

+244
-235
lines changed

3 files changed

+244
-235
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN %s
2+
; RUN: not llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN %s
3+
4+
; GCN: couldn't allocate input reg for constraint 'a'
5+
6+
define amdgpu_kernel void @used_1a() { ; negative test: the kernel body is a single inline-asm statement plus a return
7+
call void asm sideeffect "", "a"(i32 1) ; empty asm with one input under the 'a' constraint; the RUN lines use `not llc` on gfx900/gfx906 and check for the "couldn't allocate input reg" diagnostic
8+
ret void
9+
}
Lines changed: 12 additions & 235 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,6 @@
1-
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
2-
; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN,GFX900 %s
1+
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
32

4-
; GFX900: couldn't allocate input reg for constraint 'a'
5-
6-
7-
; GCN-LABEL: {{^}}max_10_vgprs:
8-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
9-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
10-
; GFX908-NOT: SCRATCH_RSRC
11-
; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
12-
; GFX900: buffer_store_dword v{{[0-9]}},
13-
; GFX900: buffer_store_dword v{{[0-9]}},
14-
; GFX900: buffer_load_dword v{{[0-9]}},
15-
; GFX900: buffer_load_dword v{{[0-9]}},
16-
; GFX908-NOT: buffer_
17-
; GFX908-DAG: v_mov_b32_e32 v{{[0-9]}}, [[V_REG:v[0-9]+]]
18-
; GFX908-DAG: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]
19-
20-
; GCN: NumVgprs: 10
21-
; GFX900: ScratchSize: 12
22-
; GFX908: ScratchSize: 0
23-
; GCN: VGPRBlocks: 2
24-
; GCN: NumVGPRsForWavesPerEU: 10
25-
define amdgpu_kernel void @max_10_vgprs(i32 addrspace(1)* %p) #0 {
26-
%tid = load volatile i32, i32 addrspace(1)* undef
27-
%p1 = getelementptr inbounds i32, i32 addrspace(1)* %p, i32 %tid
28-
%p2 = getelementptr inbounds i32, i32 addrspace(1)* %p1, i32 4
29-
%p3 = getelementptr inbounds i32, i32 addrspace(1)* %p2, i32 8
30-
%p4 = getelementptr inbounds i32, i32 addrspace(1)* %p3, i32 12
31-
%p5 = getelementptr inbounds i32, i32 addrspace(1)* %p4, i32 16
32-
%p6 = getelementptr inbounds i32, i32 addrspace(1)* %p5, i32 20
33-
%p7 = getelementptr inbounds i32, i32 addrspace(1)* %p6, i32 24
34-
%p8 = getelementptr inbounds i32, i32 addrspace(1)* %p7, i32 28
35-
%p9 = getelementptr inbounds i32, i32 addrspace(1)* %p8, i32 32
36-
%p10 = getelementptr inbounds i32, i32 addrspace(1)* %p9, i32 36
37-
%v1 = load volatile i32, i32 addrspace(1)* %p1
38-
%v2 = load volatile i32, i32 addrspace(1)* %p2
39-
%v3 = load volatile i32, i32 addrspace(1)* %p3
40-
%v4 = load volatile i32, i32 addrspace(1)* %p4
41-
%v5 = load volatile i32, i32 addrspace(1)* %p5
42-
%v6 = load volatile i32, i32 addrspace(1)* %p6
43-
%v7 = load volatile i32, i32 addrspace(1)* %p7
44-
%v8 = load volatile i32, i32 addrspace(1)* %p8
45-
%v9 = load volatile i32, i32 addrspace(1)* %p9
46-
%v10 = load volatile i32, i32 addrspace(1)* %p10
47-
call void asm sideeffect "", "v,v,v,v,v,v,v,v,v,v"(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10)
48-
store volatile i32 %v1, i32 addrspace(1)* undef
49-
store volatile i32 %v2, i32 addrspace(1)* undef
50-
store volatile i32 %v3, i32 addrspace(1)* undef
51-
store volatile i32 %v4, i32 addrspace(1)* undef
52-
store volatile i32 %v5, i32 addrspace(1)* undef
53-
store volatile i32 %v6, i32 addrspace(1)* undef
54-
store volatile i32 %v7, i32 addrspace(1)* undef
55-
store volatile i32 %v8, i32 addrspace(1)* undef
56-
store volatile i32 %v9, i32 addrspace(1)* undef
57-
store volatile i32 %v10, i32 addrspace(1)* undef
58-
ret void
59-
}
60-
61-
; GCN-LABEL: {{^}}max_10_vgprs_used_9a:
3+
; GFX908-LABEL: {{^}}max_10_vgprs_used_9a:
624
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
635
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
646
; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
@@ -109,12 +51,12 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
10951
ret void
11052
}
11153

112-
; GCN-LABEL: {{^}}max_10_vgprs_used_1a_partial_spill:
113-
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
114-
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
54+
; GFX908-LABEL: {{^}}max_10_vgprs_used_1a_partial_spill:
55+
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
56+
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
11557
; GFX908-DAG: v_accvgpr_write_b32 a0, 1
116-
; GCN-DAG: buffer_store_dword v{{[0-9]}},
117-
; GCN-DAG: buffer_store_dword v{{[0-9]}},
58+
; GFX908-DAG: buffer_store_dword v{{[0-9]}},
59+
; GFX908-DAG: buffer_store_dword v{{[0-9]}},
11860
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}}
11961
; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]}}
12062
; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]}}
@@ -124,8 +66,8 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
12466
; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]}}
12567
; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]}}
12668
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
127-
; GCN-DAG: buffer_load_dword v{{[0-9]}},
128-
; GCN-DAG: buffer_load_dword v{{[0-9]}},
69+
; GFX908-DAG: buffer_load_dword v{{[0-9]}},
70+
; GFX908-DAG: buffer_load_dword v{{[0-9]}},
12971
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
13072
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2
13173
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3
@@ -136,11 +78,11 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
13678
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a8
13779
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a9
13880

139-
; GCN: NumVgprs: 10
81+
; GFX908: NumVgprs: 10
14082
; GFX900: ScratchSize: 44
14183
; GFX908: ScratchSize: 12
142-
; GCN: VGPRBlocks: 2
143-
; GCN: NumVGPRsForWavesPerEU: 10
84+
; GFX908: VGPRBlocks: 2
85+
; GFX908: NumVGPRsForWavesPerEU: 10
14486
define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)* %p) #0 {
14587
%tid = load volatile i32, i32 addrspace(1)* undef
14688
call void asm sideeffect "", "a"(i32 1)
@@ -163,169 +105,4 @@ define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)*
163105
ret void
164106
}
165107

166-
; GCN-LABEL: {{^}}max_10_vgprs_spill_v32:
167-
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
168-
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
169-
; GCN: buffer_store_dword v{{[0-9]}},
170-
; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}}
171-
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
172-
; GCN-NOT: a10
173-
174-
; GFX908: NumVgprs: 10
175-
; GFX900: ScratchSize: 100
176-
; GFX908: ScratchSize: 68
177-
; GFX908: VGPRBlocks: 2
178-
; GFX908: NumVGPRsForWavesPerEU: 10
179-
define amdgpu_kernel void @max_10_vgprs_spill_v32(<32 x float> addrspace(1)* %p) #0 {
180-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
181-
%gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
182-
%v = load volatile <32 x float>, <32 x float> addrspace(1)* %gep
183-
store volatile <32 x float> %v, <32 x float> addrspace(1)* undef
184-
ret void
185-
}
186-
187-
; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32:
188-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
189-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
190-
; GFX908-NOT: SCRATCH_RSRC
191-
; GFX908-DAG: v_accvgpr_write_b32 a0, v
192-
; GFX900: buffer_store_dword v
193-
; GFX900: buffer_load_dword v
194-
; GFX908-NOT: buffer_
195-
; GFX908-DAG: v_accvgpr_read_b32
196-
197-
; GFX900: NumVgprs: 256
198-
; GFX900: ScratchSize: 148
199-
; GFX908: NumVgprs: 255
200-
; GFX908: ScratchSize: 0
201-
; GCN: VGPRBlocks: 63
202-
; GFX900: NumVGPRsForWavesPerEU: 256
203-
; GFX908: NumVGPRsForWavesPerEU: 255
204-
define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %p) #1 {
205-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
206-
%p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
207-
%p2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p1, i32 %tid
208-
%p3 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p2, i32 %tid
209-
%p4 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p3, i32 %tid
210-
%p5 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p4, i32 %tid
211-
%p6 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p5, i32 %tid
212-
%p7 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p6, i32 %tid
213-
%p8 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p7, i32 %tid
214-
%p9 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p8, i32 %tid
215-
%v1 = load volatile <32 x float>, <32 x float> addrspace(1)* %p1
216-
%v2 = load volatile <32 x float>, <32 x float> addrspace(1)* %p2
217-
%v3 = load volatile <32 x float>, <32 x float> addrspace(1)* %p3
218-
%v4 = load volatile <32 x float>, <32 x float> addrspace(1)* %p4
219-
%v5 = load volatile <32 x float>, <32 x float> addrspace(1)* %p5
220-
%v6 = load volatile <32 x float>, <32 x float> addrspace(1)* %p6
221-
%v7 = load volatile <32 x float>, <32 x float> addrspace(1)* %p7
222-
%v8 = load volatile <32 x float>, <32 x float> addrspace(1)* %p8
223-
%v9 = load volatile <32 x float>, <32 x float> addrspace(1)* %p9
224-
store volatile <32 x float> %v1, <32 x float> addrspace(1)* undef
225-
store volatile <32 x float> %v2, <32 x float> addrspace(1)* undef
226-
store volatile <32 x float> %v3, <32 x float> addrspace(1)* undef
227-
store volatile <32 x float> %v4, <32 x float> addrspace(1)* undef
228-
store volatile <32 x float> %v5, <32 x float> addrspace(1)* undef
229-
store volatile <32 x float> %v6, <32 x float> addrspace(1)* undef
230-
store volatile <32 x float> %v7, <32 x float> addrspace(1)* undef
231-
store volatile <32 x float> %v8, <32 x float> addrspace(1)* undef
232-
store volatile <32 x float> %v9, <32 x float> addrspace(1)* undef
233-
ret void
234-
}
235-
236-
; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32_2bb:
237-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
238-
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
239-
; GFX908-NOT: SCRATCH_RSRC
240-
; GFX908: v_accvgpr_write_b32
241-
; GFX908: global_load_
242-
; GFX900: buffer_store_dword v
243-
; GFX900: buffer_load_dword v
244-
; GFX908-NOT: buffer_
245-
; GFX908-DAG: v_accvgpr_read_b32
246-
247-
; GFX900: NumVgprs: 256
248-
; GFX908: NumVgprs: 253
249-
; GFX900: ScratchSize: 2052
250-
; GFX908: ScratchSize: 0
251-
; GCN: VGPRBlocks: 63
252-
; GFX900: NumVGPRsForWavesPerEU: 256
253-
; GFX908: NumVGPRsForWavesPerEU: 253
254-
define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(<32 x float> addrspace(1)* %p) #1 {
255-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
256-
%p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
257-
%p2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p1, i32 %tid
258-
%p3 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p2, i32 %tid
259-
%p4 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p3, i32 %tid
260-
%p5 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p4, i32 %tid
261-
%p6 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p5, i32 %tid
262-
%p7 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p6, i32 %tid
263-
%p8 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p7, i32 %tid
264-
%p9 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p8, i32 %tid
265-
%v1 = load volatile <32 x float>, <32 x float> addrspace(1)* %p1
266-
%v2 = load volatile <32 x float>, <32 x float> addrspace(1)* %p2
267-
%v3 = load volatile <32 x float>, <32 x float> addrspace(1)* %p3
268-
%v4 = load volatile <32 x float>, <32 x float> addrspace(1)* %p4
269-
%v5 = load volatile <32 x float>, <32 x float> addrspace(1)* %p5
270-
%v6 = load volatile <32 x float>, <32 x float> addrspace(1)* %p6
271-
%v7 = load volatile <32 x float>, <32 x float> addrspace(1)* %p7
272-
%v8 = load volatile <32 x float>, <32 x float> addrspace(1)* %p8
273-
%v9 = load volatile <32 x float>, <32 x float> addrspace(1)* %p9
274-
br label %st
275-
276-
st:
277-
store volatile <32 x float> %v1, <32 x float> addrspace(1)* undef
278-
store volatile <32 x float> %v2, <32 x float> addrspace(1)* undef
279-
store volatile <32 x float> %v3, <32 x float> addrspace(1)* undef
280-
store volatile <32 x float> %v4, <32 x float> addrspace(1)* undef
281-
store volatile <32 x float> %v5, <32 x float> addrspace(1)* undef
282-
store volatile <32 x float> %v6, <32 x float> addrspace(1)* undef
283-
store volatile <32 x float> %v7, <32 x float> addrspace(1)* undef
284-
store volatile <32 x float> %v8, <32 x float> addrspace(1)* undef
285-
store volatile <32 x float> %v9, <32 x float> addrspace(1)* undef
286-
ret void
287-
}
288-
289-
; Make sure there's no crash when we have loads from fixed stack
290-
; objects and are processing VGPR spills
291-
292-
; GCN-LABEL: {{^}}stack_args_vgpr_spill:
293-
; GFX908: v_accvgpr_write_b32
294-
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
295-
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
296-
define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, <32 x float> addrspace(1)* %p) #1 {
297-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
298-
%p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
299-
%p2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p1, i32 %tid
300-
%p3 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p2, i32 %tid
301-
%p4 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p3, i32 %tid
302-
%p5 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p4, i32 %tid
303-
%p6 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p5, i32 %tid
304-
%p7 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p6, i32 %tid
305-
%v1 = load volatile <32 x float>, <32 x float> addrspace(1)* %p1
306-
%v2 = load volatile <32 x float>, <32 x float> addrspace(1)* %p2
307-
%v3 = load volatile <32 x float>, <32 x float> addrspace(1)* %p3
308-
%v4 = load volatile <32 x float>, <32 x float> addrspace(1)* %p4
309-
%v5 = load volatile <32 x float>, <32 x float> addrspace(1)* %p5
310-
%v6 = load volatile <32 x float>, <32 x float> addrspace(1)* %p6
311-
%v7 = load volatile <32 x float>, <32 x float> addrspace(1)* %p7
312-
br label %st
313-
314-
st:
315-
store volatile <32 x float> %arg0, <32 x float> addrspace(1)* undef
316-
store volatile <32 x float> %arg1, <32 x float> addrspace(1)* undef
317-
store volatile <32 x float> %v1, <32 x float> addrspace(1)* undef
318-
store volatile <32 x float> %v2, <32 x float> addrspace(1)* undef
319-
store volatile <32 x float> %v3, <32 x float> addrspace(1)* undef
320-
store volatile <32 x float> %v4, <32 x float> addrspace(1)* undef
321-
store volatile <32 x float> %v5, <32 x float> addrspace(1)* undef
322-
store volatile <32 x float> %v6, <32 x float> addrspace(1)* undef
323-
store volatile <32 x float> %v7, <32 x float> addrspace(1)* undef
324-
ret void
325-
}
326-
327-
328-
declare i32 @llvm.amdgcn.workitem.id.x()
329-
330108
attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
331-
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }

0 commit comments

Comments
 (0)