1
- ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
2
- ; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN,GFX900 %s
1
+ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
3
2
4
- ; GFX900: couldn't allocate input reg for constraint 'a'
5
-
6
-
7
- ; GCN-LABEL: {{^}}max_10_vgprs:
8
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
9
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
10
- ; GFX908-NOT: SCRATCH_RSRC
11
- ; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
12
- ; GFX900: buffer_store_dword v{{[0-9]}},
13
- ; GFX900: buffer_store_dword v{{[0-9]}},
14
- ; GFX900: buffer_load_dword v{{[0-9]}},
15
- ; GFX900: buffer_load_dword v{{[0-9]}},
16
- ; GFX908-NOT: buffer_
17
- ; GFX908-DAG: v_mov_b32_e32 v{{[0-9]}}, [[V_REG:v[0-9]+]]
18
- ; GFX908-DAG: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]
19
-
20
- ; GCN: NumVgprs: 10
21
- ; GFX900: ScratchSize: 12
22
- ; GFX908: ScratchSize: 0
23
- ; GCN: VGPRBlocks: 2
24
- ; GCN: NumVGPRsForWavesPerEU: 10
25
- define amdgpu_kernel void @max_10_vgprs (i32 addrspace (1 )* %p ) #0 {
26
- %tid = load volatile i32 , i32 addrspace (1 )* undef
27
- %p1 = getelementptr inbounds i32 , i32 addrspace (1 )* %p , i32 %tid
28
- %p2 = getelementptr inbounds i32 , i32 addrspace (1 )* %p1 , i32 4
29
- %p3 = getelementptr inbounds i32 , i32 addrspace (1 )* %p2 , i32 8
30
- %p4 = getelementptr inbounds i32 , i32 addrspace (1 )* %p3 , i32 12
31
- %p5 = getelementptr inbounds i32 , i32 addrspace (1 )* %p4 , i32 16
32
- %p6 = getelementptr inbounds i32 , i32 addrspace (1 )* %p5 , i32 20
33
- %p7 = getelementptr inbounds i32 , i32 addrspace (1 )* %p6 , i32 24
34
- %p8 = getelementptr inbounds i32 , i32 addrspace (1 )* %p7 , i32 28
35
- %p9 = getelementptr inbounds i32 , i32 addrspace (1 )* %p8 , i32 32
36
- %p10 = getelementptr inbounds i32 , i32 addrspace (1 )* %p9 , i32 36
37
- %v1 = load volatile i32 , i32 addrspace (1 )* %p1
38
- %v2 = load volatile i32 , i32 addrspace (1 )* %p2
39
- %v3 = load volatile i32 , i32 addrspace (1 )* %p3
40
- %v4 = load volatile i32 , i32 addrspace (1 )* %p4
41
- %v5 = load volatile i32 , i32 addrspace (1 )* %p5
42
- %v6 = load volatile i32 , i32 addrspace (1 )* %p6
43
- %v7 = load volatile i32 , i32 addrspace (1 )* %p7
44
- %v8 = load volatile i32 , i32 addrspace (1 )* %p8
45
- %v9 = load volatile i32 , i32 addrspace (1 )* %p9
46
- %v10 = load volatile i32 , i32 addrspace (1 )* %p10
47
- call void asm sideeffect "" , "v,v,v,v,v,v,v,v,v,v" (i32 %v1 , i32 %v2 , i32 %v3 , i32 %v4 , i32 %v5 , i32 %v6 , i32 %v7 , i32 %v8 , i32 %v9 , i32 %v10 )
48
- store volatile i32 %v1 , i32 addrspace (1 )* undef
49
- store volatile i32 %v2 , i32 addrspace (1 )* undef
50
- store volatile i32 %v3 , i32 addrspace (1 )* undef
51
- store volatile i32 %v4 , i32 addrspace (1 )* undef
52
- store volatile i32 %v5 , i32 addrspace (1 )* undef
53
- store volatile i32 %v6 , i32 addrspace (1 )* undef
54
- store volatile i32 %v7 , i32 addrspace (1 )* undef
55
- store volatile i32 %v8 , i32 addrspace (1 )* undef
56
- store volatile i32 %v9 , i32 addrspace (1 )* undef
57
- store volatile i32 %v10 , i32 addrspace (1 )* undef
58
- ret void
59
- }
60
-
61
- ; GCN-LABEL: {{^}}max_10_vgprs_used_9a:
3
+ ; GFX908-LABEL: {{^}}max_10_vgprs_used_9a:
62
4
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
63
5
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
64
6
; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
@@ -109,12 +51,12 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
109
51
ret void
110
52
}
111
53
112
- ; GCN-LABEL: {{^}}max_10_vgprs_used_1a_partial_spill:
113
- ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
114
- ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
54
+ ; GFX908-LABEL: {{^}}max_10_vgprs_used_1a_partial_spill:
55
+ ; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
56
+ ; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
115
57
; GFX908-DAG: v_accvgpr_write_b32 a0, 1
116
- ; GCN-DAG: buffer_store_dword v{{[0-9]}},
117
- ; GCN-DAG: buffer_store_dword v{{[0-9]}},
58
+ ; GFX908-DAG: buffer_store_dword v{{[0-9]}},
59
+ ; GFX908-DAG: buffer_store_dword v{{[0-9]}},
118
60
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}}
119
61
; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]}}
120
62
; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]}}
@@ -124,8 +66,8 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
124
66
; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]}}
125
67
; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]}}
126
68
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
127
- ; GCN-DAG: buffer_load_dword v{{[0-9]}},
128
- ; GCN-DAG: buffer_load_dword v{{[0-9]}},
69
+ ; GFX908-DAG: buffer_load_dword v{{[0-9]}},
70
+ ; GFX908-DAG: buffer_load_dword v{{[0-9]}},
129
71
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
130
72
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2
131
73
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3
@@ -136,11 +78,11 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
136
78
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a8
137
79
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a9
138
80
139
- ; GCN: NumVgprs: 10
81
+ ; GFX908: NumVgprs: 10
140
82
; GFX900: ScratchSize: 44
141
83
; GFX908: ScratchSize: 12
142
- ; GCN: VGPRBlocks: 2
143
- ; GCN: NumVGPRsForWavesPerEU: 10
84
+ ; GFX908: VGPRBlocks: 2
85
+ ; GFX908: NumVGPRsForWavesPerEU: 10
144
86
define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)* %p) #0 {
145
87
%tid = load volatile i32, i32 addrspace(1)* undef
146
88
call void asm sideeffect "", "a"(i32 1)
@@ -163,169 +105,4 @@ define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)*
163
105
ret void
164
106
}
165
107
166
- ; GCN-LABEL: {{^}}max_10_vgprs_spill_v32:
167
- ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
168
- ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
169
- ; GCN: buffer_store_dword v{{[0-9]}},
170
- ; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}}
171
- ; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
172
- ; GCN-NOT: a10
173
-
174
- ; GFX908: NumVgprs: 10
175
- ; GFX900: ScratchSize: 100
176
- ; GFX908: ScratchSize: 68
177
- ; GFX908: VGPRBlocks: 2
178
- ; GFX908: NumVGPRsForWavesPerEU: 10
179
- define amdgpu_kernel void @max_10_vgprs_spill_v32 (<32 x float > addrspace (1 )* %p ) #0 {
180
- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
181
- %gep = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p , i32 %tid
182
- %v = load volatile <32 x float >, <32 x float > addrspace (1 )* %gep
183
- store volatile <32 x float > %v , <32 x float > addrspace (1 )* undef
184
- ret void
185
- }
186
-
187
- ; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32:
188
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
189
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
190
- ; GFX908-NOT: SCRATCH_RSRC
191
- ; GFX908-DAG: v_accvgpr_write_b32 a0, v
192
- ; GFX900: buffer_store_dword v
193
- ; GFX900: buffer_load_dword v
194
- ; GFX908-NOT: buffer_
195
- ; GFX908-DAG: v_accvgpr_read_b32
196
-
197
- ; GFX900: NumVgprs: 256
198
- ; GFX900: ScratchSize: 148
199
- ; GFX908: NumVgprs: 255
200
- ; GFX908: ScratchSize: 0
201
- ; GCN: VGPRBlocks: 63
202
- ; GFX900: NumVGPRsForWavesPerEU: 256
203
- ; GFX908: NumVGPRsForWavesPerEU: 255
204
- define amdgpu_kernel void @max_256_vgprs_spill_9x32 (<32 x float > addrspace (1 )* %p ) #1 {
205
- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
206
- %p1 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p , i32 %tid
207
- %p2 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p1 , i32 %tid
208
- %p3 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p2 , i32 %tid
209
- %p4 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p3 , i32 %tid
210
- %p5 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p4 , i32 %tid
211
- %p6 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p5 , i32 %tid
212
- %p7 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p6 , i32 %tid
213
- %p8 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p7 , i32 %tid
214
- %p9 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p8 , i32 %tid
215
- %v1 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p1
216
- %v2 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p2
217
- %v3 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p3
218
- %v4 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p4
219
- %v5 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p5
220
- %v6 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p6
221
- %v7 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p7
222
- %v8 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p8
223
- %v9 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p9
224
- store volatile <32 x float > %v1 , <32 x float > addrspace (1 )* undef
225
- store volatile <32 x float > %v2 , <32 x float > addrspace (1 )* undef
226
- store volatile <32 x float > %v3 , <32 x float > addrspace (1 )* undef
227
- store volatile <32 x float > %v4 , <32 x float > addrspace (1 )* undef
228
- store volatile <32 x float > %v5 , <32 x float > addrspace (1 )* undef
229
- store volatile <32 x float > %v6 , <32 x float > addrspace (1 )* undef
230
- store volatile <32 x float > %v7 , <32 x float > addrspace (1 )* undef
231
- store volatile <32 x float > %v8 , <32 x float > addrspace (1 )* undef
232
- store volatile <32 x float > %v9 , <32 x float > addrspace (1 )* undef
233
- ret void
234
- }
235
-
236
- ; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32_2bb:
237
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
238
- ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
239
- ; GFX908-NOT: SCRATCH_RSRC
240
- ; GFX908: v_accvgpr_write_b32
241
- ; GFX908: global_load_
242
- ; GFX900: buffer_store_dword v
243
- ; GFX900: buffer_load_dword v
244
- ; GFX908-NOT: buffer_
245
- ; GFX908-DAG: v_accvgpr_read_b32
246
-
247
- ; GFX900: NumVgprs: 256
248
- ; GFX908: NumVgprs: 253
249
- ; GFX900: ScratchSize: 2052
250
- ; GFX908: ScratchSize: 0
251
- ; GCN: VGPRBlocks: 63
252
- ; GFX900: NumVGPRsForWavesPerEU: 256
253
- ; GFX908: NumVGPRsForWavesPerEU: 253
254
- define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb (<32 x float > addrspace (1 )* %p ) #1 {
255
- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
256
- %p1 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p , i32 %tid
257
- %p2 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p1 , i32 %tid
258
- %p3 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p2 , i32 %tid
259
- %p4 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p3 , i32 %tid
260
- %p5 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p4 , i32 %tid
261
- %p6 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p5 , i32 %tid
262
- %p7 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p6 , i32 %tid
263
- %p8 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p7 , i32 %tid
264
- %p9 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p8 , i32 %tid
265
- %v1 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p1
266
- %v2 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p2
267
- %v3 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p3
268
- %v4 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p4
269
- %v5 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p5
270
- %v6 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p6
271
- %v7 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p7
272
- %v8 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p8
273
- %v9 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p9
274
- br label %st
275
-
276
- st:
277
- store volatile <32 x float > %v1 , <32 x float > addrspace (1 )* undef
278
- store volatile <32 x float > %v2 , <32 x float > addrspace (1 )* undef
279
- store volatile <32 x float > %v3 , <32 x float > addrspace (1 )* undef
280
- store volatile <32 x float > %v4 , <32 x float > addrspace (1 )* undef
281
- store volatile <32 x float > %v5 , <32 x float > addrspace (1 )* undef
282
- store volatile <32 x float > %v6 , <32 x float > addrspace (1 )* undef
283
- store volatile <32 x float > %v7 , <32 x float > addrspace (1 )* undef
284
- store volatile <32 x float > %v8 , <32 x float > addrspace (1 )* undef
285
- store volatile <32 x float > %v9 , <32 x float > addrspace (1 )* undef
286
- ret void
287
- }
288
-
289
- ; Make sure there's no crash when we have loads from fixed stack
290
- ; objects and are processing VGPR spills
291
-
292
- ; GCN-LABEL: {{^}}stack_args_vgpr_spill:
293
- ; GFX908: v_accvgpr_write_b32
294
- ; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
295
- ; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
296
- define void @stack_args_vgpr_spill (<32 x float > %arg0 , <32 x float > %arg1 , <32 x float > addrspace (1 )* %p ) #1 {
297
- %tid = call i32 @llvm.amdgcn.workitem.id.x ()
298
- %p1 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p , i32 %tid
299
- %p2 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p1 , i32 %tid
300
- %p3 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p2 , i32 %tid
301
- %p4 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p3 , i32 %tid
302
- %p5 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p4 , i32 %tid
303
- %p6 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p5 , i32 %tid
304
- %p7 = getelementptr inbounds <32 x float >, <32 x float > addrspace (1 )* %p6 , i32 %tid
305
- %v1 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p1
306
- %v2 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p2
307
- %v3 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p3
308
- %v4 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p4
309
- %v5 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p5
310
- %v6 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p6
311
- %v7 = load volatile <32 x float >, <32 x float > addrspace (1 )* %p7
312
- br label %st
313
-
314
- st:
315
- store volatile <32 x float > %arg0 , <32 x float > addrspace (1 )* undef
316
- store volatile <32 x float > %arg1 , <32 x float > addrspace (1 )* undef
317
- store volatile <32 x float > %v1 , <32 x float > addrspace (1 )* undef
318
- store volatile <32 x float > %v2 , <32 x float > addrspace (1 )* undef
319
- store volatile <32 x float > %v3 , <32 x float > addrspace (1 )* undef
320
- store volatile <32 x float > %v4 , <32 x float > addrspace (1 )* undef
321
- store volatile <32 x float > %v5 , <32 x float > addrspace (1 )* undef
322
- store volatile <32 x float > %v6 , <32 x float > addrspace (1 )* undef
323
- store volatile <32 x float > %v7 , <32 x float > addrspace (1 )* undef
324
- ret void
325
- }
326
-
327
-
328
- declare i32 @llvm.amdgcn.workitem.id.x ()
329
-
330
108
attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
331
- attributes #1 = { "amdgpu-flat-work-group-size" ="1,256" }
0 commit comments