Skip to content

Commit d045cc9

Browse files
committed
[AMDGPU] trunc-bitcast-vector.ll - regenerate test checks
1 parent 3a18c09 commit d045cc9

File tree

1 file changed

+267
-27
lines changed

1 file changed

+267
-27
lines changed

llvm/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll

Lines changed: 267 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,123 @@
1-
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
2-
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
3+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
34

4-
; GCN-LABEL: {{^}}trunc_i64_bitcast_v2i32:
5-
; GCN: buffer_load_dword v
6-
; GCN: buffer_store_dword v
75
; Kernel under test: loads a <2 x i32> from %in, bitcasts it to i64, truncates
; the i64 to i32 and stores that i32 to %out.
; The SI and VI check blocks below both expect a single buffer_load_dword whose
; result (v0) is written back with buffer_store_dword; the two blocks differ
; only in the s_load_dwordx4 offset (0x9 versus 0x24).
define amdgpu_kernel void @trunc_i64_bitcast_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
6+
; SI-LABEL: trunc_i64_bitcast_v2i32:
7+
; SI: ; %bb.0:
8+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
9+
; SI-NEXT: s_mov_b32 s7, 0xf000
10+
; SI-NEXT: s_mov_b32 s6, -1
11+
; SI-NEXT: s_mov_b32 s10, s6
12+
; SI-NEXT: s_mov_b32 s11, s7
13+
; SI-NEXT: s_waitcnt lgkmcnt(0)
14+
; SI-NEXT: s_mov_b32 s8, s2
15+
; SI-NEXT: s_mov_b32 s9, s3
16+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
17+
; SI-NEXT: s_mov_b32 s4, s0
18+
; SI-NEXT: s_mov_b32 s5, s1
19+
; SI-NEXT: s_waitcnt vmcnt(0)
20+
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
21+
; SI-NEXT: s_endpgm
22+
;
23+
; VI-LABEL: trunc_i64_bitcast_v2i32:
24+
; VI: ; %bb.0:
25+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
26+
; VI-NEXT: s_mov_b32 s7, 0xf000
27+
; VI-NEXT: s_mov_b32 s6, -1
28+
; VI-NEXT: s_mov_b32 s10, s6
29+
; VI-NEXT: s_mov_b32 s11, s7
30+
; VI-NEXT: s_waitcnt lgkmcnt(0)
31+
; VI-NEXT: s_mov_b32 s8, s2
32+
; VI-NEXT: s_mov_b32 s9, s3
33+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
34+
; VI-NEXT: s_mov_b32 s4, s0
35+
; VI-NEXT: s_mov_b32 s5, s1
36+
; VI-NEXT: s_waitcnt vmcnt(0)
37+
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
38+
; VI-NEXT: s_endpgm
839
%ld = load <2 x i32>, ptr addrspace(1) %in
940
%bc = bitcast <2 x i32> %ld to i64
1041
%trunc = trunc i64 %bc to i32
1142
store i32 %trunc, ptr addrspace(1) %out
1243
ret void
1344
}
1445

15-
; GCN-LABEL: {{^}}trunc_i96_bitcast_v3i32:
16-
; GCN: buffer_load_dword v
17-
; GCN: buffer_store_dword v
1846
; Kernel under test: loads a <3 x i32> from %in, bitcasts it to i96, truncates
; the i96 to i32 and stores that i32 to %out.
; The SI and VI check blocks below both expect a single buffer_load_dword whose
; result (v0) is written back with buffer_store_dword; the two blocks differ
; only in the s_load_dwordx4 offset (0x9 versus 0x24).
define amdgpu_kernel void @trunc_i96_bitcast_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
47+
; SI-LABEL: trunc_i96_bitcast_v3i32:
48+
; SI: ; %bb.0:
49+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
50+
; SI-NEXT: s_mov_b32 s7, 0xf000
51+
; SI-NEXT: s_mov_b32 s6, -1
52+
; SI-NEXT: s_mov_b32 s10, s6
53+
; SI-NEXT: s_mov_b32 s11, s7
54+
; SI-NEXT: s_waitcnt lgkmcnt(0)
55+
; SI-NEXT: s_mov_b32 s8, s2
56+
; SI-NEXT: s_mov_b32 s9, s3
57+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
58+
; SI-NEXT: s_mov_b32 s4, s0
59+
; SI-NEXT: s_mov_b32 s5, s1
60+
; SI-NEXT: s_waitcnt vmcnt(0)
61+
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
62+
; SI-NEXT: s_endpgm
63+
;
64+
; VI-LABEL: trunc_i96_bitcast_v3i32:
65+
; VI: ; %bb.0:
66+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
67+
; VI-NEXT: s_mov_b32 s7, 0xf000
68+
; VI-NEXT: s_mov_b32 s6, -1
69+
; VI-NEXT: s_mov_b32 s10, s6
70+
; VI-NEXT: s_mov_b32 s11, s7
71+
; VI-NEXT: s_waitcnt lgkmcnt(0)
72+
; VI-NEXT: s_mov_b32 s8, s2
73+
; VI-NEXT: s_mov_b32 s9, s3
74+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
75+
; VI-NEXT: s_mov_b32 s4, s0
76+
; VI-NEXT: s_mov_b32 s5, s1
77+
; VI-NEXT: s_waitcnt vmcnt(0)
78+
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
79+
; VI-NEXT: s_endpgm
1980
%ld = load <3 x i32>, ptr addrspace(1) %in
2081
%bc = bitcast <3 x i32> %ld to i96
2182
%trunc = trunc i96 %bc to i32
2283
store i32 %trunc, ptr addrspace(1) %out
2384
ret void
2485
}
2586

26-
; GCN-LABEL: {{^}}trunc_i128_bitcast_v4i32:
27-
; GCN: buffer_load_dword v
28-
; GCN: buffer_store_dword v
2987
define amdgpu_kernel void @trunc_i128_bitcast_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
88+
; SI-LABEL: trunc_i128_bitcast_v4i32:
89+
; SI: ; %bb.0:
90+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
91+
; SI-NEXT: s_mov_b32 s7, 0xf000
92+
; SI-NEXT: s_mov_b32 s6, -1
93+
; SI-NEXT: s_mov_b32 s10, s6
94+
; SI-NEXT: s_mov_b32 s11, s7
95+
; SI-NEXT: s_waitcnt lgkmcnt(0)
96+
; SI-NEXT: s_mov_b32 s8, s2
97+
; SI-NEXT: s_mov_b32 s9, s3
98+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
99+
; SI-NEXT: s_mov_b32 s4, s0
100+
; SI-NEXT: s_mov_b32 s5, s1
101+
; SI-NEXT: s_waitcnt vmcnt(0)
102+
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
103+
; SI-NEXT: s_endpgm
104+
;
105+
; VI-LABEL: trunc_i128_bitcast_v4i32:
106+
; VI: ; %bb.0:
107+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
108+
; VI-NEXT: s_mov_b32 s7, 0xf000
109+
; VI-NEXT: s_mov_b32 s6, -1
110+
; VI-NEXT: s_mov_b32 s10, s6
111+
; VI-NEXT: s_mov_b32 s11, s7
112+
; VI-NEXT: s_waitcnt lgkmcnt(0)
113+
; VI-NEXT: s_mov_b32 s8, s2
114+
; VI-NEXT: s_mov_b32 s9, s3
115+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
116+
; VI-NEXT: s_mov_b32 s4, s0
117+
; VI-NEXT: s_mov_b32 s5, s1
118+
; VI-NEXT: s_waitcnt vmcnt(0)
119+
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
120+
; VI-NEXT: s_endpgm
30121
%ld = load <4 x i32>, ptr addrspace(1) %in
31122
%bc = bitcast <4 x i32> %ld to i128
32123
%trunc = trunc i128 %bc to i32
@@ -35,25 +126,85 @@ define amdgpu_kernel void @trunc_i128_bitcast_v4i32(ptr addrspace(1) %out, ptr a
35126
}
36127

37128
; Don't want the load width reduced in this case (the checks expect a full buffer_load_dword).
38-
; GCN-LABEL: {{^}}trunc_i16_bitcast_v2i16:
39-
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
40-
; GCN: buffer_store_short [[VAL]]
41129
; Kernel under test: loads a <2 x i16> from %in, bitcasts it to i32, truncates
; the i32 to i16 and stores that i16 to %out.
; The SI and VI check blocks below both expect a full buffer_load_dword whose
; result (v0) is then written with buffer_store_short; the two blocks differ
; only in the s_load_dwordx4 offset (0x9 versus 0x24).
define amdgpu_kernel void @trunc_i16_bitcast_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
130+
; SI-LABEL: trunc_i16_bitcast_v2i16:
131+
; SI: ; %bb.0:
132+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
133+
; SI-NEXT: s_mov_b32 s7, 0xf000
134+
; SI-NEXT: s_mov_b32 s6, -1
135+
; SI-NEXT: s_mov_b32 s10, s6
136+
; SI-NEXT: s_mov_b32 s11, s7
137+
; SI-NEXT: s_waitcnt lgkmcnt(0)
138+
; SI-NEXT: s_mov_b32 s8, s2
139+
; SI-NEXT: s_mov_b32 s9, s3
140+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
141+
; SI-NEXT: s_mov_b32 s4, s0
142+
; SI-NEXT: s_mov_b32 s5, s1
143+
; SI-NEXT: s_waitcnt vmcnt(0)
144+
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
145+
; SI-NEXT: s_endpgm
146+
;
147+
; VI-LABEL: trunc_i16_bitcast_v2i16:
148+
; VI: ; %bb.0:
149+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
150+
; VI-NEXT: s_mov_b32 s7, 0xf000
151+
; VI-NEXT: s_mov_b32 s6, -1
152+
; VI-NEXT: s_mov_b32 s10, s6
153+
; VI-NEXT: s_mov_b32 s11, s7
154+
; VI-NEXT: s_waitcnt lgkmcnt(0)
155+
; VI-NEXT: s_mov_b32 s8, s2
156+
; VI-NEXT: s_mov_b32 s9, s3
157+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
158+
; VI-NEXT: s_mov_b32 s4, s0
159+
; VI-NEXT: s_mov_b32 s5, s1
160+
; VI-NEXT: s_waitcnt vmcnt(0)
161+
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
162+
; VI-NEXT: s_endpgm
42163
%ld = load <2 x i16>, ptr addrspace(1) %in
43164
%bc = bitcast <2 x i16> %ld to i32
44165
%trunc = trunc i32 %bc to i16
45166
store i16 %trunc, ptr addrspace(1) %out
46167
ret void
47168
}
48169

49-
; GCN-LABEL: {{^}}trunc_i16_bitcast_v4i16:
50170
; FIXME: We need to teach the DAG combiner to reduce the load width for:
51171
; t21: v2i32,ch = load<LD8[%in(addrspace=1)]> t12, t10, undef:i64
52172
; t23: i64 = bitcast t21
53173
; t30: i16 = truncate t23
54-
; GCN: buffer_load_dword v[[VAL:[0-9]+]]
55-
; GCN: buffer_store_short v[[VAL]], off
56174
define amdgpu_kernel void @trunc_i16_bitcast_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
175+
; SI-LABEL: trunc_i16_bitcast_v4i16:
176+
; SI: ; %bb.0:
177+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
178+
; SI-NEXT: s_mov_b32 s7, 0xf000
179+
; SI-NEXT: s_mov_b32 s6, -1
180+
; SI-NEXT: s_mov_b32 s10, s6
181+
; SI-NEXT: s_mov_b32 s11, s7
182+
; SI-NEXT: s_waitcnt lgkmcnt(0)
183+
; SI-NEXT: s_mov_b32 s8, s2
184+
; SI-NEXT: s_mov_b32 s9, s3
185+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
186+
; SI-NEXT: s_mov_b32 s4, s0
187+
; SI-NEXT: s_mov_b32 s5, s1
188+
; SI-NEXT: s_waitcnt vmcnt(0)
189+
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
190+
; SI-NEXT: s_endpgm
191+
;
192+
; VI-LABEL: trunc_i16_bitcast_v4i16:
193+
; VI: ; %bb.0:
194+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
195+
; VI-NEXT: s_mov_b32 s7, 0xf000
196+
; VI-NEXT: s_mov_b32 s6, -1
197+
; VI-NEXT: s_mov_b32 s10, s6
198+
; VI-NEXT: s_mov_b32 s11, s7
199+
; VI-NEXT: s_waitcnt lgkmcnt(0)
200+
; VI-NEXT: s_mov_b32 s8, s2
201+
; VI-NEXT: s_mov_b32 s9, s3
202+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
203+
; VI-NEXT: s_mov_b32 s4, s0
204+
; VI-NEXT: s_mov_b32 s5, s1
205+
; VI-NEXT: s_waitcnt vmcnt(0)
206+
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
207+
; VI-NEXT: s_endpgm
57208
%ld = load <4 x i16>, ptr addrspace(1) %in
58209
%bc = bitcast <4 x i16> %ld to i64
59210
%trunc = trunc i64 %bc to i16
@@ -62,33 +213,122 @@ define amdgpu_kernel void @trunc_i16_bitcast_v4i16(ptr addrspace(1) %out, ptr ad
62213
}
63214

64215
; FIXME: Consistently shrink the load or not here (SI loads a ubyte while VI loads a ushort).
65-
; GCN-LABEL: {{^}}trunc_i8_bitcast_v2i8:
66-
; SI: buffer_load_ubyte [[VAL:v[0-9]+]]
67-
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
68-
; GCN: buffer_store_byte [[VAL]]
69216
; Kernel under test: loads a <2 x i8> from %in, bitcasts it to i16, truncates
; the i16 to i8 and stores that i8 to %out.
; The two check blocks below disagree on the load: the SI block expects
; buffer_load_ubyte while the VI block expects buffer_load_ushort. Both then
; expect the loaded register (v0) to be written with buffer_store_byte.
define amdgpu_kernel void @trunc_i8_bitcast_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %in) {
217+
; SI-LABEL: trunc_i8_bitcast_v2i8:
218+
; SI: ; %bb.0:
219+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
220+
; SI-NEXT: s_mov_b32 s7, 0xf000
221+
; SI-NEXT: s_mov_b32 s6, -1
222+
; SI-NEXT: s_mov_b32 s10, s6
223+
; SI-NEXT: s_mov_b32 s11, s7
224+
; SI-NEXT: s_waitcnt lgkmcnt(0)
225+
; SI-NEXT: s_mov_b32 s8, s2
226+
; SI-NEXT: s_mov_b32 s9, s3
227+
; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
228+
; SI-NEXT: s_mov_b32 s4, s0
229+
; SI-NEXT: s_mov_b32 s5, s1
230+
; SI-NEXT: s_waitcnt vmcnt(0)
231+
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
232+
; SI-NEXT: s_endpgm
233+
;
234+
; VI-LABEL: trunc_i8_bitcast_v2i8:
235+
; VI: ; %bb.0:
236+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
237+
; VI-NEXT: s_mov_b32 s7, 0xf000
238+
; VI-NEXT: s_mov_b32 s6, -1
239+
; VI-NEXT: s_mov_b32 s10, s6
240+
; VI-NEXT: s_mov_b32 s11, s7
241+
; VI-NEXT: s_waitcnt lgkmcnt(0)
242+
; VI-NEXT: s_mov_b32 s8, s2
243+
; VI-NEXT: s_mov_b32 s9, s3
244+
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
245+
; VI-NEXT: s_mov_b32 s4, s0
246+
; VI-NEXT: s_mov_b32 s5, s1
247+
; VI-NEXT: s_waitcnt vmcnt(0)
248+
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
249+
; VI-NEXT: s_endpgm
70250
%ld = load <2 x i8>, ptr addrspace(1) %in
71251
%bc = bitcast <2 x i8> %ld to i16
72252
%trunc = trunc i16 %bc to i8
73253
store i8 %trunc, ptr addrspace(1) %out
74254
ret void
75255
}
76256

77-
; GCN-LABEL: {{^}}trunc_i32_bitcast_v4i8:
78-
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
79-
; GCN: buffer_store_byte [[VAL]]
80257
; Kernel under test: loads a <4 x i8> from %in, bitcasts it to i32, truncates
; the i32 to i8 and stores that i8 to %out.
; The SI and VI check blocks below both expect a full buffer_load_dword whose
; result (v0) is then written with buffer_store_byte; the two blocks differ
; only in the s_load_dwordx4 offset (0x9 versus 0x24).
define amdgpu_kernel void @trunc_i32_bitcast_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) {
258+
; SI-LABEL: trunc_i32_bitcast_v4i8:
259+
; SI: ; %bb.0:
260+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
261+
; SI-NEXT: s_mov_b32 s7, 0xf000
262+
; SI-NEXT: s_mov_b32 s6, -1
263+
; SI-NEXT: s_mov_b32 s10, s6
264+
; SI-NEXT: s_mov_b32 s11, s7
265+
; SI-NEXT: s_waitcnt lgkmcnt(0)
266+
; SI-NEXT: s_mov_b32 s8, s2
267+
; SI-NEXT: s_mov_b32 s9, s3
268+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
269+
; SI-NEXT: s_mov_b32 s4, s0
270+
; SI-NEXT: s_mov_b32 s5, s1
271+
; SI-NEXT: s_waitcnt vmcnt(0)
272+
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
273+
; SI-NEXT: s_endpgm
274+
;
275+
; VI-LABEL: trunc_i32_bitcast_v4i8:
276+
; VI: ; %bb.0:
277+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
278+
; VI-NEXT: s_mov_b32 s7, 0xf000
279+
; VI-NEXT: s_mov_b32 s6, -1
280+
; VI-NEXT: s_mov_b32 s10, s6
281+
; VI-NEXT: s_mov_b32 s11, s7
282+
; VI-NEXT: s_waitcnt lgkmcnt(0)
283+
; VI-NEXT: s_mov_b32 s8, s2
284+
; VI-NEXT: s_mov_b32 s9, s3
285+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
286+
; VI-NEXT: s_mov_b32 s4, s0
287+
; VI-NEXT: s_mov_b32 s5, s1
288+
; VI-NEXT: s_waitcnt vmcnt(0)
289+
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
290+
; VI-NEXT: s_endpgm
81291
%ld = load <4 x i8>, ptr addrspace(1) %in
82292
%bc = bitcast <4 x i8> %ld to i32
83293
%trunc = trunc i32 %bc to i8
84294
store i8 %trunc, ptr addrspace(1) %out
85295
ret void
86296
}
87297

88-
; GCN-LABEL: {{^}}trunc_i24_bitcast_v3i8:
89-
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
90-
; GCN: buffer_store_byte [[VAL]]
91298
define amdgpu_kernel void @trunc_i24_bitcast_v3i8(ptr addrspace(1) %out, ptr addrspace(1) %in) {
299+
; SI-LABEL: trunc_i24_bitcast_v3i8:
300+
; SI: ; %bb.0:
301+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
302+
; SI-NEXT: s_mov_b32 s7, 0xf000
303+
; SI-NEXT: s_mov_b32 s6, -1
304+
; SI-NEXT: s_mov_b32 s10, s6
305+
; SI-NEXT: s_mov_b32 s11, s7
306+
; SI-NEXT: s_waitcnt lgkmcnt(0)
307+
; SI-NEXT: s_mov_b32 s8, s2
308+
; SI-NEXT: s_mov_b32 s9, s3
309+
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
310+
; SI-NEXT: s_mov_b32 s4, s0
311+
; SI-NEXT: s_mov_b32 s5, s1
312+
; SI-NEXT: s_waitcnt vmcnt(0)
313+
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
314+
; SI-NEXT: s_endpgm
315+
;
316+
; VI-LABEL: trunc_i24_bitcast_v3i8:
317+
; VI: ; %bb.0:
318+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
319+
; VI-NEXT: s_mov_b32 s7, 0xf000
320+
; VI-NEXT: s_mov_b32 s6, -1
321+
; VI-NEXT: s_mov_b32 s10, s6
322+
; VI-NEXT: s_mov_b32 s11, s7
323+
; VI-NEXT: s_waitcnt lgkmcnt(0)
324+
; VI-NEXT: s_mov_b32 s8, s2
325+
; VI-NEXT: s_mov_b32 s9, s3
326+
; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
327+
; VI-NEXT: s_mov_b32 s4, s0
328+
; VI-NEXT: s_mov_b32 s5, s1
329+
; VI-NEXT: s_waitcnt vmcnt(0)
330+
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
331+
; VI-NEXT: s_endpgm
92332
%ld = load <3 x i8>, ptr addrspace(1) %in
93333
%bc = bitcast <3 x i8> %ld to i24
94334
%trunc = trunc i24 %bc to i8

0 commit comments

Comments
 (0)