1
- ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
2
- ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
3
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
3
4
4
- ; GCN-LABEL: {{^}}trunc_i64_bitcast_v2i32:
5
- ; GCN: buffer_load_dword v
6
- ; GCN: buffer_store_dword v
7
5
; Loads a <2 x i32> from %in, bitcasts it to i64, truncates that to i32 and
; stores the result to %out. Under both check prefixes the expected code is a
; single buffer_load_dword followed by a single buffer_store_dword.
define amdgpu_kernel void @trunc_i64_bitcast_v2i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
6
+ ; SI-LABEL: trunc_i64_bitcast_v2i32:
7
+ ; SI: ; %bb.0:
8
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
9
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
10
+ ; SI-NEXT: s_mov_b32 s6, -1
11
+ ; SI-NEXT: s_mov_b32 s10, s6
12
+ ; SI-NEXT: s_mov_b32 s11, s7
13
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
14
+ ; SI-NEXT: s_mov_b32 s8, s2
15
+ ; SI-NEXT: s_mov_b32 s9, s3
16
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
17
+ ; SI-NEXT: s_mov_b32 s4, s0
18
+ ; SI-NEXT: s_mov_b32 s5, s1
19
+ ; SI-NEXT: s_waitcnt vmcnt(0)
20
+ ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
21
+ ; SI-NEXT: s_endpgm
22
+ ;
23
+ ; VI-LABEL: trunc_i64_bitcast_v2i32:
24
+ ; VI: ; %bb.0:
25
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
26
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
27
+ ; VI-NEXT: s_mov_b32 s6, -1
28
+ ; VI-NEXT: s_mov_b32 s10, s6
29
+ ; VI-NEXT: s_mov_b32 s11, s7
30
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
31
+ ; VI-NEXT: s_mov_b32 s8, s2
32
+ ; VI-NEXT: s_mov_b32 s9, s3
33
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
34
+ ; VI-NEXT: s_mov_b32 s4, s0
35
+ ; VI-NEXT: s_mov_b32 s5, s1
36
+ ; VI-NEXT: s_waitcnt vmcnt(0)
37
+ ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
38
+ ; VI-NEXT: s_endpgm
8
39
%ld = load <2 x i32 >, ptr addrspace (1 ) %in
9
40
%bc = bitcast <2 x i32 > %ld to i64
10
41
%trunc = trunc i64 %bc to i32
11
42
store i32 %trunc , ptr addrspace (1 ) %out
12
43
ret void
13
44
}
14
45
15
- ; GCN-LABEL: {{^}}trunc_i96_bitcast_v3i32:
16
- ; GCN: buffer_load_dword v
17
- ; GCN: buffer_store_dword v
18
46
; Loads a <3 x i32> from %in, bitcasts it to i96, truncates that to i32 and
; stores the result to %out. Under both check prefixes the expected code is a
; single buffer_load_dword followed by a single buffer_store_dword.
define amdgpu_kernel void @trunc_i96_bitcast_v3i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
47
+ ; SI-LABEL: trunc_i96_bitcast_v3i32:
48
+ ; SI: ; %bb.0:
49
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
50
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
51
+ ; SI-NEXT: s_mov_b32 s6, -1
52
+ ; SI-NEXT: s_mov_b32 s10, s6
53
+ ; SI-NEXT: s_mov_b32 s11, s7
54
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
55
+ ; SI-NEXT: s_mov_b32 s8, s2
56
+ ; SI-NEXT: s_mov_b32 s9, s3
57
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
58
+ ; SI-NEXT: s_mov_b32 s4, s0
59
+ ; SI-NEXT: s_mov_b32 s5, s1
60
+ ; SI-NEXT: s_waitcnt vmcnt(0)
61
+ ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
62
+ ; SI-NEXT: s_endpgm
63
+ ;
64
+ ; VI-LABEL: trunc_i96_bitcast_v3i32:
65
+ ; VI: ; %bb.0:
66
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
67
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
68
+ ; VI-NEXT: s_mov_b32 s6, -1
69
+ ; VI-NEXT: s_mov_b32 s10, s6
70
+ ; VI-NEXT: s_mov_b32 s11, s7
71
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
72
+ ; VI-NEXT: s_mov_b32 s8, s2
73
+ ; VI-NEXT: s_mov_b32 s9, s3
74
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
75
+ ; VI-NEXT: s_mov_b32 s4, s0
76
+ ; VI-NEXT: s_mov_b32 s5, s1
77
+ ; VI-NEXT: s_waitcnt vmcnt(0)
78
+ ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
79
+ ; VI-NEXT: s_endpgm
19
80
%ld = load <3 x i32 >, ptr addrspace (1 ) %in
20
81
%bc = bitcast <3 x i32 > %ld to i96
21
82
%trunc = trunc i96 %bc to i32
22
83
store i32 %trunc , ptr addrspace (1 ) %out
23
84
ret void
24
85
}
25
86
26
- ; GCN-LABEL: {{^}}trunc_i128_bitcast_v4i32:
27
- ; GCN: buffer_load_dword v
28
- ; GCN: buffer_store_dword v
29
87
define amdgpu_kernel void @trunc_i128_bitcast_v4i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
88
+ ; SI-LABEL: trunc_i128_bitcast_v4i32:
89
+ ; SI: ; %bb.0:
90
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
91
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
92
+ ; SI-NEXT: s_mov_b32 s6, -1
93
+ ; SI-NEXT: s_mov_b32 s10, s6
94
+ ; SI-NEXT: s_mov_b32 s11, s7
95
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
96
+ ; SI-NEXT: s_mov_b32 s8, s2
97
+ ; SI-NEXT: s_mov_b32 s9, s3
98
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
99
+ ; SI-NEXT: s_mov_b32 s4, s0
100
+ ; SI-NEXT: s_mov_b32 s5, s1
101
+ ; SI-NEXT: s_waitcnt vmcnt(0)
102
+ ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
103
+ ; SI-NEXT: s_endpgm
104
+ ;
105
+ ; VI-LABEL: trunc_i128_bitcast_v4i32:
106
+ ; VI: ; %bb.0:
107
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
108
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
109
+ ; VI-NEXT: s_mov_b32 s6, -1
110
+ ; VI-NEXT: s_mov_b32 s10, s6
111
+ ; VI-NEXT: s_mov_b32 s11, s7
112
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
113
+ ; VI-NEXT: s_mov_b32 s8, s2
114
+ ; VI-NEXT: s_mov_b32 s9, s3
115
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
116
+ ; VI-NEXT: s_mov_b32 s4, s0
117
+ ; VI-NEXT: s_mov_b32 s5, s1
118
+ ; VI-NEXT: s_waitcnt vmcnt(0)
119
+ ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
120
+ ; VI-NEXT: s_endpgm
30
121
%ld = load <4 x i32 >, ptr addrspace (1 ) %in
31
122
%bc = bitcast <4 x i32 > %ld to i128
32
123
%trunc = trunc i128 %bc to i32
@@ -35,25 +126,85 @@ define amdgpu_kernel void @trunc_i128_bitcast_v4i32(ptr addrspace(1) %out, ptr a
35
126
}
36
127
37
128
; Don't want load width reduced in this case.
38
- ; GCN-LABEL: {{^}}trunc_i16_bitcast_v2i16:
39
- ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
40
- ; GCN: buffer_store_short [[VAL]]
41
129
; Loads a <2 x i16> from %in, bitcasts it to i32, truncates that to i16 and
; stores the result to %out. Under both check prefixes the expected code keeps
; the full-width buffer_load_dword and narrows only at the buffer_store_short.
define amdgpu_kernel void @trunc_i16_bitcast_v2i16 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
130
+ ; SI-LABEL: trunc_i16_bitcast_v2i16:
131
+ ; SI: ; %bb.0:
132
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
133
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
134
+ ; SI-NEXT: s_mov_b32 s6, -1
135
+ ; SI-NEXT: s_mov_b32 s10, s6
136
+ ; SI-NEXT: s_mov_b32 s11, s7
137
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
138
+ ; SI-NEXT: s_mov_b32 s8, s2
139
+ ; SI-NEXT: s_mov_b32 s9, s3
140
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
141
+ ; SI-NEXT: s_mov_b32 s4, s0
142
+ ; SI-NEXT: s_mov_b32 s5, s1
143
+ ; SI-NEXT: s_waitcnt vmcnt(0)
144
+ ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
145
+ ; SI-NEXT: s_endpgm
146
+ ;
147
+ ; VI-LABEL: trunc_i16_bitcast_v2i16:
148
+ ; VI: ; %bb.0:
149
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
150
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
151
+ ; VI-NEXT: s_mov_b32 s6, -1
152
+ ; VI-NEXT: s_mov_b32 s10, s6
153
+ ; VI-NEXT: s_mov_b32 s11, s7
154
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
155
+ ; VI-NEXT: s_mov_b32 s8, s2
156
+ ; VI-NEXT: s_mov_b32 s9, s3
157
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
158
+ ; VI-NEXT: s_mov_b32 s4, s0
159
+ ; VI-NEXT: s_mov_b32 s5, s1
160
+ ; VI-NEXT: s_waitcnt vmcnt(0)
161
+ ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
162
+ ; VI-NEXT: s_endpgm
42
163
%ld = load <2 x i16 >, ptr addrspace (1 ) %in
43
164
%bc = bitcast <2 x i16 > %ld to i32
44
165
%trunc = trunc i32 %bc to i16
45
166
store i16 %trunc , ptr addrspace (1 ) %out
46
167
ret void
47
168
}
48
169
49
- ; GCN-LABEL: {{^}}trunc_i16_bitcast_v4i16:
50
170
; FIXME: We need to teach the DAG combiner to reduce the load width for:
51
171
; t21: v2i32,ch = load<LD8[%in(addrspace=1)]> t12, t10, undef:i64
52
172
; t23: i64 = bitcast t21
53
173
; t30: i16 = truncate t23
54
- ; GCN: buffer_load_dword v[[VAL:[0-9]+]]
55
- ; GCN: buffer_store_short v[[VAL]], off
56
174
define amdgpu_kernel void @trunc_i16_bitcast_v4i16 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
175
+ ; SI-LABEL: trunc_i16_bitcast_v4i16:
176
+ ; SI: ; %bb.0:
177
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
178
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
179
+ ; SI-NEXT: s_mov_b32 s6, -1
180
+ ; SI-NEXT: s_mov_b32 s10, s6
181
+ ; SI-NEXT: s_mov_b32 s11, s7
182
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
183
+ ; SI-NEXT: s_mov_b32 s8, s2
184
+ ; SI-NEXT: s_mov_b32 s9, s3
185
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
186
+ ; SI-NEXT: s_mov_b32 s4, s0
187
+ ; SI-NEXT: s_mov_b32 s5, s1
188
+ ; SI-NEXT: s_waitcnt vmcnt(0)
189
+ ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
190
+ ; SI-NEXT: s_endpgm
191
+ ;
192
+ ; VI-LABEL: trunc_i16_bitcast_v4i16:
193
+ ; VI: ; %bb.0:
194
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
195
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
196
+ ; VI-NEXT: s_mov_b32 s6, -1
197
+ ; VI-NEXT: s_mov_b32 s10, s6
198
+ ; VI-NEXT: s_mov_b32 s11, s7
199
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
200
+ ; VI-NEXT: s_mov_b32 s8, s2
201
+ ; VI-NEXT: s_mov_b32 s9, s3
202
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
203
+ ; VI-NEXT: s_mov_b32 s4, s0
204
+ ; VI-NEXT: s_mov_b32 s5, s1
205
+ ; VI-NEXT: s_waitcnt vmcnt(0)
206
+ ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
207
+ ; VI-NEXT: s_endpgm
57
208
%ld = load <4 x i16 >, ptr addrspace (1 ) %in
58
209
%bc = bitcast <4 x i16 > %ld to i64
59
210
%trunc = trunc i64 %bc to i16
@@ -62,33 +213,122 @@ define amdgpu_kernel void @trunc_i16_bitcast_v4i16(ptr addrspace(1) %out, ptr ad
62
213
}
63
214
64
215
; FIXME: Consistently shrink the load width or not here (SI loads a byte, VI loads a short).
65
- ; GCN-LABEL: {{^}}trunc_i8_bitcast_v2i8:
66
- ; SI: buffer_load_ubyte [[VAL:v[0-9]+]]
67
- ; VI: buffer_load_ushort [[VAL:v[0-9]+]]
68
- ; GCN: buffer_store_byte [[VAL]]
69
216
; Loads a <2 x i8> from %in, bitcasts it to i16, truncates that to i8 and
; stores the result to %out. The two check prefixes expect different load
; widths: buffer_load_ubyte for the SI run and buffer_load_ushort for the VI
; run; both store with buffer_store_byte.
define amdgpu_kernel void @trunc_i8_bitcast_v2i8 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
217
+ ; SI-LABEL: trunc_i8_bitcast_v2i8:
218
+ ; SI: ; %bb.0:
219
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
220
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
221
+ ; SI-NEXT: s_mov_b32 s6, -1
222
+ ; SI-NEXT: s_mov_b32 s10, s6
223
+ ; SI-NEXT: s_mov_b32 s11, s7
224
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
225
+ ; SI-NEXT: s_mov_b32 s8, s2
226
+ ; SI-NEXT: s_mov_b32 s9, s3
227
+ ; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
228
+ ; SI-NEXT: s_mov_b32 s4, s0
229
+ ; SI-NEXT: s_mov_b32 s5, s1
230
+ ; SI-NEXT: s_waitcnt vmcnt(0)
231
+ ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
232
+ ; SI-NEXT: s_endpgm
233
+ ;
234
+ ; VI-LABEL: trunc_i8_bitcast_v2i8:
235
+ ; VI: ; %bb.0:
236
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
237
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
238
+ ; VI-NEXT: s_mov_b32 s6, -1
239
+ ; VI-NEXT: s_mov_b32 s10, s6
240
+ ; VI-NEXT: s_mov_b32 s11, s7
241
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
242
+ ; VI-NEXT: s_mov_b32 s8, s2
243
+ ; VI-NEXT: s_mov_b32 s9, s3
244
+ ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
245
+ ; VI-NEXT: s_mov_b32 s4, s0
246
+ ; VI-NEXT: s_mov_b32 s5, s1
247
+ ; VI-NEXT: s_waitcnt vmcnt(0)
248
+ ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
249
+ ; VI-NEXT: s_endpgm
70
250
%ld = load <2 x i8 >, ptr addrspace (1 ) %in
71
251
%bc = bitcast <2 x i8 > %ld to i16
72
252
%trunc = trunc i16 %bc to i8
73
253
store i8 %trunc , ptr addrspace (1 ) %out
74
254
ret void
75
255
}
76
256
77
- ; GCN-LABEL: {{^}}trunc_i32_bitcast_v4i8:
78
- ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
79
- ; GCN: buffer_store_byte [[VAL]]
80
257
; Loads a <4 x i8> from %in, bitcasts it to i32, truncates that to i8 and
; stores the result to %out. Under both check prefixes the expected code keeps
; the full-width buffer_load_dword and narrows only at the buffer_store_byte.
define amdgpu_kernel void @trunc_i32_bitcast_v4i8 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
258
+ ; SI-LABEL: trunc_i32_bitcast_v4i8:
259
+ ; SI: ; %bb.0:
260
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
261
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
262
+ ; SI-NEXT: s_mov_b32 s6, -1
263
+ ; SI-NEXT: s_mov_b32 s10, s6
264
+ ; SI-NEXT: s_mov_b32 s11, s7
265
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
266
+ ; SI-NEXT: s_mov_b32 s8, s2
267
+ ; SI-NEXT: s_mov_b32 s9, s3
268
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
269
+ ; SI-NEXT: s_mov_b32 s4, s0
270
+ ; SI-NEXT: s_mov_b32 s5, s1
271
+ ; SI-NEXT: s_waitcnt vmcnt(0)
272
+ ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
273
+ ; SI-NEXT: s_endpgm
274
+ ;
275
+ ; VI-LABEL: trunc_i32_bitcast_v4i8:
276
+ ; VI: ; %bb.0:
277
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
278
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
279
+ ; VI-NEXT: s_mov_b32 s6, -1
280
+ ; VI-NEXT: s_mov_b32 s10, s6
281
+ ; VI-NEXT: s_mov_b32 s11, s7
282
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
283
+ ; VI-NEXT: s_mov_b32 s8, s2
284
+ ; VI-NEXT: s_mov_b32 s9, s3
285
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
286
+ ; VI-NEXT: s_mov_b32 s4, s0
287
+ ; VI-NEXT: s_mov_b32 s5, s1
288
+ ; VI-NEXT: s_waitcnt vmcnt(0)
289
+ ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
290
+ ; VI-NEXT: s_endpgm
81
291
%ld = load <4 x i8 >, ptr addrspace (1 ) %in
82
292
%bc = bitcast <4 x i8 > %ld to i32
83
293
%trunc = trunc i32 %bc to i8
84
294
store i8 %trunc , ptr addrspace (1 ) %out
85
295
ret void
86
296
}
87
297
88
- ; GCN-LABEL: {{^}}trunc_i24_bitcast_v3i8:
89
- ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
90
- ; GCN: buffer_store_byte [[VAL]]
91
298
define amdgpu_kernel void @trunc_i24_bitcast_v3i8 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
299
+ ; SI-LABEL: trunc_i24_bitcast_v3i8:
300
+ ; SI: ; %bb.0:
301
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
302
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
303
+ ; SI-NEXT: s_mov_b32 s6, -1
304
+ ; SI-NEXT: s_mov_b32 s10, s6
305
+ ; SI-NEXT: s_mov_b32 s11, s7
306
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
307
+ ; SI-NEXT: s_mov_b32 s8, s2
308
+ ; SI-NEXT: s_mov_b32 s9, s3
309
+ ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
310
+ ; SI-NEXT: s_mov_b32 s4, s0
311
+ ; SI-NEXT: s_mov_b32 s5, s1
312
+ ; SI-NEXT: s_waitcnt vmcnt(0)
313
+ ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
314
+ ; SI-NEXT: s_endpgm
315
+ ;
316
+ ; VI-LABEL: trunc_i24_bitcast_v3i8:
317
+ ; VI: ; %bb.0:
318
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
319
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
320
+ ; VI-NEXT: s_mov_b32 s6, -1
321
+ ; VI-NEXT: s_mov_b32 s10, s6
322
+ ; VI-NEXT: s_mov_b32 s11, s7
323
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
324
+ ; VI-NEXT: s_mov_b32 s8, s2
325
+ ; VI-NEXT: s_mov_b32 s9, s3
326
+ ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
327
+ ; VI-NEXT: s_mov_b32 s4, s0
328
+ ; VI-NEXT: s_mov_b32 s5, s1
329
+ ; VI-NEXT: s_waitcnt vmcnt(0)
330
+ ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
331
+ ; VI-NEXT: s_endpgm
92
332
%ld = load <3 x i8 >, ptr addrspace (1 ) %in
93
333
%bc = bitcast <3 x i8 > %ld to i24
94
334
%trunc = trunc i24 %bc to i8
0 commit comments