@@ -171,3 +171,173 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable
171
171
%r = load double , double * %bc , align 32
172
172
ret double %r
173
173
}
174
+
175
+ define <4 x float > @load_f32_insert_v4f32 (float * align 16 dereferenceable (16 ) %p ) { ; scalar float load inserted into lane 0 of an undef <4 x float>; %p is align 16 and dereferenceable(16), which equals the 16-byte size of the result vector
176
+ ; CHECK-LABEL: @load_f32_insert_v4f32(
177
+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
178
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
179
+ ; CHECK-NEXT: ret <4 x float> [[R]]
180
+ ;
181
+ %s = load float , float * %p , align 4 ; the load itself only states align 4, weaker than the argument's align 16
182
+ %r = insertelement <4 x float > undef , float %s , i32 0 ; lane 0 receives the scalar; the other three lanes stay undef
183
+ ret <4 x float > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
184
+ }
185
+
186
+ define <4 x float > @casted_load_f32_insert_v4f32 (<4 x float >* align 4 dereferenceable (16 ) %p ) { ; scalar float load through a <4 x float>* argument (align 4, dereferenceable(16)), inserted into lane 0 of an undef <4 x float>
187
+ ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
188
+ ; CHECK-NEXT: [[B:%.*]] = bitcast <4 x float>* [[P:%.*]] to float*
189
+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[B]], align 4
190
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
191
+ ; CHECK-NEXT: ret <4 x float> [[R]]
192
+ ;
193
+ %b = bitcast <4 x float >* %p to float * ; same address, viewed as a pointer to a single float
194
+ %s = load float , float * %b , align 4 ; load alignment matches the argument's align 4
195
+ %r = insertelement <4 x float > undef , float %s , i32 0 ; lane 0 receives the scalar; the other lanes stay undef
196
+ ret <4 x float > %r ; the expected output above lists the same bitcast/load/insertelement/ret sequence as this body
197
+ }
198
+
199
+ define <4 x i32 > @load_i32_insert_v4i32 (i32* align 16 dereferenceable (16 ) %p ) { ; integer variant: scalar i32 load inserted into lane 0 of an undef <4 x i32>; %p is align 16 and dereferenceable(16), the size of the result vector
200
+ ; CHECK-LABEL: @load_i32_insert_v4i32(
201
+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4
202
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
203
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
204
+ ;
205
+ %s = load i32 , i32* %p , align 4 ; the load itself only states align 4, weaker than the argument's align 16
206
+ %r = insertelement <4 x i32 > undef , i32 %s , i32 0 ; lane 0 receives the scalar; the other three lanes stay undef
207
+ ret <4 x i32 > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
208
+ }
209
+
210
+ define <4 x i32 > @casted_load_i32_insert_v4i32 (<16 x i8 >* align 4 dereferenceable (16 ) %p ) { ; scalar i32 load through a <16 x i8>* argument (align 4, dereferenceable(16)); the pointee type differs from the <4 x i32> result, but both are 16 bytes
211
+ ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
212
+ ; CHECK-NEXT: [[B:%.*]] = bitcast <16 x i8>* [[P:%.*]] to i32*
213
+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4
214
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
215
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
216
+ ;
217
+ %b = bitcast <16 x i8 >* %p to i32* ; same address, viewed as a pointer to a single i32
218
+ %s = load i32 , i32* %b , align 4 ; load alignment matches the argument's align 4
219
+ %r = insertelement <4 x i32 > undef , i32 %s , i32 0 ; lane 0 receives the scalar; the other lanes stay undef
220
+ ret <4 x i32 > %r ; the expected output above lists the same bitcast/load/insertelement/ret sequence as this body
221
+ }
222
+
223
+ define <4 x float > @gep00_load_f32_insert_v4f32 (<4 x float >* align 16 dereferenceable (16 ) %p ) { ; scalar float load addressed by a GEP with indices (0, 0), inserted into lane 0 of an undef <4 x float>; %p is align 16, dereferenceable(16)
224
+ ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
225
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0
226
+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[GEP]], align 16
227
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
228
+ ; CHECK-NEXT: ret <4 x float> [[R]]
229
+ ;
230
+ %gep = getelementptr inbounds <4 x float >, <4 x float >* %p , i64 0 , i64 0 ; address of element 0 of the vector at %p, i.e. byte offset 0
231
+ %s = load float , float * %gep , align 16 ; load states the same align 16 as the argument
232
+ %r = insertelement <4 x float > undef , float %s , i64 0 ; insert index is written as i64 here rather than i32
233
+ ret <4 x float > %r ; the expected output above lists the same getelementptr/load/insertelement/ret sequence as this body
234
+ }
235
+
236
+ define <8 x i16 > @gep01_load_i16_insert_v8i16 (<8 x i16 >* align 16 dereferenceable (18 ) %p ) { ; scalar i16 load from element 1 of the vector at %p, inserted into lane 0 of an undef <8 x i16>; dereferenceable(18) equals byte offset 2 plus the 16-byte vector size
237
+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
238
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
239
+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
240
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
241
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
242
+ ;
243
+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 0 , i64 1 ; address of element 1, i.e. 2 bytes past %p
244
+ %s = load i16 , i16* %gep , align 2 ; load states only align 2, the element size
245
+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0 ; source element 1 is placed in result lane 0; the other lanes stay undef
246
+ ret <8 x i16 > %r ; the expected output above lists the same getelementptr/load/insertelement/ret sequence as this body
247
+ }
248
+
249
+ define <8 x i16 > @gep01_load_i16_insert_v8i16_deref (<8 x i16 >* align 16 dereferenceable (17 ) %p ) { ; scalar i16 load from element 1 with dereferenceable(17): one byte short of byte offset 2 plus 16 bytes. NOTE(review): presumably a deliberately insufficient-dereferenceability case; confirm against the transform under test
250
+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
251
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
252
+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
253
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
254
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
255
+ ;
256
+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 0 , i64 1 ; address of element 1, i.e. 2 bytes past %p
257
+ %s = load i16 , i16* %gep , align 2 ; load states only align 2, the element size
258
+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0 ; source element 1 is placed in result lane 0; the other lanes stay undef
259
+ ret <8 x i16 > %r ; the expected output above lists the same getelementptr/load/insertelement/ret sequence as this body
260
+ }
261
+
262
+ define <8 x i16 > @gep10_load_i16_insert_v8i16 (<8 x i16 >* align 16 dereferenceable (32 ) %p ) { ; scalar i16 load from element 0 of the second <8 x i16> after %p, inserted into lane 0 of an undef <8 x i16>; dereferenceable(32) equals byte offset 16 plus the 16-byte vector size
263
+ ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
264
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
265
+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
266
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
267
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
268
+ ;
269
+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 1 , i64 0 ; first index 1 steps over one whole vector (16 bytes); second index 0 selects its first element
270
+ %s = load i16 , i16* %gep , align 16 ; load states align 16, the same as the argument
271
+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0 ; lane 0 receives the scalar; the other lanes stay undef
272
+ ret <8 x i16 > %r ; the expected output above lists the same getelementptr/load/insertelement/ret sequence as this body
273
+ }
274
+
275
+ define <8 x i16 > @gep10_load_i16_insert_v8i16_deref (<8 x i16 >* align 16 dereferenceable (31 ) %p ) { ; scalar i16 load from element 0 of the second vector with dereferenceable(31): one byte short of byte offset 16 plus 16 bytes. NOTE(review): presumably a deliberately insufficient-dereferenceability case; confirm against the transform under test
276
+ ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
277
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
278
+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
279
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
280
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
281
+ ;
282
+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 1 , i64 0 ; first index 1 steps over one whole vector (16 bytes); second index 0 selects its first element
283
+ %s = load i16 , i16* %gep , align 16 ; load states align 16, the same as the argument
284
+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0 ; lane 0 receives the scalar; the other lanes stay undef
285
+ ret <8 x i16 > %r ; the expected output above lists the same getelementptr/load/insertelement/ret sequence as this body
286
+ }
287
+
288
+ define <4 x float > @load_f32_insert_v4f32_volatile (float * align 16 dereferenceable (16 ) %p ) { ; volatile scalar float load inserted into lane 0 of an undef <4 x float>; %p is align 16 and dereferenceable(16)
289
+ ; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
290
+ ; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
291
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
292
+ ; CHECK-NEXT: ret <4 x float> [[R]]
293
+ ;
294
+ %s = load volatile float , float * %p , align 4 ; the volatile qualifier is the distinguishing feature; the expected output keeps it on the load
295
+ %r = insertelement <4 x float > undef , float %s , i32 0 ; lane 0 receives the scalar; the other three lanes stay undef
296
+ ret <4 x float > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
297
+ }
298
+
299
+ define <4 x float > @load_f32_insert_v4f32_align (float * align 1 dereferenceable (16 ) %p ) { ; scalar float load inserted into lane 0 of an undef <4 x float>; the argument is dereferenceable(16) but only guaranteed align 1
300
+ ; CHECK-LABEL: @load_f32_insert_v4f32_align(
301
+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
302
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
303
+ ; CHECK-NEXT: ret <4 x float> [[R]]
304
+ ;
305
+ %s = load float , float * %p , align 4 ; the load states align 4, stronger than the align 1 given on the argument
306
+ %r = insertelement <4 x float > undef , float %s , i32 0 ; lane 0 receives the scalar; the other three lanes stay undef
307
+ ret <4 x float > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
308
+ }
309
+
310
+ define <4 x float > @load_f32_insert_v4f32_deref (float * align 4 dereferenceable (15 ) %p ) { ; scalar float load inserted into lane 0 of an undef <4 x float>; dereferenceable(15) is one byte short of the 16-byte result vector. NOTE(review): presumably a deliberately insufficient-dereferenceability case; confirm against the transform under test
311
+ ; CHECK-LABEL: @load_f32_insert_v4f32_deref(
312
+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
313
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
314
+ ; CHECK-NEXT: ret <4 x float> [[R]]
315
+ ;
316
+ %s = load float , float * %p , align 4 ; load alignment matches the argument's align 4
317
+ %r = insertelement <4 x float > undef , float %s , i32 0 ; lane 0 receives the scalar; the other three lanes stay undef
318
+ ret <4 x float > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
319
+ }
320
+
321
+ define <8 x i32 > @load_i32_insert_v8i32 (i32* align 16 dereferenceable (16 ) %p ) { ; scalar i32 load inserted into lane 0 of an undef <8 x i32>; the result vector is 32 bytes while %p is only dereferenceable(16)
322
+ ; CHECK-LABEL: @load_i32_insert_v8i32(
323
+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4
324
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
325
+ ; CHECK-NEXT: ret <8 x i32> [[R]]
326
+ ;
327
+ %s = load i32 , i32* %p , align 4 ; the load itself only states align 4, weaker than the argument's align 16
328
+ %r = insertelement <8 x i32 > undef , i32 %s , i32 0 ; lane 0 receives the scalar; the other seven lanes stay undef
329
+ ret <8 x i32 > %r ; the expected output above lists the same load/insertelement/ret sequence as this body
330
+ }
331
+
332
+ define <8 x i32 > @casted_load_i32_insert_v8i32 (<4 x i32 >* align 4 dereferenceable (16 ) %p ) { ; scalar i32 load through a <4 x i32>* argument (align 4, dereferenceable(16)), inserted into lane 0 of an undef <8 x i32>; the 32-byte result is twice the 16-byte pointee
333
+ ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
334
+ ; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i32>* [[P:%.*]] to i32*
335
+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4
336
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
337
+ ; CHECK-NEXT: ret <8 x i32> [[R]]
338
+ ;
339
+ %b = bitcast <4 x i32 >* %p to i32* ; same address, viewed as a pointer to a single i32
340
+ %s = load i32 , i32* %b , align 4 ; load alignment matches the argument's align 4
341
+ %r = insertelement <8 x i32 > undef , i32 %s , i32 0 ; lane 0 receives the scalar; the other seven lanes stay undef
342
+ ret <8 x i32 > %r ; the expected output above lists the same bitcast/load/insertelement/ret sequence as this body
343
+ }
0 commit comments