@@ -338,6 +338,79 @@ loop.exit:
338
338
ret void
339
339
}
340
340
341
+ ; Same as @drop_vector_nuw_nsw, except built with avx1; in this case,
342
+ ; we make scalar clones of the 'sub' operation. These clones also need
343
+ ; their poison-generating flags (nuw/nsw) cleared.
344
+ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #1 {
345
+ ; CHECK-LABEL: define void @drop_nonvector_nuw_nsw_avx1(
346
+ ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
347
+ ; CHECK-NEXT: [[ENTRY:.*:]]
348
+ ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
349
+ ; CHECK: [[VECTOR_PH]]:
350
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
351
+ ; CHECK: [[VECTOR_BODY]]:
352
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
353
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
354
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
355
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
356
+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
357
+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
358
+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
359
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
360
+ ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
361
+ ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
362
+ ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 1
363
+ ; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
364
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
365
+ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
366
+ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
367
+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
368
+ ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
369
+ ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
370
+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
371
+ ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
372
+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
373
+ ; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP18]], align 8
374
+ ; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
375
+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
376
+ ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP20]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]]
377
+ ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
378
+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]]
379
+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0
380
+ ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP22]], align 4
381
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
382
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
383
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
384
+ ; CHECK: [[MIDDLE_BLOCK]]:
385
+ ;
386
+ entry:
387
+ br label %loop.header
388
+
389
+ loop.header:
390
+ %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
391
+ %i23 = icmp eq i64 %iv, 0 ; true only on the first iteration
392
+ %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
393
+ %i27 = sub nuw nsw i64 %iv, 1 ; poison when %iv == 0 (0 - 1 wraps unsigned); the CHECK lines expect the scalar clones without nuw/nsw
394
+ %i29 = getelementptr inbounds float, ptr %input, i64 %i27
395
+ store ptr %i29, ptr %gep ; unconditional use of %i29, outside the predicated block
396
+ br i1 %i23, label %if.end, label %if.then
397
+
398
+ if.then:
399
+ %i30 = load float, ptr %i29, align 4, !invariant.load !0 ; reached only when %iv != 0
400
+ br label %if.end
401
+
402
+ if.end:
403
+ %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
404
+ %i35 = getelementptr inbounds float, ptr %output, i64 %iv
405
+ store float %i34, ptr %i35, align 4
406
+ %iv.inc = add nuw nsw i64 %iv, 1
407
+ %exitcond = icmp eq i64 %iv.inc, 4 ; loop runs for %iv = 0..3
408
+ br i1 %exitcond, label %loop.exit, label %loop.header
409
+
410
+ loop.exit:
411
+ ret void
412
+ }
413
+
341
414
; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
342
415
; of any masked load/store/gather/scatter.
343
416
define void @preserve_nuw_nsw_no_addr (ptr %output ) local_unnamed_addr #0 {
@@ -358,7 +431,7 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
358
431
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
359
432
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
360
433
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
361
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
434
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
362
435
; CHECK: [[MIDDLE_BLOCK]]:
363
436
;
364
437
entry:
@@ -411,7 +484,7 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu
411
484
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
412
485
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
413
486
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
414
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
487
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
415
488
; CHECK: [[MIDDLE_BLOCK]]:
416
489
;
417
490
entry:
@@ -465,7 +538,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
465
538
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
466
539
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
467
540
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
468
- ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
541
+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
469
542
; CHECK: [[MIDDLE_BLOCK]]:
470
543
;
471
544
entry:
@@ -520,7 +593,7 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input
520
593
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
521
594
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
522
595
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
523
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
596
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
524
597
; CHECK: [[MIDDLE_BLOCK]]:
525
598
;
526
599
entry:
@@ -572,7 +645,7 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
572
645
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
573
646
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
574
647
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
575
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
648
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
576
649
; CHECK: [[MIDDLE_BLOCK]]:
577
650
;
578
651
entry:
@@ -720,7 +793,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
720
793
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4
721
794
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
722
795
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
723
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
796
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
724
797
; CHECK: [[MIDDLE_BLOCK]]:
725
798
;
726
799
@@ -820,7 +893,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
820
893
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
821
894
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
822
895
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
823
- ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
896
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
824
897
; CHECK: [[MIDDLE_BLOCK]]:
825
898
;
826
899
@@ -879,7 +952,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
879
952
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
880
953
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
881
954
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
882
- ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
955
+ ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
883
956
; CHECK: [[MIDDLE_BLOCK]]:
884
957
;
885
958
@@ -911,5 +984,6 @@ exit:
911
984
}
912
985
913
986
attributes #0 = { noinline nounwind uwtable "target-features" ="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
987
+ attributes #1 = { "target-features" ="+avx" } ; AVX only (no AVX-512 features, unlike #0); used by @drop_nonvector_nuw_nsw_avx1
914
988
915
989
!0 = !{}
0 commit comments