Skip to content

Commit 093afed

Browse files
authored
[VPlan] Fix miscompile after PR #142433. (#147398)
This fixes a bug introduced by aa24029, "[VPlan] Unroll VPReplicateRecipe by VF", which cloned a VPReplicateRecipe without transferring the flags from the original. That can cause incorrect nsw/nuw flags to be emitted on the new instructions, which may result in miscompiles. It turns out there were no test cases in the repo that hit the situation where the recipe requires the instruction clones to have different flags from the underlying instruction. The existing tests covered the flags being correct when the replacement instruction is a vectorized version of the initial instruction, but not when scalar clones are required. A new test is added covering this.
1 parent bc8aa97 commit 093afed

File tree

2 files changed

+83
-8
lines changed

2 files changed

+83
-8
lines changed

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
486486
auto *New =
487487
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
488488
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
489+
New->transferFlags(*RepR);
489490
New->insertBefore(RepR);
490491
return New;
491492
}

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,79 @@ loop.exit:
338338
ret void
339339
}
340340

341+
; Same as @drop_vector_nuw_nsw, except built with avx1; in this case,
342+
; we make scalar clones of the 'sub' operation. These clones also need
343+
; cleared flags.
344+
define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #1 {
345+
; CHECK-LABEL: define void @drop_nonvector_nuw_nsw_avx1(
346+
; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
347+
; CHECK-NEXT: [[ENTRY:.*:]]
348+
; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
349+
; CHECK: [[VECTOR_PH]]:
350+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
351+
; CHECK: [[VECTOR_BODY]]:
352+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
353+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
354+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
355+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
356+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
357+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
358+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
359+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
360+
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
361+
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
362+
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 1
363+
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
364+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
365+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
366+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
367+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
368+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
369+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
370+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
371+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
372+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
373+
; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP18]], align 8
374+
; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
375+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
376+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP20]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]]
377+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
378+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]]
379+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0
380+
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP22]], align 4
381+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
382+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
383+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
384+
; CHECK: [[MIDDLE_BLOCK]]:
385+
;
386+
entry:
387+
br label %loop.header
388+
389+
loop.header:
390+
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
391+
%i23 = icmp eq i64 %iv, 0
392+
%gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
393+
%i27 = sub nuw nsw i64 %iv, 1
394+
%i29 = getelementptr inbounds float, ptr %input, i64 %i27
395+
store ptr %i29, ptr %gep
396+
br i1 %i23, label %if.end, label %if.then
397+
398+
if.then:
399+
%i30 = load float, ptr %i29, align 4, !invariant.load !0
400+
br label %if.end
401+
402+
if.end:
403+
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
404+
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
405+
store float %i34, ptr %i35, align 4
406+
%iv.inc = add nuw nsw i64 %iv, 1
407+
%exitcond = icmp eq i64 %iv.inc, 4
408+
br i1 %exitcond, label %loop.exit, label %loop.header
409+
410+
loop.exit:
411+
ret void
412+
}
413+
341414
; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
342415
; of any masked load/store/gather/scatter.
343416
define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
@@ -358,7 +431,7 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
358431
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
359432
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
360433
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
361-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
434+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
362435
; CHECK: [[MIDDLE_BLOCK]]:
363436
;
364437
entry:
@@ -411,7 +484,7 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu
411484
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
412485
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
413486
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
414-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
487+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
415488
; CHECK: [[MIDDLE_BLOCK]]:
416489
;
417490
entry:
@@ -465,7 +538,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
465538
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
466539
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
467540
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
468-
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
541+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
469542
; CHECK: [[MIDDLE_BLOCK]]:
470543
;
471544
entry:
@@ -520,7 +593,7 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input
520593
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
521594
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
522595
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
523-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
596+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
524597
; CHECK: [[MIDDLE_BLOCK]]:
525598
;
526599
entry:
@@ -572,7 +645,7 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
572645
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
573646
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
574647
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
575-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
648+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
576649
; CHECK: [[MIDDLE_BLOCK]]:
577650
;
578651
entry:
@@ -720,7 +793,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
720793
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4
721794
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
722795
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
723-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
796+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
724797
; CHECK: [[MIDDLE_BLOCK]]:
725798
;
726799

@@ -820,7 +893,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
820893
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
821894
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
822895
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
823-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
896+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
824897
; CHECK: [[MIDDLE_BLOCK]]:
825898
;
826899

@@ -879,7 +952,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
879952
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
880953
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
881954
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
882-
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
955+
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
883956
; CHECK: [[MIDDLE_BLOCK]]:
884957
;
885958

@@ -911,5 +984,6 @@ exit:
911984
}
912985

913986
attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
987+
attributes #1 = { "target-features"="+avx" }
914988

915989
!0 = !{}

0 commit comments

Comments
 (0)