@@ -627,7 +627,7 @@ for.end: ; preds = %for.body, %entry
627
627
ret float %result.0.lcssa
628
628
}
629
629
630
- ; Sub we can create a reduction, but not inloop
630
+ ; Sub: we can create an in-loop reduction
631
631
define i32 @reduction_sub_lhs (ptr noalias nocapture %A ) {
632
632
; CHECK-LABEL: @reduction_sub_lhs(
633
633
; CHECK-NEXT: entry:
@@ -636,15 +636,16 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
636
636
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
637
637
; CHECK: vector.body:
638
638
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
639
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
639
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
640
640
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
641
641
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
642
- ; CHECK-NEXT: [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
642
+ ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
643
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
644
+ ; CHECK-NEXT: [[TMP3]] = add i32 [[TMP1]], [[VEC_PHI]]
643
645
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
644
646
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
645
647
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
646
648
; CHECK: middle.block:
647
- ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
648
649
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
649
650
; CHECK: scalar.ph:
650
651
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
0 commit comments