@@ -66,6 +66,91 @@ entry:
66
66
ret i32 %sum.0.lcssa
67
67
}
68
68
69
+ ; Check that we correctly unroll two reductions chained together: %add1 and %add2 below both update the same i64 sum (%rdx) within one iteration, first with a load from %p and then with a load from %q.
70
+ define i64 @reduction_sum_chain (ptr noalias %p , ptr noalias %q ) {
71
+ ; CHECK-LABEL: @reduction_sum_chain(
72
+ ; CHECK-NEXT: entry:
73
+ ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
74
+ ; CHECK: vector.ph:
75
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
76
+ ; CHECK: vector.body:
77
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
78
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
79
+ ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
80
+ ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
81
+ ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
82
+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
83
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]]
84
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 32
85
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64
86
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 96
87
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
88
+ ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
89
+ ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
90
+ ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
91
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
92
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
93
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
94
+ ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
95
+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
96
+ ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
97
+ ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
98
+ ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD]])
99
+ ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], [[VEC_PHI]]
100
+ ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD4]])
101
+ ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[VEC_PHI1]]
102
+ ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD5]])
103
+ ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[VEC_PHI2]]
104
+ ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD6]])
105
+ ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[VEC_PHI3]]
106
+ ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD7]])
107
+ ; CHECK-NEXT: [[TMP17]] = add i64 [[TMP16]], [[TMP9]]
108
+ ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD8]])
109
+ ; CHECK-NEXT: [[TMP19]] = add i64 [[TMP18]], [[TMP11]]
110
+ ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD9]])
111
+ ; CHECK-NEXT: [[TMP21]] = add i64 [[TMP20]], [[TMP13]]
112
+ ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD10]])
113
+ ; CHECK-NEXT: [[TMP23]] = add i64 [[TMP22]], [[TMP15]]
114
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
115
+ ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
116
+ ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
117
+ ; CHECK: middle.block:
118
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i64 [[TMP19]], [[TMP17]]
119
+ ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add i64 [[TMP21]], [[BIN_RDX]]
120
+ ; CHECK-NEXT: [[BIN_RDX12:%.*]] = add i64 [[TMP23]], [[BIN_RDX11]]
121
+ ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
122
+ ; CHECK: scalar.ph:
123
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
124
+ ; CHECK: loop:
125
+ ; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
126
+ ; CHECK: exit:
127
+ ; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i64 [ poison, [[LOOP]] ], [ [[BIN_RDX12]], [[MIDDLE_BLOCK]] ]
128
+ ; CHECK-NEXT: ret i64 [[ADD2_LCSSA]]
129
+ ;
130
+ entry:
131
+ br label %loop
132
+
133
+ loop:
134
+ %iv = phi i64 [0 , %entry ], [%iv.next , %loop ] ; loop counter, starts at 0 and steps by 1
135
+ %rdx = phi i64 [0 , %entry ], [%add2 , %loop ] ; running sum, starts at 0 and is carried from %add2
136
+
137
+ %p.gep = getelementptr i64 , ptr %p , i64 %iv ; address of the %iv-th i64 of %p
138
+ %q.gep = getelementptr i64 , ptr %q , i64 %iv ; address of the %iv-th i64 of %q
139
+
140
+ %x = load i64 , ptr %p.gep
141
+ %y = load i64 , ptr %q.gep
142
+
143
+ %add1 = add i64 %rdx , %x ; first link of the chain: sum + value loaded from %p
144
+ %add2 = add i64 %add1 , %y ; second link: adds the value loaded from %q; feeds the %rdx phi and the return
145
+
146
+ %iv.next = add i64 %iv , 1
147
+ %done = icmp eq i64 %iv.next , 256 ; fixed trip count: leave the loop once 256 iterations have run
148
+ br i1 %done , label %exit , label %loop
149
+
150
+ exit:
151
+ ret i64 %add2 ; result is the sum after the last iteration
152
+ }
153
+
69
154
define i32 @predicated (ptr noalias nocapture %A ) {
70
155
; CHECK-LABEL: @predicated(
71
156
; CHECK-NEXT: entry:
@@ -260,7 +345,7 @@ define i32 @predicated(ptr noalias nocapture %A) {
260
345
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
261
346
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
262
347
; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272
263
- ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
348
+ ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
264
349
; CHECK: middle.block:
265
350
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]]
266
351
; CHECK-NEXT: [[BIN_RDX37:%.*]] = add i32 [[TMP107]], [[BIN_RDX]]
@@ -269,7 +354,7 @@ define i32 @predicated(ptr noalias nocapture %A) {
269
354
; CHECK: scalar.ph:
270
355
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
271
356
; CHECK: .lr.ph:
272
- ; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5 :![0-9]+]]
357
+ ; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7 :![0-9]+]]
273
358
; CHECK: ._crit_edge:
274
359
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[BIN_RDX38]], [[MIDDLE_BLOCK]] ]
275
360
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
@@ -499,7 +584,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
499
584
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
500
585
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
501
586
; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
502
- ; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
587
+ ; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
503
588
; CHECK: middle.block:
504
589
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP112]], [[TMP109]]
505
590
; CHECK-NEXT: [[BIN_RDX39:%.*]] = mul i32 [[TMP115]], [[BIN_RDX]]
@@ -512,7 +597,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
512
597
; CHECK: if.then:
513
598
; CHECK-NEXT: br label [[FOR_INC]]
514
599
; CHECK: for.inc:
515
- ; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
600
+ ; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
516
601
; CHECK: for.end:
517
602
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[BIN_RDX40]], [[MIDDLE_BLOCK]] ]
518
603
; CHECK-NEXT: ret i32 [[RES_LCSSA]]
0 commit comments