Skip to content

Commit 2787759

Browse files
committed
[VPlan] Allow derived IVs and scalar-steps in narrowing interleave.
Both VPDerivedIVRecipe and VPScalarIVStepsRecipe should be supported in narrowInterleaveGroups:
* VPDerivedIVRecipe is based on the canonical IV and is independent of VF;
* VPScalarIVStepsRecipe takes the VF as an operand, so it will be updated by narrowInterleaveGroups.
1 parent dd02fb3 commit 2787759

File tree

2 files changed

+131
-22
lines changed

2 files changed

+131
-22
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3195,6 +3195,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
31953195
match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
31963196
continue;
31973197

3198+
if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
3199+
vputils::onlyFirstLaneUsed(cast<VPSingleDefRecipe>(&R)))
3200+
continue;
3201+
31983202
// Bail out on recipes not supported at the moment:
31993203
// * phi recipes other than the canonical induction
32003204
// * recipes writing to memory except interleave groups

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll

Lines changed: 127 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph\:" --version 5
22
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s
3+
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF2IC2 %s
34
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s
45

56
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
@@ -25,21 +26,52 @@ define void @derived_int_ivs(ptr noalias %a, ptr noalias %b, i64 %end) {
2526
; VF2-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16
2627
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]]
2728
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
28-
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
29-
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
30-
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
29+
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
3130
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
32-
; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
33-
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
34-
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
35-
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
31+
; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP7]], align 8
32+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
3633
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3734
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3835
; VF2: [[MIDDLE_BLOCK]]:
3936
; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
4037
; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
4138
; VF2: [[SCALAR_PH]]:
4239
;
40+
; VF2IC2-LABEL: define void @derived_int_ivs(
41+
; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) {
42+
; VF2IC2-NEXT: [[ENTRY:.*:]]
43+
; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END]], -32
44+
; VF2IC2-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
45+
; VF2IC2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
46+
; VF2IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
47+
; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
48+
; VF2IC2: [[VECTOR_PH]]:
49+
; VF2IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
50+
; VF2IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
51+
; VF2IC2-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 16
52+
; VF2IC2-NEXT: [[TMP4:%.*]] = add i64 16, [[TMP3]]
53+
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
54+
; VF2IC2: [[VECTOR_BODY]]:
55+
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
56+
; VF2IC2-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16
57+
; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]]
58+
; VF2IC2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16
59+
; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
60+
; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
61+
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP7]], align 8
62+
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP8]], align 8
63+
; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
64+
; VF2IC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
65+
; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP9]], align 8
66+
; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD1]], ptr [[TMP10]], align 8
67+
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
68+
; VF2IC2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
69+
; VF2IC2-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
70+
; VF2IC2: [[MIDDLE_BLOCK]]:
71+
; VF2IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
72+
; VF2IC2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
73+
; VF2IC2: [[SCALAR_PH]]:
74+
;
4375
; VF4-LABEL: define void @derived_int_ivs(
4476
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) {
4577
; VF4-NEXT: [[ENTRY:.*:]]
@@ -135,20 +167,71 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) {
135167
; VF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
136168
; VF2-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16
137169
; VF2-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]]
138-
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[NEXT_GEP]], align 8
139-
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
140-
; VF2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
141-
; VF2-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
142-
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
143-
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP7]], align 8
144-
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
170+
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[NEXT_GEP]], align 8
171+
; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP7]], align 8
172+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
145173
; VF2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
146174
; VF2-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
147175
; VF2: [[MIDDLE_BLOCK]]:
148176
; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
149177
; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
150178
; VF2: [[SCALAR_PH]]:
151179
;
180+
; VF2IC2-LABEL: define void @derived_pointer_ivs(
181+
; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) {
182+
; VF2IC2-NEXT: [[ENTRY:.*:]]
183+
; VF2IC2-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64
184+
; VF2IC2-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
185+
; VF2IC2-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
186+
; VF2IC2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
187+
; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16
188+
; VF2IC2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]]
189+
; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
190+
; VF2IC2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
191+
; VF2IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
192+
; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
193+
; VF2IC2: [[VECTOR_MEMCHECK]]:
194+
; VF2IC2-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16
195+
; VF2IC2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]]
196+
; VF2IC2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4
197+
; VF2IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4
198+
; VF2IC2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16
199+
; VF2IC2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
200+
; VF2IC2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
201+
; VF2IC2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]]
202+
; VF2IC2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
203+
; VF2IC2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
204+
; VF2IC2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
205+
; VF2IC2: [[VECTOR_PH]]:
206+
; VF2IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
207+
; VF2IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
208+
; VF2IC2-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 16
209+
; VF2IC2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
210+
; VF2IC2-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16
211+
; VF2IC2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
212+
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
213+
; VF2IC2: [[VECTOR_BODY]]:
214+
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
215+
; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
216+
; VF2IC2-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 16
217+
; VF2IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
218+
; VF2IC2-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]]
219+
; VF2IC2-NEXT: [[OFFSET_IDX7:%.*]] = mul i64 [[INDEX]], 16
220+
; VF2IC2-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX7]], 16
221+
; VF2IC2-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX7]]
222+
; VF2IC2-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
223+
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[NEXT_GEP]], align 8
224+
; VF2IC2-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[NEXT_GEP6]], align 8
225+
; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP8]], align 8
226+
; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD10]], ptr [[NEXT_GEP9]], align 8
227+
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
228+
; VF2IC2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
229+
; VF2IC2-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
230+
; VF2IC2: [[MIDDLE_BLOCK]]:
231+
; VF2IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
232+
; VF2IC2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
233+
; VF2IC2: [[SCALAR_PH]]:
234+
;
152235
; VF4-LABEL: define void @derived_pointer_ivs(
153236
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) {
154237
; VF4-NEXT: [[ENTRY:.*:]]
@@ -235,21 +318,43 @@ define void @narrow_with_uniform_add_and_gep(ptr noalias %p) {
235318
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
236319
; VF2-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0
237320
; VF2-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP0]]
238-
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
239-
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
240-
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
241-
; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
321+
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
242322
; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 1)
243-
; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
244-
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
245-
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
246-
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
323+
; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP1]], align 8
324+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
247325
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
248326
; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
249327
; VF2: [[MIDDLE_BLOCK]]:
250328
; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
251329
; VF2: [[SCALAR_PH]]:
252330
;
331+
; VF2IC2-LABEL: define void @narrow_with_uniform_add_and_gep(
332+
; VF2IC2-SAME: ptr noalias [[P:%.*]]) {
333+
; VF2IC2-NEXT: [[ENTRY:.*:]]
334+
; VF2IC2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
335+
; VF2IC2: [[VECTOR_PH]]:
336+
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
337+
; VF2IC2: [[VECTOR_BODY]]:
338+
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
339+
; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
340+
; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
341+
; VF2IC2-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0
342+
; VF2IC2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 0
343+
; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP1]]
344+
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP2]]
345+
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
346+
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
347+
; VF2IC2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[WIDE_LOAD]], splat (i64 1)
348+
; VF2IC2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[WIDE_LOAD1]], splat (i64 1)
349+
; VF2IC2-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP3]], align 8
350+
; VF2IC2-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
351+
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
352+
; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
353+
; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
354+
; VF2IC2: [[MIDDLE_BLOCK]]:
355+
; VF2IC2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
356+
; VF2IC2: [[SCALAR_PH]]:
357+
;
253358
; VF4-LABEL: define void @narrow_with_uniform_add_and_gep(
254359
; VF4-SAME: ptr noalias [[P:%.*]]) {
255360
; VF4-NEXT: [[ENTRY:.*:]]

0 commit comments

Comments
 (0)