Skip to content

Commit 58b939a

Browse files
committed
[VPlan] Support matching constants in narrowInterleaveGroups.
Matching constants can trivially be broadcast; allow them if the same constant is used for all recipes in a bundle.
1 parent cbfec48 commit 58b939a

File tree

2 files changed

+19
-37
lines changed

2 files changed

+19
-37
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3115,7 +3115,7 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember,
31153115
unsigned OpIdx, VPValue *OpV, unsigned Idx) {
31163116
auto *DefR = OpV->getDefiningRecipe();
31173117
if (!DefR)
3118-
return false;
3118+
return WideMember0->getOperand(OpIdx) == OpV;
31193119
if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
31203120
return !W->getMask() && WideMember0->getOperand(OpIdx) == OpV;
31213121

@@ -3251,7 +3251,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
32513251
return;
32523252

32533253
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
3254-
auto NarrowOp = [](VPRecipeBase *R) -> VPValue * {
3254+
auto NarrowOp = [](VPValue *V) -> VPValue * {
3255+
auto *R = V->getDefiningRecipe();
3256+
if (!R)
3257+
return V;
32553258
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
32563259
// Narrow interleave group to wide load, as transformed VPlan will only
32573260
// process one original iteration.
@@ -3280,11 +3283,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
32803283
if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(
32813284
StoreGroup->getStoredValues()[0]->getDefiningRecipe())) {
32823285
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
3283-
WideMember0->setOperand(
3284-
Idx, NarrowOp(WideMember0->getOperand(Idx)->getDefiningRecipe()));
3286+
WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
32853287
Res = WideMember0;
32863288
} else {
3287-
Res = NarrowOp(StoreGroup->getStoredValues()[0]->getDefiningRecipe());
3289+
Res = NarrowOp(StoreGroup->getStoredValues()[0]);
32883290
}
32893291

32903292
auto *S = new VPWidenStoreRecipe(

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-constant-ops.ll

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,18 @@ define void @test_add_double_same_const_args_1(ptr %res, ptr noalias %A, ptr noa
1313
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1414
; CHECK: [[VECTOR_BODY]]:
1515
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
16-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
16+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1717
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
1818
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP1]]
19-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
20-
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
21-
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
22-
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP3]], align 4
23-
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
24-
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
25-
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[STRIDED_VEC]], splat (double 1.000000e+00)
26-
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[STRIDED_VEC3]], splat (double 1.000000e+00)
19+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
20+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
2721
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], splat (double 1.000000e+00)
2822
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[STRIDED_VEC4]], splat (double 1.000000e+00)
2923
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
3024
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
31-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
32-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
33-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
34-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
35-
; CHECK-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
36-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP9]], align 4
37-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
25+
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[TMP8]], align 4
26+
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP9]], align 4
27+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3828
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
3929
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4030
; CHECK: [[MIDDLE_BLOCK]]:
@@ -73,28 +63,18 @@ define void @test_add_double_same_const_args_2(ptr %res, ptr noalias %A, ptr noa
7363
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7464
; CHECK: [[VECTOR_BODY]]:
7565
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
76-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
66+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
7767
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[INDEX]]
7868
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP1]]
79-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
80-
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
81-
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
82-
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP3]], align 4
83-
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
84-
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
85-
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> splat (double 1.000000e+00), [[STRIDED_VEC]]
86-
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> splat (double 1.000000e+00), [[STRIDED_VEC3]]
69+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
70+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
8771
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> splat (double 1.000000e+00), [[STRIDED_VEC1]]
8872
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> splat (double 1.000000e+00), [[STRIDED_VEC4]]
8973
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
9074
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
91-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
92-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
93-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
94-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
95-
; CHECK-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
96-
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP9]], align 4
97-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
75+
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[TMP8]], align 4
76+
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP9]], align 4
77+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
9878
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
9979
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
10080
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)