Skip to content

Commit 253f8b6

Browse files
committed
[VPlan] Support single-scalar VPReplicateRecipes when narrowing IGs.
When narrowing interleave groups, we can treat single-scalar VPReplicateRecipes as already narrowed.
1 parent 76b1dcf commit 253f8b6

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3140,7 +3140,12 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
31403140
}
31413141

31423142
/// Returns true if \p VPV is a narrow VPValue.
3143-
static bool isAlreadyNarrow(VPValue *VPV) { return VPV->isLiveIn(); }
3143+
static bool isAlreadyNarrow(VPValue *VPV) {
3144+
if (VPV->isLiveIn())
3145+
return true;
3146+
auto *RepR = dyn_cast<VPReplicateRecipe>(VPV);
3147+
return RepR && RepR->isSingleScalar();
3148+
}
31443149

31453150
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
31463151
unsigned VectorRegWidth) {

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,8 @@ define void @single_uniform_load_store_interleave_group(ptr noalias %src, ptr no
463463
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
464464
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
465465
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
466-
; VF2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> [[BROADCAST_SPLAT]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
467-
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
468-
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
469-
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
466+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
467+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
470468
; VF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
471469
; VF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
472470
; VF2: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)