Skip to content

Commit 85bc868

Browse files
authored
[AArch64][TTI] Reduce cost for splatting whole first vector segment (SVE) (#145701)
Improve cost modeling for splatting the first 128b segment.
1 parent a75587d commit 85bc868

File tree

3 files changed

+73
-4
lines changed

3 files changed

+73
-4
lines changed

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6750,6 +6750,21 @@ inline std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments,
67506750
return std::nullopt;
67516751
}
67526752

6753+
/// isDUPFirstSegmentMask - matches a splat of the first 128b segment.
6754+
inline bool isDUPFirstSegmentMask(ArrayRef<int> Mask, unsigned Segments,
6755+
unsigned SegmentSize) {
6756+
// Make sure there's no size changes.
6757+
if (SegmentSize * Segments != Mask.size())
6758+
return false;
6759+
6760+
// Check that all lanes refer to the equivalent lane in the first segment.
6761+
// Undef/poison lanes (<0) are also accepted.
6762+
return all_of(enumerate(Mask), [&](auto P) {
6763+
const unsigned IndexWithinSegment = P.index() % SegmentSize;
6764+
return P.value() < 0 || unsigned(P.value()) == IndexWithinSegment;
6765+
});
6766+
}
6767+
67536768
} // namespace llvm
67546769

67556770
#endif

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5600,9 +5600,8 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
56005600
}
56015601

56025602
// Segmented shuffle matching.
5603-
if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5604-
ST->isSVEorStreamingSVEAvailable() && Kind == TTI::SK_PermuteSingleSrc &&
5605-
isa<FixedVectorType>(SrcTy) && !Mask.empty() &&
5603+
if (Kind == TTI::SK_PermuteSingleSrc && isa<FixedVectorType>(SrcTy) &&
5604+
!Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
56065605
SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
56075606
AArch64::SVEBitsPerBlock)) {
56085607

@@ -5612,7 +5611,14 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
56125611
unsigned SegmentElts = VTy->getNumElements() / Segments;
56135612

56145613
// dupq zd.t, zn.t[idx]
5615-
if (isDUPQMask(Mask, Segments, SegmentElts))
5614+
if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5615+
ST->isSVEorStreamingSVEAvailable() &&
5616+
isDUPQMask(Mask, Segments, SegmentElts))
5617+
return LT.first;
5618+
5619+
// mov zd.q, vn
5620+
if (ST->isSVEorStreamingSVEAvailable() &&
5621+
isDUPFirstSegmentMask(Mask, Segments, SegmentElts))
56165622
return LT.first;
56175623
}
56185624

llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,53 @@ define void @dup_within_each_segment_512b() #1 {
4949
ret void
5050
}
5151

52+
define void @dup_whole_segment_256b() #0 {
53+
; CHECK-LABEL: 'dup_whole_segment_256b'
54+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
55+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
56+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
57+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
58+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
59+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
60+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
61+
;
62+
%dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
63+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
64+
%dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
65+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
66+
%dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
67+
i32 0, i32 1, i32 2, i32 3>
68+
%dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
69+
%dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
70+
i32 0, i32 1, i32 0, i32 1>
71+
%dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
72+
i32 poison, i32 1, i32 2, i32 3>
73+
ret void
74+
}
75+
76+
define void @dup_whole_segment_512b() #1 {
77+
; CHECK-LABEL: 'dup_whole_segment_512b'
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
79+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
80+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
81+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
82+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
83+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
84+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
85+
;
86+
%dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
87+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
88+
%dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
89+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
90+
%dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
91+
i32 0, i32 1, i32 2, i32 3>
92+
%dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
93+
%dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
94+
i32 0, i32 1, i32 0, i32 1>
95+
%dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
96+
i32 poison, i32 1, i32 2, i32 3>
97+
ret void
98+
}
99+
52100
attributes #0 = { noinline vscale_range(2,2) }
53101
attributes #1 = { noinline vscale_range(4,4) }

0 commit comments

Comments
 (0)