Skip to content

Commit 2682a94

Browse files
authored
[SLP][REVEC] Add ExtractSubvector cost for ExternalUses. (#132761)
For llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll, ScalarCost and ExtraCost are both 1, so the original scalar will be kept.
1 parent f737df7 commit 2682a94

File tree

3 files changed

+91
-9
lines changed

3 files changed

+91
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5360,6 +5360,25 @@ getScalarizationOverhead(const TargetTransformInfo &TTI, Type *ScalarTy,
53605360
return Cost;
53615361
}
53625362

5363+
/// This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy
5364+
/// is a FixedVectorType, a vector will be extracted instead of a scalar.
///
/// Only the combination "Opcode is ExtractElement and ScalarTy is a
/// FixedVectorType" is handled here; it is costed as an SK_ExtractSubvector
/// shuffle. Every other combination is forwarded unchanged to
/// TTI.getVectorInstrCost.
///
/// \param TTI target cost model used for both paths.
/// \param ScalarTy type of the value being extracted; selects the path.
/// \param Opcode instruction opcode being costed.
/// \param Val type being operated on; asserted to be a VectorType on the
///        subvector path.
/// \param CostKind cost kind passed through to the cost query.
/// \param Index position of the extracted value; scaled by the element count
///        of ScalarTy on the subvector path.
/// \param Scalar forwarded to TTI.getVectorInstrCost on the fallback path.
/// \param ScalarUserAndIdx forwarded to TTI.getVectorInstrCost on the
///        fallback path.
/// \returns the cost reported by whichever of the two queries was used.
5365+
static InstructionCost getVectorInstrCost(
5366+
const TargetTransformInfo &TTI, Type *ScalarTy, unsigned Opcode, Type *Val,
5367+
TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar,
5368+
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
5369+
if (Opcode == Instruction::ExtractElement) {
5370+
if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
5371+
// A vector-typed "scalar" is only expected when REVEC is enabled.
assert(SLPReVec && "Only supported by REVEC.");
5372+
assert(isa<VectorType>(Val) && "Val must be a vector type.");
5373+
// The subvector offset passed to the shuffle query is Index multiplied by
// the number of elements in ScalarTy; the extracted subvector type is
// ScalarTy itself.
return getShuffleCost(TTI, TTI::SK_ExtractSubvector,
5374+
cast<VectorType>(Val), {}, CostKind,
5375+
Index * VecTy->getNumElements(), VecTy);
5376+
}
5377+
}
5378+
// Fallback: defer to the target's own per-instruction cost query.
return TTI.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
5379+
ScalarUserAndIdx);
5380+
}
5381+
53635382
/// Correctly creates insert_subvector, checking that the index is multiple of
53645383
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
53655384
/// using default shuffle.
@@ -13649,12 +13668,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
1364913668
!ExtractCostCalculated.insert(EU.Scalar).second)
1365013669
continue;
1365113670

13652-
// No extract cost for vector "scalar"
13653-
if (isa<FixedVectorType>(EU.Scalar->getType()))
13671+
// No extract cost for vector "scalar" if REVEC is disabled
13672+
if (!SLPReVec && isa<FixedVectorType>(EU.Scalar->getType()))
1365413673
continue;
1365513674

1365613675
// If found user is an insertelement, do not calculate extract cost but try
1365713676
// to detect it as a final shuffled/identity match.
13677+
// TODO: what if a user is insertvalue when REVEC is enabled?
1365813678
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User);
1365913679
VU && VU->getOperand(1) == EU.Scalar) {
1366013680
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
@@ -13728,7 +13748,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
1372813748
// extend the extracted value back to the original type. Here, we account
1372913749
// for the extract and the added cost of the sign extend if needed.
1373013750
InstructionCost ExtraCost = TTI::TCC_Free;
13731-
auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
13751+
auto *ScalarTy = EU.Scalar->getType();
13752+
auto *VecTy = getWidenedType(ScalarTy, BundleWidth);
1373213753
const TreeEntry *Entry = &EU.E;
1373313754
auto It = MinBWs.find(Entry);
1373413755
if (It != MinBWs.end()) {
@@ -13741,8 +13762,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
1374113762
VecTy, EU.Lane);
1374213763
} else {
1374313764
ExtraCost =
13744-
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
13745-
EU.Lane, EU.Scalar, ScalarUserAndIdx);
13765+
getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
13766+
CostKind, EU.Lane, EU.Scalar, ScalarUserAndIdx);
1374613767
}
1374713768
// Leave the scalar instructions as is if they are cheaper than extracts.
1374813769
if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -slp-revec -pass-remarks-output=%t < %s | FileCheck %s
3+
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
4+
5+
; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
6+
7+
; YAML: --- !Passed
8+
; YAML: Pass: slp-vectorizer
9+
; YAML: Name: VectorizedList
10+
; YAML: Function: StructOfVectors
11+
; YAML: Args:
12+
; YAML: - String: 'SLP vectorized with cost '
13+
; YAML: - Cost: '-10'
14+
; YAML: - String: ' and with tree size '
15+
; YAML: - TreeSize: '3'
16+
17+
; YAML: --- !Missed
18+
; YAML: Pass: slp-vectorizer
19+
; YAML: Name: NotBeneficial
20+
; YAML: Function: StructOfVectors
21+
; YAML: Args:
22+
; YAML: - String: 'List vectorization was possible but not beneficial with cost '
23+
; YAML: - Cost: '0'
24+
; YAML: - String: ' >= '
25+
; YAML: - Treshold: '0'
26+
27+
; Checks that vector insertvalues into the struct become SLP seeds.
28+
define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
29+
; CHECK-LABEL: @StructOfVectors(
30+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
31+
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
32+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
33+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
34+
; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[TMP6]], 0
35+
; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP7]], 1
36+
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
37+
;
38+
; Scalar input: four float loads from %Ptr at element offsets 0 through 3.
%L0 = load float, ptr %Ptr
39+
%GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
40+
%L1 = load float, ptr %GEP1
41+
%GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
42+
%L2 = load float, ptr %GEP2
43+
%GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
44+
%L3 = load float, ptr %GEP3
45+
46+
; Each loaded value gets its own fast-math fadd with a distinct constant.
%Fadd0 = fadd fast float %L0, 1.1e+01
47+
%Fadd1 = fadd fast float %L1, 1.2e+01
48+
%Fadd2 = fadd fast float %L2, 1.3e+01
49+
%Fadd3 = fadd fast float %L3, 1.4e+01
50+
51+
; The four sums are packed pairwise into two <2 x float> vectors.
%VecIn0 = insertelement <2 x float> poison, float %Fadd0, i64 0
52+
%VecIn1 = insertelement <2 x float> %VecIn0, float %Fadd1, i64 1
53+
54+
%VecIn2 = insertelement <2 x float> poison, float %Fadd2, i64 0
55+
%VecIn3 = insertelement <2 x float> %VecIn2, float %Fadd3, i64 1
56+
57+
; Both vectors are placed into the returned struct with insertvalue.
%Ret0 = insertvalue {<2 x float>, <2 x float>} poison, <2 x float> %VecIn1, 0
58+
%Ret1 = insertvalue {<2 x float>, <2 x float>} %Ret0, <2 x float> %VecIn3, 1
59+
ret {<2 x float>, <2 x float>} %Ret1
60+
}

llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,16 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) {
135135
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
136136
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
137137
; CHECK-NEXT: [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
138-
; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
139138
; CHECK-NEXT: [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
139+
; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
140+
; CHECK-NEXT: [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16
141+
; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2]], align 16
140142
; CHECK-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
141-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
142-
; CHECK-NEXT: [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
143143
; CHECK-NEXT: [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
144144
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
145-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
145+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
146146
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
147+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
147148
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
148149
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
149150
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)