Skip to content

Commit 0617629

Browse files
authored
[SLP][REVEC] Fix cost model for getBuildVectorCost with FixedVectorType ScalarTy. (llvm#110073)
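BoUpSLP::gather always uses CreateInsertVector when ScalarTy is a FixedVectorType, so getBuildVectorCost must cost an insert-subvector shuffle instead of an insertelement in that case.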
1 parent be6a5dc commit 0617629

File tree

2 files changed

+52
-5
lines changed

2 files changed

+52
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9369,10 +9369,18 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
93699369
bool NeedShuffle =
93709370
count(VL, *It) > 1 &&
93719371
(VL.front() != *It || !all_of(VL.drop_front(), IsaPred<UndefValue>));
9372-
if (!NeedShuffle)
9372+
if (!NeedShuffle) {
9373+
if (isa<FixedVectorType>(ScalarTy)) {
9374+
assert(SLPReVec && "FixedVectorType is not expected.");
9375+
return TTI.getShuffleCost(
9376+
TTI::SK_InsertSubvector, VecTy, {}, CostKind,
9377+
std::distance(VL.begin(), It) * getNumElements(ScalarTy),
9378+
cast<FixedVectorType>(ScalarTy));
9379+
}
93739380
return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
93749381
CostKind, std::distance(VL.begin(), It),
93759382
PoisonValue::get(VecTy), *It);
9383+
}
93769384

93779385
SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
93789386
transform(VL, ShuffleMask.begin(), [](Value *V) {

llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 -pass-remarks-output=%t %s | FileCheck %s
2+
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-20 -pass-remarks-output=%t %s | FileCheck %s
33
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
44

55
; YAML: --- !Passed
66
; YAML: Pass: slp-vectorizer
77
; YAML: Name: StoresVectorized
8-
; YAML: Function: test
8+
; YAML: Function: test1
99
; YAML: Args:
1010
; YAML: - String: 'Stores SLP vectorized with cost '
1111
; YAML: - Cost: '6'
1212
; YAML: - String: ' and with tree size '
1313
; YAML: - TreeSize: '5'
1414

15-
define void @test(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
16-
; CHECK-LABEL: @test(
15+
define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
16+
; CHECK-LABEL: @test1(
1717
; CHECK-NEXT: entry:
1818
; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1919
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
@@ -40,3 +40,42 @@ entry:
4040
}
4141

4242
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
43+
44+
; YAML: --- !Passed
45+
; YAML: Pass: slp-vectorizer
46+
; YAML: Name: StoresVectorized
47+
; YAML: Function: test2
48+
; YAML: Args:
49+
; YAML: - String: 'Stores SLP vectorized with cost '
50+
; YAML: - Cost: '16'
51+
; YAML: - String: ' and with tree size '
52+
; YAML: - TreeSize: '5'
53+
54+
; REVEC case whose "scalars" are <8 x float>: two fmuladd results are stored to
; adjacent slots, and the CHECK lines expect them to be merged into a single
; <16 x float> fmuladd whose operands are assembled with llvm.vector.insert.
define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <8 x float> %load17, <8 x float> %fmuladd7, <8 x float> %fmuladd16, ptr %out_ptr) {
55+
; CHECK-LABEL: @test2(
56+
; CHECK-NEXT: entry:
57+
; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
58+
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
59+
; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
60+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
61+
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
62+
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP2]], <8 x float> [[LOAD17:%.*]], i64 0)
63+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
64+
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
65+
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)
66+
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]])
67+
; CHECK-NEXT: store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
68+
; CHECK-NEXT: ret void
69+
;
70+
entry:
71+
; Each shufflevector selects lanes 4..11 of the 16-lane concatenation of its two inputs.
%vext165.i = shufflevector <8 x float> %load6, <8 x float> %load7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
72+
%vext309.i = shufflevector <8 x float> %load7, <8 x float> %load8, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
73+
; Both fmuladd calls take %load17 as their second operand; the expected output
; above inserts it once and replicates it into both halves with a shufflevector.
%fmuladd8 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext165.i, <8 x float> %load17, <8 x float> %fmuladd7)
74+
%fmuladd17 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext309.i, <8 x float> %load17, <8 x float> %fmuladd16)
75+
; A 32-byte offset is the size of one <8 x float>, so the two stores below are adjacent.
%add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 32
76+
store <8 x float> %fmuladd8, ptr %out_ptr, align 4
77+
store <8 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
78+
ret void
79+
}
80+
81+
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)

0 commit comments

Comments
 (0)