Skip to content

Commit 0524534

Browse files
committed
[FuncSpec] Enable specialization of literal constants.
To do so, we have to tweak the cost model so that specialization does not trigger excessively.

Differential Revision: https://reviews.llvm.org/D150649
1 parent 6441358 commit 0524534

16 files changed

+216
-150
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ class FunctionSpecializer {
188188

189189
bool run();
190190

191+
static unsigned getBlockFreqMultiplier();
192+
191193
InstCostVisitor getInstCostVisitorFor(Function *F) {
192194
auto &BFI = (GetBFI)(*F);
193195
auto &TTI = (GetTTI)(*F);

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,22 @@ static cl::opt<bool> ForceSpecialization(
7474
"Force function specialization for every call site with a constant "
7575
"argument"));
7676

77+
// Set to 2^3 to model three levels of if-else nest.
78+
static cl::opt<unsigned> BlockFreqMultiplier(
79+
"funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
80+
"Multiplier to scale block frequency of user instructions during "
81+
"specialization bonus estimation"));
82+
83+
static cl::opt<unsigned> MinEntryFreq(
84+
"funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
85+
"Do not specialize functions with entry block frequency lower than "
86+
"this value"));
87+
88+
static cl::opt<unsigned> MinScore(
89+
"funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
90+
"Do not specialize functions with score lower than this value "
91+
"(the ratio of specialization bonus over specialization cost)"));
92+
7793
static cl::opt<unsigned> MaxClones(
7894
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
7995
"The maximum number of clones allowed for a single function "
@@ -88,15 +104,15 @@ static cl::opt<bool> SpecializeOnAddress(
88104
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
89105
"Enable function specialization on the address of global values"));
90106

91-
// Disabled by default as it can significantly increase compilation times.
92-
//
93-
// https://llvm-compile-time-tracker.com
94-
// https://github.com/nikic/llvm-compile-time-tracker
95107
static cl::opt<bool> SpecializeLiteralConstant(
96-
"funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
108+
"funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
97109
"Enable specialization of functions that take a literal constant as an "
98110
"argument"));
99111

112+
unsigned FunctionSpecializer::getBlockFreqMultiplier() {
113+
return BlockFreqMultiplier;
114+
}
115+
100116
// Estimates the instruction cost of all the basic blocks in \p WorkList.
101117
// The successors of such blocks are added to the list as long as they are
102118
// executable and they have a unique predecessor. \p WorkList represents
@@ -114,7 +130,8 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
114130
while (!WorkList.empty()) {
115131
BasicBlock *BB = WorkList.pop_back_val();
116132

117-
uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
133+
uint64_t Weight = BlockFreqMultiplier *
134+
BFI.getBlockFreq(BB).getFrequency() /
118135
BFI.getEntryFreq();
119136
if (!Weight)
120137
continue;
@@ -167,7 +184,8 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
167184

168185
KnownConstants.insert({User, C});
169186

170-
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
187+
uint64_t Weight = BlockFreqMultiplier *
188+
BFI.getBlockFreq(User->getParent()).getFrequency() /
171189
BFI.getEntryFreq();
172190
if (!Weight)
173191
return 0;
@@ -649,6 +667,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
649667
if (Args.empty())
650668
return false;
651669

670+
bool HasCheckedEntryFreq = false;
652671
for (User *U : F->users()) {
653672
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
654673
continue;
@@ -684,6 +703,21 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
684703
if (S.Args.empty())
685704
continue;
686705

706+
// Check the function entry frequency only once. We sink this code here to
707+
// postpone running the Block Frequency Analysis until we know for sure
708+
// there are Specialization candidates, otherwise we are adding unnecessary
709+
// overhead.
710+
if (!HasCheckedEntryFreq) {
711+
// Reject cold functions (for some definition of 'cold').
712+
uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
713+
if (!ForceSpecialization && EntryFreq < MinEntryFreq)
714+
return false;
715+
716+
HasCheckedEntryFreq = true;
717+
LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
718+
<< F->getName() << " = " << EntryFreq << "\n");
719+
}
720+
687721
// Check if we have encountered the same specialisation already.
688722
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
689723
// Existing specialisation. Add the call to the list to rewrite, unless
@@ -698,13 +732,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
698732
AllSpecs[Index].CallSites.push_back(&CS);
699733
} else {
700734
// Calculate the specialisation gain.
701-
Cost Score = 0 - SpecCost;
735+
Cost Score = 0;
702736
InstCostVisitor Visitor = getInstCostVisitorFor(F);
703737
for (ArgInfo &A : S.Args)
704738
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
739+
Score /= SpecCost;
705740

706741
// Discard unprofitable specialisations.
707-
if (!ForceSpecialization && Score <= 0)
742+
if (!ForceSpecialization && Score < MinScore)
708743
continue;
709744

710745
// Create a new specialisation entry.

llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
1+
; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
22

33
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
44
entry:

llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
22

33
; Test function specialization wouldn't crash due to constant expression.
44
; Note that this test case shows that function specialization pass would
55
; transform the function even if no specialization happened.
66

7-
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
8-
97
%struct = type { i8, i16, i32, i64, i64}
108
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
119

@@ -26,19 +24,6 @@ entry:
2624
}
2725

2826
define internal i64 @zoo(i1 %flag) {
29-
; CHECK-LABEL: @zoo(
30-
; CHECK-NEXT: entry:
31-
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
32-
; CHECK: plus:
33-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
34-
; CHECK-NEXT: br label [[MERGE:%.*]]
35-
; CHECK: minus:
36-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
37-
; CHECK-NEXT: br label [[MERGE]]
38-
; CHECK: merge:
39-
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
40-
; CHECK-NEXT: ret i64 [[TMP2]]
41-
;
4227
entry:
4328
br i1 %flag, label %plus, label %minus
4429

@@ -60,14 +45,39 @@ merge:
6045

6146
define i64 @main() {
6247
; CHECK-LABEL: @main(
63-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
64-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
65-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
66-
; CHECK-NEXT: ret i64 [[TMP3]]
48+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
49+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
50+
; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
6751
;
6852
%1 = call i64 @zoo(i1 0)
6953
%2 = call i64 @zoo(i1 1)
7054
%3 = add i64 %1, %2
7155
ret i64 %3
7256
}
7357

58+
; CHECK-LABEL: @func2.1(
59+
; CHECK-NEXT: entry:
60+
; CHECK-NEXT: ret i64 undef
61+
62+
; CHECK-LABEL: @func2.2(
63+
; CHECK-NEXT: entry:
64+
; CHECK-NEXT: ret i64 undef
65+
66+
; CHECK-LABEL: @zoo.3(
67+
; CHECK-NEXT: entry:
68+
; CHECK-NEXT: br label [[PLUS:%.*]]
69+
; CHECK: plus:
70+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
71+
; CHECK-NEXT: br label [[MERGE:%.*]]
72+
; CHECK: merge:
73+
; CHECK-NEXT: ret i64 undef
74+
75+
; CHECK-LABEL: @zoo.4(
76+
; CHECK-NEXT: entry:
77+
; CHECK-NEXT: br label [[MINUS:%.*]]
78+
; CHECK: minus:
79+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
80+
; CHECK-NEXT: br label [[MERGE:%.*]]
81+
; CHECK: merge:
82+
; CHECK-NEXT: ret i64 undef
83+

llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
1+
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
22

33
; Checks for callsites that have been annotated with MinSize. We only expect
44
; specialisation for the call that does not have the attribute:

llvm/test/Transforms/FunctionSpecialization/function-specialization.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
2-
; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
1+
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
2+
; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
33

44
define i64 @main(i64 %x, i1 %flag) {
55
;

llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll

Lines changed: 0 additions & 88 deletions
This file was deleted.

llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
1+
; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
22
define dso_local i32 @p0(i32 noundef %x) {
33
entry:
44
%add = add nsw i32 %x, 1

llvm/test/Transforms/FunctionSpecialization/global-rank.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
1+
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
2+
23
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
34
entry:
45
%call = tail call i32 %p(i32 noundef %x)

llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
88
; CHECK: plus:
9-
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
9+
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
1010
; CHECK-NEXT: br label [[MERGE:%.*]]
1111
; CHECK: minus:
12-
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
12+
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
1313
; CHECK-NEXT: br label [[MERGE]]
1414
; CHECK: merge:
1515
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
@@ -20,7 +20,7 @@ entry:
2020
br i1 %flag, label %plus, label %minus
2121

2222
plus:
23-
%cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
23+
%cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
2424
br label %merge
2525

2626
minus:
@@ -68,9 +68,9 @@ entry:
6868

6969
; CHECK-LABEL: @compute.2
7070
; CHECK-NEXT: entry:
71-
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
72-
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
73-
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
71+
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
72+
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
73+
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
7474

7575
; CHECK-LABEL: @compute.3
7676
; CHECK-NEXT: entry:

0 commit comments

Comments
 (0)