Skip to content

Commit 838701a

Browse files
MemProf: Add minimum count threshold for inlining of promoted calls (#148001)
Allow users to set the minimum absolute count for inlining of indirect calls promoted during cloning; promoted calls whose count is below the threshold are marked noinline. This is primarily meant to enable generation of the synthetic VP metadata introduced in PR #141164 when profiling memprof-optimized binaries.
1 parent 74a6e5c commit 838701a

File tree

2 files changed

+60
-7
lines changed

2 files changed

+60
-7
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
181181
"memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
182182
cl::desc("Allow cloning of contexts having recursive cycles"));
183183

184+
// Minimum absolute count threshold for allowing inlining of indirect calls
185+
// promoted during cloning; calls with a smaller count are marked noinline.
186+
static cl::opt<unsigned> MemProfICPNoInlineThreshold(
187+
"memprof-icp-noinline-threshold", cl::init(2), cl::Hidden,
188+
cl::desc("Minimum absolute count for promoted target to be inlinable"));
189+
184190
namespace llvm {
185191
cl::opt<bool> EnableMemProfContextDisambiguation(
186192
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -5573,6 +5579,15 @@ void MemProfContextDisambiguation::performICP(
55735579
.getCallee());
55745580
}
55755581
DirectCall.setCalledFunction(TargetToUse);
5582+
// During matching we generate synthetic VP metadata for indirect calls
5583+
// not already having any, from the memprof profile's callee GUIDs. If
5584+
// we subsequently promote and inline those callees, we currently lose
5585+
// the ability to generate this synthetic VP metadata. Optionally apply
5586+
// a noinline attribute to promoted direct calls, where the threshold is
5587+
// set to capture synthetic VP metadata targets which get a count of 1.
5588+
if (MemProfICPNoInlineThreshold &&
5589+
Candidate.Count < MemProfICPNoInlineThreshold)
5590+
DirectCall.setIsNoInline();
55765591
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
55775592
<< ore::NV("Call", CBClone) << " in clone "
55785593
<< ore::NV("Caller", CBClone->getFunction())

llvm/test/ThinLTO/X86/memprof-icp-recursive.ll

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,40 @@
5454
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
5555
; RUN: --check-prefix=REMARKS
5656

57-
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
57+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE
58+
59+
;; Next, raise the threshold to 3 to prevent inlining of the promoted calls,
60+
;; which have count 2 (with the default threshold of 2 they stay inlinable).
61+
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
62+
; RUN: -memprof-icp-noinline-threshold=3 \
63+
; RUN: -enable-memprof-indirect-call-support=true \
64+
; RUN: -memprof-allow-recursive-callsites \
65+
; RUN: -supports-hot-cold-new \
66+
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
67+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
68+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
69+
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
70+
; RUN: -r=%t/main.o,_Znwm, \
71+
; RUN: -r=%t/main.o,_ZdlPvm, \
72+
; RUN: -r=%t/main.o,_Z8externalPi, \
73+
; RUN: -r=%t/main.o,main,plx \
74+
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
75+
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
76+
; RUN: -r=%t/main.o,_ZTV1B,plx \
77+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
78+
; RUN: -r=%t/main.o,_ZTS1B,plx \
79+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
80+
; RUN: -r=%t/main.o,_ZTS2B0,plx \
81+
; RUN: -r=%t/main.o,_ZTI2B0,plx \
82+
; RUN: -r=%t/main.o,_ZTI1B,plx \
83+
; RUN: -r=%t/main.o,_ZTV2B0,plx \
84+
; RUN: -thinlto-threads=1 \
85+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
86+
; RUN: -pass-remarks=. -save-temps \
87+
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
88+
; RUN: --check-prefix=REMARKS
89+
90+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE
5891

5992
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
6093
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
@@ -98,12 +131,14 @@
98131
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
99132
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
100133
; IR: if.true.direct_targ:
101-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
134+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
135+
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
102136
; IR: if.false.orig_indirect:
103137
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
104138
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
105139
; IR: if.true.direct_targ1:
106-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
140+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
141+
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
107142
; IR: if.false.orig_indirect2:
108143
; IR: call {{.*}} %0
109144

@@ -114,17 +149,20 @@
114149
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
115150
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
116151
; IR: if.true.direct_targ:
117-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
152+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
153+
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
118154
; IR: if.false.orig_indirect:
119155
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
120156
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
121157
; IR: if.true.direct_targ1:
122-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
158+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
159+
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
123160
; IR: if.false.orig_indirect2:
124161
; IR: call {{.*}} %0
125162

126-
; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
127-
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
163+
; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
164+
; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
165+
; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }
128166

129167
;--- foo.ll
130168
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

0 commit comments

Comments
 (0)