Skip to content

Commit cd9236d

Browse files
authored
Account for inline assembly instructions in inlining cost. (#146628)
The inliner currently treats every "call asm" IR instruction as a single instruction, regardless of how many instructions the inline assembly may contain. This may underestimate the cost of inlining a callee that contains long inline assembly. In addition, instructions in inline assembly may warrant a higher cost, since the compiler cannot analyze or optimize them. This PR introduces a new option, `-inline-asm-instr-cost` (zero by default), which controls the cost of each inline assembly instruction in the inliner's cost-benefit analysis.
1 parent a63846b commit cd9236d

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "llvm/IR/Dominators.h"
3838
#include "llvm/IR/GetElementPtrTypeIterator.h"
3939
#include "llvm/IR/GlobalAlias.h"
40+
#include "llvm/IR/InlineAsm.h"
4041
#include "llvm/IR/InstVisitor.h"
4142
#include "llvm/IR/IntrinsicInst.h"
4243
#include "llvm/IR/Operator.h"
@@ -141,6 +142,10 @@ static cl::opt<int>
141142
InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
142143
cl::desc("Cost of a single instruction when inlining"));
143144

145+
static cl::opt<int> InlineAsmInstrCost(
146+
"inline-asm-instr-cost", cl::Hidden, cl::init(0),
147+
cl::desc("Cost of a single inline asm instruction when inlining"));
148+
144149
static cl::opt<int>
145150
MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
146151
cl::desc("Cost of load/store instruction when inlining"));
@@ -351,6 +356,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
351356
/// for.
352357
virtual void onMissedSimplification() {}
353358

359+
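/// Account for inline assembly instructions. Called from visitCallBase for
/// every call whose called operand is an InlineAsm; does nothing by default.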
360+
virtual void onInlineAsm(const InlineAsm &Arg) {}
361+
354362
/// Start accounting potential benefits due to SROA for the given alloca.
355363
virtual void onInitializeSROAArg(AllocaInst *Arg) {}
356364

@@ -382,6 +390,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
382390
/// Number of bytes allocated statically by the callee.
383391
uint64_t AllocatedSize = 0;
384392
unsigned NumInstructions = 0;
393+
unsigned NumInlineAsmInstructions = 0;
385394
unsigned NumVectorInstructions = 0;
386395

387396
/// While we walk the potentially-inlined instructions, we build up and
@@ -777,6 +786,48 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
777786

778787
addCost(SwitchCost);
779788
}
789+
790+
// Parses the inline assembly argument to account for its cost. Inline
791+
// assembly instructions incur higher costs for inlining since they cannot be
792+
// analyzed and optimized.
793+
void onInlineAsm(const InlineAsm &Arg) override {
794+
if (!InlineAsmInstrCost)
795+
return;
796+
SmallVector<StringRef, 4> AsmStrs;
797+
Arg.collectAsmStrs(AsmStrs);
798+
int SectionLevel = 0;
799+
int InlineAsmInstrCount = 0;
800+
for (StringRef AsmStr : AsmStrs) {
801+
// Trim whitespaces and comments.
802+
StringRef Trimmed = AsmStr.trim();
803+
size_t hashPos = Trimmed.find('#');
804+
if (hashPos != StringRef::npos)
805+
Trimmed = Trimmed.substr(0, hashPos);
806+
// Ignore comments.
807+
if (Trimmed.empty())
808+
continue;
809+
// Filter out the outlined assembly instructions from the cost by keeping
810+
// track of the section level and only accounting for instrutions at
811+
// section level of zero. Note there will be duplication in outlined
812+
// sections too, but is not accounted in the inlining cost model.
813+
if (Trimmed.starts_with(".pushsection")) {
814+
++SectionLevel;
815+
continue;
816+
}
817+
if (Trimmed.starts_with(".popsection")) {
818+
--SectionLevel;
819+
continue;
820+
}
821+
// Ignore directives and labels.
822+
if (Trimmed.starts_with(".") || Trimmed.contains(":"))
823+
continue;
824+
if (SectionLevel == 0)
825+
++InlineAsmInstrCount;
826+
}
827+
NumInlineAsmInstructions += InlineAsmInstrCount;
828+
addCost(InlineAsmInstrCount * InlineAsmInstrCost);
829+
}
830+
780831
void onMissedSimplification() override { addCost(InstrCost); }
781832

782833
void onInitializeSROAArg(AllocaInst *Arg) override {
@@ -2420,6 +2471,9 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
24202471
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
24212472
ContainsNoDuplicateCall = true;
24222473

2474+
if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
2475+
onInlineAsm(*InlineAsmOp);
2476+
24232477
Function *F = Call.getCalledFunction();
24242478
bool IsIndirectCall = !F;
24252479
if (IsIndirectCall) {
@@ -3005,6 +3059,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
30053059
DEBUG_PRINT_STAT(NumConstantPtrDiffs);
30063060
DEBUG_PRINT_STAT(NumInstructionsSimplified);
30073061
DEBUG_PRINT_STAT(NumInstructions);
3062+
DEBUG_PRINT_STAT(NumInlineAsmInstructions);
30083063
DEBUG_PRINT_STAT(SROACostSavings);
30093064
DEBUG_PRINT_STAT(SROACostSavingsLost);
30103065
DEBUG_PRINT_STAT(LoadEliminationCost);
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
2+
3+
; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE
4+
; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE
5+
6+
;; Verify that a low assembly instruction cost of 150 does not block inlining.
7+
;; This test also verifies that the outlined section's instructions (in "other"
8+
;; section) do not contribute to the cost.
9+
; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
10+
; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
11+
12+
;; Verify that an assembly instruction cost of 300 blocks inlining.
13+
; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
14+
; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
15+
16+
define void @caller(i32 %a, i1 %b) #0 {
17+
call void @callee(i32 %a, i1 %b)
18+
ret void
19+
}
20+
21+
; CHECK: define void @caller
22+
; INLINE: call void asm
23+
; NOINLINE: call void @callee
24+
25+
26+
;; callee function with asm call with two real assembly instructions in the
27+
;; destination section and two assembly instructions in the outlined "other"
28+
;; section.
29+
define void @callee(i32 %a, i1 %b) {
30+
call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
31+
ret void
32+
}
33+
; CHECK: define void @callee
34+
35+

0 commit comments

Comments
 (0)