|
37 | 37 | #include "llvm/IR/Dominators.h"
|
38 | 38 | #include "llvm/IR/GetElementPtrTypeIterator.h"
|
39 | 39 | #include "llvm/IR/GlobalAlias.h"
|
| 40 | +#include "llvm/IR/InlineAsm.h" |
40 | 41 | #include "llvm/IR/InstVisitor.h"
|
41 | 42 | #include "llvm/IR/IntrinsicInst.h"
|
42 | 43 | #include "llvm/IR/Operator.h"
|
@@ -141,6 +142,10 @@ static cl::opt<int>
|
141 | 142 | InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
|
142 | 143 | cl::desc("Cost of a single instruction when inlining"));
|
143 | 144 |
|
| 145 | +static cl::opt<int> InlineAsmInstrCost( |
| 146 | + "inline-asm-instr-cost", cl::Hidden, cl::init(0), |
| 147 | + cl::desc("Cost of a single inline asm instruction when inlining")); |
| 148 | + |
144 | 149 | static cl::opt<int>
|
145 | 150 | MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
|
146 | 151 | cl::desc("Cost of load/store instruction when inlining"));
|
@@ -351,6 +356,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
351 | 356 | /// for.
|
352 | 357 | virtual void onMissedSimplification() {}
|
353 | 358 |
|
| 359 | + /// Hook to account for inline assembly instructions; does nothing by default. |
| 360 | + virtual void onInlineAsm(const InlineAsm &Arg) {} |
| 361 | + |
354 | 362 | /// Start accounting potential benefits due to SROA for the given alloca.
|
355 | 363 | virtual void onInitializeSROAArg(AllocaInst *Arg) {}
|
356 | 364 |
|
@@ -382,6 +390,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
382 | 390 | /// Number of bytes allocated statically by the callee.
|
383 | 391 | uint64_t AllocatedSize = 0;
|
384 | 392 | unsigned NumInstructions = 0;
|
| 393 | + unsigned NumInlineAsmInstructions = 0; |
385 | 394 | unsigned NumVectorInstructions = 0;
|
386 | 395 |
|
387 | 396 | /// While we walk the potentially-inlined instructions, we build up and
|
@@ -777,6 +786,48 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
|
777 | 786 |
|
778 | 787 | addCost(SwitchCost);
|
779 | 788 | }
|
| 789 | + |
| 790 | + // Parses the inline assembly strings of Arg to account for their cost: each |
| 791 | + // counted instruction adds InlineAsmInstrCost (nothing is done when that option is zero). Inline assembly instructions incur higher costs for inlining since they cannot be |
| 792 | + // analyzed and optimized. |
| 793 | + void onInlineAsm(const InlineAsm &Arg) override { |
| 794 | + if (!InlineAsmInstrCost) |
| 795 | + return; |
| 796 | + SmallVector<StringRef, 4> AsmStrs; |
| 797 | + Arg.collectAsmStrs(AsmStrs); |
| 798 | + int SectionLevel = 0; |
| 799 | + int InlineAsmInstrCount = 0; |
| 800 | + for (StringRef AsmStr : AsmStrs) { |
| 801 | + // Trim surrounding whitespace, then drop everything from the first '#' on (treated here as the start of a comment). |
| 802 | + StringRef Trimmed = AsmStr.trim(); |
| 803 | + size_t hashPos = Trimmed.find('#'); |
| 804 | + if (hashPos != StringRef::npos) |
| 805 | + Trimmed = Trimmed.substr(0, hashPos); |
| 806 | + // Skip lines that are empty or consist only of a comment. |
| 807 | + if (Trimmed.empty()) |
| 808 | + continue; |
| 809 | + // Filter out the outlined assembly instructions from the cost by keeping |
| 810 | + // track of the section level and only accounting for instructions at |
| 811 | + // section level of zero. Note there will be duplication in outlined |
| 812 | + // sections too, but it is not accounted for in the inlining cost model. |
| 813 | + if (Trimmed.starts_with(".pushsection")) { |
| 814 | + ++SectionLevel; |
| 815 | + continue; |
| 816 | + } |
| 817 | + if (Trimmed.starts_with(".popsection")) { |
| 818 | + --SectionLevel; |
| 819 | + continue; |
| 820 | + } |
| 821 | + // Ignore directives (leading '.') and labels (any line containing ':'). |
| 822 | + if (Trimmed.starts_with(".") || Trimmed.contains(":")) |
| 823 | + continue; |
| 824 | + if (SectionLevel == 0) |
| 825 | + ++InlineAsmInstrCount; |
| 826 | + } |
| 827 | + NumInlineAsmInstructions += InlineAsmInstrCount; |
| 828 | + addCost(InlineAsmInstrCount * InlineAsmInstrCost); |
| 829 | + } |
| 830 | + |
780 | 831 | void onMissedSimplification() override { addCost(InstrCost); }
|
781 | 832 |
|
782 | 833 | void onInitializeSROAArg(AllocaInst *Arg) override {
|
@@ -2420,6 +2471,9 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
|
2420 | 2471 | if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
|
2421 | 2472 | ContainsNoDuplicateCall = true;
|
2422 | 2473 |
|
| 2474 | + if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) |
| 2475 | + onInlineAsm(*InlineAsmOp); |
| 2476 | + |
2423 | 2477 | Function *F = Call.getCalledFunction();
|
2424 | 2478 | bool IsIndirectCall = !F;
|
2425 | 2479 | if (IsIndirectCall) {
|
@@ -3005,6 +3059,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
|
3005 | 3059 | DEBUG_PRINT_STAT(NumConstantPtrDiffs);
|
3006 | 3060 | DEBUG_PRINT_STAT(NumInstructionsSimplified);
|
3007 | 3061 | DEBUG_PRINT_STAT(NumInstructions);
|
| 3062 | + DEBUG_PRINT_STAT(NumInlineAsmInstructions); |
3008 | 3063 | DEBUG_PRINT_STAT(SROACostSavings);
|
3009 | 3064 | DEBUG_PRINT_STAT(SROACostSavingsLost);
|
3010 | 3065 | DEBUG_PRINT_STAT(LoadEliminationCost);
|
|
0 commit comments