From 84cd819bc849ab2d41b97c34994e44a5742dab55 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 14 Mar 2025 10:31:50 +0800 Subject: [PATCH] [Coroutines] Conditional elide coroutines based on hot/cold information --- .../Coroutines/CoroAnnotationElide.cpp | 77 ++++++++++++++++++ llvm/lib/Transforms/IPO/PartialInlining.cpp | 2 +- .../Coroutines/coro-conditional-elide.ll | 79 +++++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-conditional-elide.ll diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 9115946d205a4..d4d0c0f0895bb 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -24,6 +24,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -33,6 +36,49 @@ using namespace llvm; #define DEBUG_TYPE "coro-annotation-elide" +static cl::opt CoroElideBranchRatio( + "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden, + cl::desc("Minimum BranchProbability to consider a elide a coroutine.")); +extern cl::opt MinBlockCounterExecution; + +static cl::opt + PrintElidedCoroutine("print-elided-coroutine-stats", cl::init(false), + cl::Hidden, + cl::desc("Print stats for elided coroutine")); + +static cl::opt + ElideStatOutput("coro-elide-stat-output", cl::init(""), cl::Hidden, + cl::desc("Output file for -print-elided-coroutine-stats. " + "Defaults to standard error output.")); + +// The return value only keeps the stream's resources alive and may be null. +// Callers should write through the output parameter OS instead. 
+static std::unique_ptr +getCoroElidedStatsOStream(llvm::raw_ostream *&OS) { + if (!PrintElidedCoroutine) { + OS = &llvm::nulls(); + return nullptr; + } + + if (ElideStatOutput.empty()) { + OS = &llvm::errs(); + return nullptr; + } + + std::error_code EC; + auto ret = std::make_unique(ElideStatOutput, EC, + sys::fs::OF_Append); + + if (EC) { + llvm::errs() << "llvm cannot open file: " << EC.message() << "\n"; + OS = &llvm::nulls(); + return nullptr; + } + + OS = ret.get(); + return ret; +} + static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { for (Instruction &I : F->getEntryBlock()) if (!isa(&I)) @@ -145,6 +191,37 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { + + llvm::raw_ostream *OS = nullptr; + auto _ = getCoroElidedStatsOStream(OS); + assert(OS && "At least we should able to get access to standard error"); + + auto &BFI = FAM.getResult(*Caller); + if (BFI.getBlockFreq(CB->getParent()) < + BFI.getEntryFreq()) { + static BranchProbability MinBranchProbability( + static_cast(CoroElideBranchRatio * MinBlockCounterExecution), + MinBlockCounterExecution); + + auto Prob = BranchProbability::getBranchProbability( + BFI.getBlockFreq(CB->getParent()).getFrequency(), + BFI.getEntryFreq().getFrequency()); + + if (Prob < MinBranchProbability) { + *OS << "Not eliding " << *CB + << " with estimated probability: " << Prob << "\n"; + continue; + } + + *OS << "BB Prob: \t" << Prob << "\n"; + } else { + *OS << "BB Freq: \t" + << BFI.getBlockFreq(CB->getParent()).getFrequency() << "\n"; + *OS << "Entry Freq: \t" << BFI.getEntryFreq().getFrequency() << "\n"; + } + + *OS << "eliding " << *CB << "\n"; + auto *CallerN = CG.lookup(*Caller); auto *CallerC = CallerN ? 
CG.lookupSCC(*CallerN) : nullptr; // If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2583249e65484..1a00d173d3ae0 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -109,7 +109,7 @@ static cl::opt MinRegionSizeRatio( "outline candidate and original function")); // Used to tune the minimum number of execution counts needed in the predecessor // block to the cold edge. ie. confidence interval. -static cl::opt +cl::opt MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid")); diff --git a/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll new file mode 100644 index 0000000000000..04c5bf0494278 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll @@ -0,0 +1,79 @@ +; Tests conditional elision: the coro_elide_safe call in @caller is on a conditional branch, and no alloca is expected in @caller at -coro-elide-branch-ratio=0.55. 
+; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' -coro-elide-branch-ratio=0.55 | FileCheck %s + +%struct.Task = type { ptr } + +declare void @print(i32) nounwind + +; resume part of the coroutine +define fastcc void @callee.resume(ptr dereferenceable(1)) { + tail call void @print(i32 0) + ret void +} + +; destroy part of the coroutine +define fastcc void @callee.destroy(ptr) { + tail call void @print(i32 1) + ret void +} + +; cleanup part of the coroutine +define fastcc void @callee.cleanup(ptr) { + tail call void @print(i32 2) + ret void +} + +@callee.resumers = internal constant [3 x ptr] [ + ptr @callee.resume, ptr @callee.destroy, ptr @callee.cleanup] + +declare void @alloc(i1) nounwind + +; CHECK-LABEL: define ptr @callee +define ptr @callee(i8 %arg) { +entry: + %task = alloca %struct.Task, align 8 + %id = call token @llvm.coro.id(i32 0, ptr null, + ptr @callee, + ptr @callee.resumers) + %alloc = call i1 @llvm.coro.alloc(token %id) + %hdl = call ptr @llvm.coro.begin(token %id, ptr null) + store ptr %hdl, ptr %task + ret ptr %task +} + +; CHECK-LABEL: define ptr @callee.noalloc +define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) { + entry: + %task = alloca %struct.Task, align 8 + %id = call token @llvm.coro.id(i32 0, ptr null, + ptr @callee, + ptr @callee.resumers) + %hdl = call ptr @llvm.coro.begin(token %id, ptr null) + store ptr %hdl, ptr %task + ret ptr %task +} + +; CHECK-LABEL: define ptr @caller(i1 %cond) +; Function Attrs: presplitcoroutine +define ptr @caller(i1 %cond) #0 { +entry: + br i1 %cond, label %call, label %ret + +call: + %task = call ptr @callee(i8 0) #1 + br label %ret + +ret: + %retval = phi ptr [ %task, %call ], [ null, %entry ] + ret ptr %retval + ; CHECK-NOT: alloca +} + +declare token @llvm.coro.id(i32, ptr, ptr, ptr) +declare ptr @llvm.coro.begin(token, ptr) +declare ptr @llvm.coro.frame() +declare ptr @llvm.coro.subfn.addr(ptr, i8) +declare i1 @llvm.coro.alloc(token) + +attributes #0 = { 
presplitcoroutine } +attributes #1 = { coro_elide_safe }