Skip to content

Commit f786916

Browse files
authored
[CodeGen][NPM] Support CodeGenSCCOrder in pipeline (#136818)
Wrap passes in a post-order CGSCC pass manager in the codegen pass builder. This commit also adds a pipeline test, although the test is not yet complete.
1 parent f7a0922 commit f786916

File tree

3 files changed

+96
-21
lines changed

3 files changed

+96
-21
lines changed

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/Analysis/AliasAnalysis.h"
2020
#include "llvm/Analysis/BasicAliasAnalysis.h"
21+
#include "llvm/Analysis/CGSCCPassManager.h"
2122
#include "llvm/Analysis/ProfileSummaryInfo.h"
2223
#include "llvm/Analysis/ScopedNoAliasAA.h"
2324
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -210,10 +211,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
210211
class AddIRPass {
211212
public:
212213
AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {}
213-
~AddIRPass() {
214-
if (!FPM.isEmpty())
215-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
216-
}
214+
~AddIRPass() { flushFPMToMPM(); }
217215

218216
template <typename PassT>
219217
void operator()(PassT &&Pass, StringRef Name = PassT::name()) {
@@ -231,16 +229,40 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
231229
FPM.addPass(std::forward<PassT>(Pass));
232230
} else {
233231
// Add Module Pass
234-
if (!FPM.isEmpty()) {
235-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
236-
FPM = FunctionPassManager();
237-
}
238-
232+
flushFPMToMPM();
239233
MPM.addPass(std::forward<PassT>(Pass));
240234
}
241235
}
242236

237+
/// Setting this will add passes to the CGSCC pass manager.
238+
void requireCGSCCOrder() {
239+
if (PB.AddInCGSCCOrder)
240+
return;
241+
flushFPMToMPM();
242+
PB.AddInCGSCCOrder = true;
243+
}
244+
245+
/// Stop adding passes to the CGSCC pass manager.
246+
/// Existing passes won't be removed.
247+
void stopAddingInCGSCCOrder() {
248+
if (!PB.AddInCGSCCOrder)
249+
return;
250+
flushFPMToMPM();
251+
PB.AddInCGSCCOrder = false;
252+
}
253+
243254
private:
255+
void flushFPMToMPM() {
256+
if (FPM.isEmpty())
257+
return;
258+
if (PB.AddInCGSCCOrder) {
259+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
260+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
261+
} else {
262+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
263+
}
264+
FPM = FunctionPassManager();
265+
}
244266
ModulePassManager &MPM;
245267
FunctionPassManager FPM;
246268
const DerivedT &PB;
@@ -252,13 +274,17 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
252274
AddMachinePass(ModulePassManager &MPM, const DerivedT &PB)
253275
: MPM(MPM), PB(PB) {}
254276
~AddMachinePass() {
255-
if (!MFPM.isEmpty()) {
256-
FunctionPassManager FPM;
257-
FPM.addPass(
258-
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
259-
FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
277+
if (MFPM.isEmpty())
278+
return;
279+
280+
FunctionPassManager FPM;
281+
FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
282+
FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
283+
if (this->PB.AddInCGSCCOrder) {
284+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
285+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
286+
} else
260287
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
261-
}
262288
}
263289

264290
template <typename PassT>
@@ -276,20 +302,47 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
276302
MFPM.addPass(std::forward<PassT>(Pass));
277303
} else {
278304
// Add Module Pass
279-
if (!MFPM.isEmpty()) {
280-
MPM.addPass(createModuleToFunctionPassAdaptor(
281-
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
282-
MFPM = MachineFunctionPassManager();
283-
}
284-
305+
flushMFPMToMPM();
285306
MPM.addPass(std::forward<PassT>(Pass));
286307
}
287308

288309
for (auto &C : PB.AfterCallbacks)
289310
C(Name, MFPM);
290311
}
291312

313+
/// Setting this will add passes to the CGSCC pass manager.
314+
void requireCGSCCOrder() {
315+
if (PB.AddInCGSCCOrder)
316+
return;
317+
flushMFPMToMPM();
318+
PB.AddInCGSCCOrder = true;
319+
}
320+
321+
/// Stop adding passes to the CGSCC pass manager.
322+
/// Existing passes won't be removed.
323+
void stopAddingInCGSCCOrder() {
324+
if (!PB.AddInCGSCCOrder)
325+
return;
326+
flushMFPMToMPM();
327+
PB.AddInCGSCCOrder = false;
328+
}
329+
292330
private:
331+
void flushMFPMToMPM() {
332+
if (MFPM.isEmpty())
333+
return;
334+
335+
if (PB.AddInCGSCCOrder) {
336+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
337+
createCGSCCToFunctionPassAdaptor(
338+
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))));
339+
} else {
340+
MPM.addPass(createModuleToFunctionPassAdaptor(
341+
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
342+
}
343+
MFPM = MachineFunctionPassManager();
344+
}
345+
293346
ModulePassManager &MPM;
294347
MachineFunctionPassManager MFPM;
295348
const DerivedT &PB;
@@ -555,6 +608,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
555608
/// Helper variable for `-start-before/-start-after/-stop-before/-stop-after`
556609
mutable bool Started = true;
557610
mutable bool Stopped = true;
611+
mutable bool AddInCGSCCOrder = false;
558612
};
559613

560614
template <typename Derived, typename TargetMachineT>
@@ -813,6 +867,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
813867
AddIRPass &addPass) const {
814868
derived().addPreISel(addPass);
815869

870+
if (Opt.RequiresCodeGenSCCOrder)
871+
addPass.requireCGSCCOrder();
872+
816873
addPass(CallBrPreparePass());
817874
// Add both the safe stack and the stack protection passes: each of them will
818875
// only protect functions that have corresponding attributes.

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2099,6 +2099,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
20992099
// being run on them, which causes crashes in the resource usage analysis).
21002100
addPass(AMDGPULowerBufferFatPointersPass(TM));
21012101

2102+
addPass.requireCGSCCOrder();
2103+
21022104
Base::addCodeGenPrepare(addPass);
21032105

21042106
if (isPassEnabled(EnableLoadStoreVectorizer))
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -O0 -print-pipeline-passes < %s 2>&1 \
2+
; RUN: | FileCheck -check-prefix=GCN-O0 %s
3+
4+
; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
5+
; RUN: | FileCheck -check-prefix=GCN-O2 %s
6+
7+
; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
8+
; RUN: | FileCheck -check-prefix=GCN-O3 %s
9+
10+
; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
11+
; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,require<machine-loops>,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
12+
; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,require<machine-loops>,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
13+
14+
define void @empty() {
15+
ret void
16+
}

0 commit comments

Comments
 (0)