From 73ad1e3cb967f03926f450249dfc2167e92aef66 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 23 Apr 2025 06:38:10 +0000 Subject: [PATCH 1/7] [CodeGen][NPM] Support CodeGenSCCOrder in pipeline pb/codegenscc-order --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 89 +++++++++-- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 + llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 144 ++++++++++++++++++ 3 files changed, 219 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 6ed9ac47405d3..67f4a36511c5b 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -210,10 +211,7 @@ template class CodeGenPassBuilder { class AddIRPass { public: AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {} - ~AddIRPass() { - if (!FPM.isEmpty()) - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } + ~AddIRPass() { flushFPMToMPM(); } template void operator()(PassT &&Pass, StringRef Name = PassT::name()) { @@ -231,16 +229,40 @@ template class CodeGenPassBuilder { FPM.addPass(std::forward(Pass)); } else { // Add Module Pass - if (!FPM.isEmpty()) { - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - FPM = FunctionPassManager(); - } - + flushFPMToMPM(); MPM.addPass(std::forward(Pass)); } } + /// Setting this will add passes to the CGSCC pass manager. + void requireCGSCCOrder() { + if (PB.AddInCGSCCOrder) + return; + flushFPMToMPM(); + PB.AddInCGSCCOrder = true; + } + + /// Stop adding passes to the CGSCC pass manager. + /// Existing passes won't be removed. + void stopAddingInCGSCCOrder() { + if (!PB.AddInCGSCCOrder) + return; + flushFPMToMPM(); + PB.AddInCGSCCOrder = false; + } + private: + void flushFPMToMPM() { + if (!FPM.isEmpty()) { + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + FPM = FunctionPassManager(); + } + } ModulePassManager &MPM; FunctionPassManager FPM; const DerivedT &PB; @@ -257,7 +279,11 @@ template class CodeGenPassBuilder { FPM.addPass( createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); FPM.addPass(InvalidateAnalysisPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + if (this->PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } } @@ -276,12 +302,7 @@ template class CodeGenPassBuilder { MFPM.addPass(std::forward(Pass)); } else { // Add Module Pass - if (!MFPM.isEmpty()) { - MPM.addPass(createModuleToFunctionPassAdaptor( - createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); - MFPM = MachineFunctionPassManager(); - } - + flushMFPMToMPM(); MPM.addPass(std::forward(Pass)); } @@ -289,7 +310,39 @@ template class CodeGenPassBuilder { C(Name, MFPM); } + /// Setting this will add passes to the CGSCC pass manager. + void requireCGSCCOrder() { + if (PB.AddInCGSCCOrder) + return; + flushMFPMToMPM(); + PB.AddInCGSCCOrder = true; + } + + /// Stop adding passes to the CGSCC pass manager. + /// Existing passes won't be removed. + void stopAddingInCGSCCOrder() { + if (!PB.AddInCGSCCOrder) + return; + flushMFPMToMPM(); + PB.AddInCGSCCOrder = false; + } + private: + void flushMFPMToMPM() { + if (!MFPM.isEmpty()) { + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor( + std::move(MFPM))))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); + } + MFPM = MachineFunctionPassManager(); + } + } + ModulePassManager &MPM; MachineFunctionPassManager MFPM; const DerivedT &PB; @@ -555,6 +608,7 @@ template class CodeGenPassBuilder { /// Helper variable for `-start-before/-start-after/-stop-before/-stop-after` mutable bool Started = true; mutable bool Stopped = true; + mutable bool AddInCGSCCOrder = false; }; template @@ -813,6 +867,9 @@ void CodeGenPassBuilder::addISelPrepare( AddIRPass &addPass) const { derived().addPreISel(addPass); + if (Opt.RequiresCodeGenSCCOrder) + addPass.requireCGSCCOrder(); + addPass(CallBrPreparePass()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 88613cf5eb4cd..09b40c9173ff6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2097,6 +2097,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { // being run on them, which causes crashes in the resource usage analysis). addPass(AMDGPULowerBufferFatPointersPass(TM)); + addPass.requireCGSCCOrder(); + Base::addCodeGenPrepare(addPass); if (isPassEnabled(EnableLoadStoreVectorizer)) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll new file mode 100644 index 0000000000000..96a533a19c88a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -0,0 +1,144 @@ +; UNSUPPORTED: expensive_checks +; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \ +; RUN: | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s + +; REQUIRES: asserts + +; GCN-O3: require +; GCN-O3-NEXT: require +; GCN-O3-NEXT: require +; GCN-O3-NEXT: pre-isel-intrinsic-lowering +; GCN-O3-NEXT: function(expand-large-div-rem +; GCN-O3-NEXT: expand-fp) +; GCN-O3-NEXT: amdgpu-remove-incompatible-functions +; GCN-O3-NEXT: amdgpu-printf-runtime-binding +; GCN-O3-NEXT: amdgpu-lower-ctor-dtor +; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt) +; GCN-O3-NEXT: expand-variadics +; GCN-O3-NEXT: amdgpu-always-inline +; GCN-O3-NEXT: always-inline +; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles +; GCN-O3-NEXT: amdgpu-sw-lower-lds +; GCN-O3-NEXT: amdgpu-lower-module-lds +; GCN-O3-NEXT: function(infer-address-spaces +; GCN-O3-NEXT: amdgpu-atomic-optimizer +; GCN-O3-NEXT: atomic-expand +; GCN-O3-NEXT: amdgpu-promote-alloca +; GCN-O3-NEXT: separate-const-offset-from-gep<> +; GCN-O3-NEXT: slsr +; GCN-O3-NEXT: gvn<> +; GCN-O3-NEXT: nary-reassociate +; GCN-O3-NEXT: early-cse<> +; GCN-O3-NEXT: amdgpu-codegenprepare +; GCN-O3-NEXT: loop-mssa(loop-reduce) +; GCN-O3-NEXT: mergeicmps +; GCN-O3-NEXT: expand-memcmp +; GCN-O3-NEXT: gc-lowering +; GCN-O3-NEXT: lower-constant-intrinsics +; GCN-O3-NEXT: UnreachableBlockElimPass +; GCN-O3-NEXT: consthoist +; GCN-O3-NEXT: ReplaceWithVeclib +; GCN-O3-NEXT: partially-inline-libcalls +; GCN-O3-NEXT: ee-instrument +; GCN-O3-NEXT: scalarize-masked-mem-intrin +; GCN-O3-NEXT: ExpandReductionsPass +; GCN-O3-NEXT: gvn<> +; GCN-O3-NEXT: amdgpu-lower-kernel-arguments) +; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers +; GCN-O3-NEXT: cgscc(function(codegenprepare +; GCN-O3-NEXT: load-store-vectorizer +; GCN-O3-NEXT: lower-switch +; GCN-O3-NEXT: lower-invoke +; GCN-O3-NEXT: UnreachableBlockElimPass +; GCN-O3-NEXT: flatten-cfg +; GCN-O3-NEXT: sink +; GCN-O3-NEXT: amdgpu-late-codegenprepare +; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes +; GCN-O3-NEXT: fix-irreducible +; GCN-O3-NEXT: unify-loop-exits +; GCN-O3-NEXT: StructurizeCFGPass +; GCN-O3-NEXT: amdgpu-annotate-uniform +; GCN-O3-NEXT: si-annotate-control-flow +; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi +; GCN-O3-NEXT: lcssa)) +; GCN-O3-NEXT: amdgpu-perf-hint +; GCN-O3-NEXT: cgscc(function(require +; GCN-O3-NEXT: callbr-prepare +; GCN-O3-NEXT: safe-stack +; GCN-O3-NEXT: stack-protector)) +; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel +; GCN-O3-NEXT: si-fix-sgpr-copies +; GCN-O3-NEXT: si-i1-copies +; GCN-O3-NEXT: finalize-isel +; GCN-O3-NEXT: early-tailduplication +; GCN-O3-NEXT: opt-phis +; GCN-O3-NEXT: stack-coloring +; GCN-O3-NEXT: localstackalloc +; GCN-O3-NEXT: dead-mi-elimination +; GCN-O3-NEXT: early-machinelicm +; GCN-O3-NEXT: machine-cse +; GCN-O3-NEXT: machine-sink +; GCN-O3-NEXT: peephole-opt +; GCN-O3-NEXT: dead-mi-elimination +; GCN-O3-NEXT: si-fold-operands +; GCN-O3-NEXT: gcn-dpp-combine +; GCN-O3-NEXT: si-load-store-opt +; GCN-O3-NEXT: si-peephole-sdwa +; GCN-O3-NEXT: early-machinelicm +; GCN-O3-NEXT: machine-cse +; GCN-O3-NEXT: si-fold-operands +; GCN-O3-NEXT: dead-mi-elimination +; GCN-O3-NEXT: si-shrink-instructions +; GCN-O3-NEXT: detect-dead-lanes +; GCN-O3-NEXT: InitUndefPass +; GCN-O3-NEXT: ProcessImplicitDefsPass +; GCN-O3-NEXT: unreachable-mbb-elimination +; GCN-O3-NEXT: require +; GCN-O3-NEXT: require +; GCN-O3-NEXT: phi-node-elimination +; GCN-O3-NEXT: two-address-instruction +; GCN-O3-NEXT: register-coalescer +; GCN-O3-NEXT: rename-independent-subregs +; GCN-O3-NEXT: machine-scheduler +; GCN-O3-NEXT: greedy +; GCN-O3-NEXT: amdgpu-nsa-reassign +; GCN-O3-NEXT: VirtRegRewriterPass +; GCN-O3-NEXT: stack-slot-coloring +; GCN-O3-NEXT: machine-cp +; GCN-O3-NEXT: machinelicm +; GCN-O3-NEXT: si-fix-vgpr-copies +; GCN-O3-NEXT: si-optimize-exec-masking +; GCN-O3-NEXT: remove-redundant-debug-values +; GCN-O3-NEXT: fixup-statepoint-caller-saved +; GCN-O3-NEXT: PostRAMachineSinkingPass +; GCN-O3-NEXT: ShrinkWrapPass +; GCN-O3-NEXT: PrologEpilogInserterPass +; GCN-O3-NEXT: branch-folder +; GCN-O3-NEXT: tailduplication +; GCN-O3-NEXT: machine-latecleanup +; GCN-O3-NEXT: machine-cp +; GCN-O3-NEXT: post-ra-pseudos +; GCN-O3-NEXT: postmisched +; GCN-O3-NEXT: block-placement +; GCN-O3-NEXT: fentry-insert +; GCN-O3-NEXT: xray-instrumentation +; GCN-O3-NEXT: patchable-function +; GCN-O3-NEXT: gcn-create-vopd +; GCN-O3-NEXT: si-memory-legalizer +; GCN-O3-NEXT: si-insert-waitcnts +; GCN-O3-NEXT: si-late-branch-lowering +; GCN-O3-NEXT: si-pre-emit-peephole +; GCN-O3-NEXT: post-RA-hazard-rec +; GCN-O3-NEXT: AMDGPUWaitSGPRHazardsPass +; GCN-O3-NEXT: amdgpu-insert-delay-alu +; GCN-O3-NEXT: branch-relaxation +; GCN-O3-NEXT: remove-loads-into-fake-uses +; GCN-O3-NEXT: live-debug-values +; GCN-O3-NEXT: machine-sanmd +; GCN-O3-NEXT: stack-frame-layout) +; GCN-O3-NEXT: invalidate)) + + +define void @empty() { + ret void +} From 58dfd7a2fec3cac44a152a058061849d91910ae4 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 23 Apr 2025 08:55:23 +0000 Subject: [PATCH 2/7] Remove tr and support expensive check --- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 267 +++++++++---------- 1 file changed, 133 insertions(+), 134 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 96a533a19c88a..7ba1771eba08d 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -1,142 +1,141 @@ -; UNSUPPORTED: expensive_checks ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \ -; RUN: | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s +; RUN: | FileCheck -check-prefix=GCN-O3 %s ; REQUIRES: asserts ; GCN-O3: require -; GCN-O3-NEXT: require -; GCN-O3-NEXT: require -; GCN-O3-NEXT: pre-isel-intrinsic-lowering -; GCN-O3-NEXT: function(expand-large-div-rem -; GCN-O3-NEXT: expand-fp) -; GCN-O3-NEXT: amdgpu-remove-incompatible-functions -; GCN-O3-NEXT: amdgpu-printf-runtime-binding -; GCN-O3-NEXT: amdgpu-lower-ctor-dtor -; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt) -; GCN-O3-NEXT: expand-variadics -; GCN-O3-NEXT: amdgpu-always-inline -; GCN-O3-NEXT: always-inline -; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles -; GCN-O3-NEXT: amdgpu-sw-lower-lds -; GCN-O3-NEXT: amdgpu-lower-module-lds -; GCN-O3-NEXT: function(infer-address-spaces -; GCN-O3-NEXT: amdgpu-atomic-optimizer -; GCN-O3-NEXT: atomic-expand -; GCN-O3-NEXT: amdgpu-promote-alloca -; GCN-O3-NEXT: separate-const-offset-from-gep<> -; GCN-O3-NEXT: slsr -; GCN-O3-NEXT: gvn<> -; GCN-O3-NEXT: nary-reassociate -; GCN-O3-NEXT: early-cse<> -; GCN-O3-NEXT: amdgpu-codegenprepare -; GCN-O3-NEXT: loop-mssa(loop-reduce) -; GCN-O3-NEXT: mergeicmps -; GCN-O3-NEXT: expand-memcmp -; GCN-O3-NEXT: gc-lowering -; GCN-O3-NEXT: lower-constant-intrinsics -; GCN-O3-NEXT: UnreachableBlockElimPass -; GCN-O3-NEXT: consthoist -; GCN-O3-NEXT: ReplaceWithVeclib -; GCN-O3-NEXT: partially-inline-libcalls -; GCN-O3-NEXT: ee-instrument -; GCN-O3-NEXT: scalarize-masked-mem-intrin -; GCN-O3-NEXT: ExpandReductionsPass -; GCN-O3-NEXT: gvn<> -; GCN-O3-NEXT: amdgpu-lower-kernel-arguments) -; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers -; GCN-O3-NEXT: cgscc(function(codegenprepare -; GCN-O3-NEXT: load-store-vectorizer -; GCN-O3-NEXT: lower-switch -; GCN-O3-NEXT: lower-invoke -; GCN-O3-NEXT: UnreachableBlockElimPass -; GCN-O3-NEXT: flatten-cfg -; GCN-O3-NEXT: sink -; GCN-O3-NEXT: amdgpu-late-codegenprepare -; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes -; GCN-O3-NEXT: fix-irreducible -; GCN-O3-NEXT: unify-loop-exits -; GCN-O3-NEXT: StructurizeCFGPass -; GCN-O3-NEXT: amdgpu-annotate-uniform -; GCN-O3-NEXT: si-annotate-control-flow -; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi -; GCN-O3-NEXT: lcssa)) -; GCN-O3-NEXT: amdgpu-perf-hint -; GCN-O3-NEXT: cgscc(function(require -; GCN-O3-NEXT: callbr-prepare -; GCN-O3-NEXT: safe-stack -; GCN-O3-NEXT: stack-protector)) -; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel -; GCN-O3-NEXT: si-fix-sgpr-copies -; GCN-O3-NEXT: si-i1-copies -; GCN-O3-NEXT: finalize-isel -; GCN-O3-NEXT: early-tailduplication -; GCN-O3-NEXT: opt-phis -; GCN-O3-NEXT: stack-coloring -; GCN-O3-NEXT: localstackalloc -; GCN-O3-NEXT: dead-mi-elimination -; GCN-O3-NEXT: early-machinelicm -; GCN-O3-NEXT: machine-cse -; GCN-O3-NEXT: machine-sink -; GCN-O3-NEXT: peephole-opt -; GCN-O3-NEXT: dead-mi-elimination -; GCN-O3-NEXT: si-fold-operands -; GCN-O3-NEXT: gcn-dpp-combine -; GCN-O3-NEXT: si-load-store-opt -; GCN-O3-NEXT: si-peephole-sdwa -; GCN-O3-NEXT: early-machinelicm -; GCN-O3-NEXT: machine-cse -; GCN-O3-NEXT: si-fold-operands -; GCN-O3-NEXT: dead-mi-elimination -; GCN-O3-NEXT: si-shrink-instructions -; GCN-O3-NEXT: detect-dead-lanes -; GCN-O3-NEXT: InitUndefPass -; GCN-O3-NEXT: ProcessImplicitDefsPass -; GCN-O3-NEXT: unreachable-mbb-elimination -; GCN-O3-NEXT: require -; GCN-O3-NEXT: require -; GCN-O3-NEXT: phi-node-elimination -; GCN-O3-NEXT: two-address-instruction -; GCN-O3-NEXT: register-coalescer -; GCN-O3-NEXT: rename-independent-subregs -; GCN-O3-NEXT: machine-scheduler -; GCN-O3-NEXT: greedy -; GCN-O3-NEXT: amdgpu-nsa-reassign -; GCN-O3-NEXT: VirtRegRewriterPass -; GCN-O3-NEXT: stack-slot-coloring -; GCN-O3-NEXT: machine-cp -; GCN-O3-NEXT: machinelicm -; GCN-O3-NEXT: si-fix-vgpr-copies -; GCN-O3-NEXT: si-optimize-exec-masking -; GCN-O3-NEXT: remove-redundant-debug-values -; GCN-O3-NEXT: fixup-statepoint-caller-saved -; GCN-O3-NEXT: PostRAMachineSinkingPass -; GCN-O3-NEXT: ShrinkWrapPass -; GCN-O3-NEXT: PrologEpilogInserterPass -; GCN-O3-NEXT: branch-folder -; GCN-O3-NEXT: tailduplication -; GCN-O3-NEXT: machine-latecleanup -; GCN-O3-NEXT: machine-cp -; GCN-O3-NEXT: post-ra-pseudos -; GCN-O3-NEXT: postmisched -; GCN-O3-NEXT: block-placement -; GCN-O3-NEXT: fentry-insert -; GCN-O3-NEXT: xray-instrumentation -; GCN-O3-NEXT: patchable-function -; GCN-O3-NEXT: gcn-create-vopd -; GCN-O3-NEXT: si-memory-legalizer -; GCN-O3-NEXT: si-insert-waitcnts -; GCN-O3-NEXT: si-late-branch-lowering -; GCN-O3-NEXT: si-pre-emit-peephole -; GCN-O3-NEXT: post-RA-hazard-rec -; GCN-O3-NEXT: AMDGPUWaitSGPRHazardsPass -; GCN-O3-NEXT: amdgpu-insert-delay-alu -; GCN-O3-NEXT: branch-relaxation -; GCN-O3-NEXT: remove-loads-into-fake-uses -; GCN-O3-NEXT: live-debug-values -; GCN-O3-NEXT: machine-sanmd -; GCN-O3-NEXT: stack-frame-layout) -; GCN-O3-NEXT: invalidate)) +; GCN-O3: require +; GCN-O3: require +; GCN-O3: pre-isel-intrinsic-lowering +; GCN-O3: function(expand-large-div-rem +; GCN-O3: expand-fp) +; GCN-O3: amdgpu-remove-incompatible-functions +; GCN-O3: amdgpu-printf-runtime-binding +; GCN-O3: amdgpu-lower-ctor-dtor +; GCN-O3: function(amdgpu-image-intrinsic-opt) +; GCN-O3: expand-variadics +; GCN-O3: amdgpu-always-inline +; GCN-O3: always-inline +; GCN-O3: amdgpu-export-kernel-runtime-handles +; GCN-O3: amdgpu-sw-lower-lds +; GCN-O3: amdgpu-lower-module-lds +; GCN-O3: function(infer-address-spaces +; GCN-O3: amdgpu-atomic-optimizer +; GCN-O3: atomic-expand +; GCN-O3: amdgpu-promote-alloca +; GCN-O3: separate-const-offset-from-gep<> +; GCN-O3: slsr +; GCN-O3: gvn<> +; GCN-O3: nary-reassociate +; GCN-O3: early-cse<> +; GCN-O3: amdgpu-codegenprepare +; GCN-O3: loop-mssa(loop-reduce) +; GCN-O3: mergeicmps +; GCN-O3: expand-memcmp +; GCN-O3: gc-lowering +; GCN-O3: lower-constant-intrinsics +; GCN-O3: UnreachableBlockElimPass +; GCN-O3: consthoist +; GCN-O3: ReplaceWithVeclib +; GCN-O3: partially-inline-libcalls +; GCN-O3: ee-instrument +; GCN-O3: scalarize-masked-mem-intrin +; GCN-O3: ExpandReductionsPass +; GCN-O3: gvn<> +; GCN-O3: amdgpu-lower-kernel-arguments) +; GCN-O3: amdgpu-lower-buffer-fat-pointers +; GCN-O3: cgscc(function(codegenprepare +; GCN-O3: load-store-vectorizer +; GCN-O3: lower-switch +; GCN-O3: lower-invoke +; GCN-O3: UnreachableBlockElimPass +; GCN-O3: flatten-cfg +; GCN-O3: sink +; GCN-O3: amdgpu-late-codegenprepare +; GCN-O3: amdgpu-unify-divergent-exit-nodes +; GCN-O3: fix-irreducible +; GCN-O3: unify-loop-exits +; GCN-O3: StructurizeCFGPass +; GCN-O3: amdgpu-annotate-uniform +; GCN-O3: si-annotate-control-flow +; GCN-O3: amdgpu-rewrite-undef-for-phi +; GCN-O3: lcssa)) +; GCN-O3: amdgpu-perf-hint +; GCN-O3: cgscc(function(require +; GCN-O3: callbr-prepare +; GCN-O3: safe-stack +; GCN-O3: stack-protector)) +; GCN-O3: cgscc(function(machine-function(amdgpu-isel +; GCN-O3: si-fix-sgpr-copies +; GCN-O3: si-i1-copies +; GCN-O3: finalize-isel +; GCN-O3: early-tailduplication +; GCN-O3: opt-phis +; GCN-O3: stack-coloring +; GCN-O3: localstackalloc +; GCN-O3: dead-mi-elimination +; GCN-O3: early-machinelicm +; GCN-O3: machine-cse +; GCN-O3: machine-sink +; GCN-O3: peephole-opt +; GCN-O3: dead-mi-elimination +; GCN-O3: si-fold-operands +; GCN-O3: gcn-dpp-combine +; GCN-O3: si-load-store-opt +; GCN-O3: si-peephole-sdwa +; GCN-O3: early-machinelicm +; GCN-O3: machine-cse +; GCN-O3: si-fold-operands +; GCN-O3: dead-mi-elimination +; GCN-O3: si-shrink-instructions +; GCN-O3: detect-dead-lanes +; GCN-O3: InitUndefPass +; GCN-O3: ProcessImplicitDefsPass +; GCN-O3: unreachable-mbb-elimination +; GCN-O3: require +; GCN-O3: require +; GCN-O3: phi-node-elimination +; GCN-O3: two-address-instruction +; GCN-O3: register-coalescer +; GCN-O3: rename-independent-subregs +; GCN-O3: machine-scheduler +; GCN-O3: greedy +; GCN-O3: amdgpu-nsa-reassign +; GCN-O3: VirtRegRewriterPass +; GCN-O3: stack-slot-coloring +; GCN-O3: machine-cp +; GCN-O3: machinelicm +; GCN-O3: si-fix-vgpr-copies +; GCN-O3: si-optimize-exec-masking +; GCN-O3: remove-redundant-debug-values +; GCN-O3: fixup-statepoint-caller-saved +; GCN-O3: PostRAMachineSinkingPass +; GCN-O3: ShrinkWrapPass +; GCN-O3: PrologEpilogInserterPass +; GCN-O3: branch-folder +; GCN-O3: tailduplication +; GCN-O3: machine-latecleanup +; GCN-O3: machine-cp +; GCN-O3: post-ra-pseudos +; GCN-O3: postmisched +; GCN-O3: block-placement +; GCN-O3: fentry-insert +; GCN-O3: xray-instrumentation +; GCN-O3: patchable-function +; GCN-O3: gcn-create-vopd +; GCN-O3: si-memory-legalizer +; GCN-O3: si-insert-waitcnts +; GCN-O3: si-late-branch-lowering +; GCN-O3: si-pre-emit-peephole +; GCN-O3: post-RA-hazard-rec +; GCN-O3: AMDGPUWaitSGPRHazardsPass +; GCN-O3: amdgpu-insert-delay-alu +; GCN-O3: branch-relaxation +; GCN-O3: remove-loads-into-fake-uses +; GCN-O3: live-debug-values +; GCN-O3: machine-sanmd +; GCN-O3: stack-frame-layout) +; GCN-O3: invalidate)) define void @empty() { From 97ead0671449a7163eccefb6e1e95d0b36904a51 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Thu, 24 Apr 2025 04:51:11 +0000 Subject: [PATCH 3/7] separate tests for O2 and O3 --- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 139 ++++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 7ba1771eba08d..fbfb850cd7776 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -1,7 +1,142 @@ -; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \ +; RUN: llc -enable-new-pm -disable-verify -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=GCN-O2 %s + +; RUN: llc -O3 -enable-new-pm -disable-verify -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; REQUIRES: asserts +; GCN-O2: require +; GCN-O2: require +; GCN-O2: require +; GCN-O2: pre-isel-intrinsic-lowering +; GCN-O2: function(expand-large-div-rem +; GCN-O2: expand-fp) +; GCN-O2: amdgpu-remove-incompatible-functions +; GCN-O2: amdgpu-printf-runtime-binding +; GCN-O2: amdgpu-lower-ctor-dtor +; GCN-O2: function(amdgpu-image-intrinsic-opt) +; GCN-O2: expand-variadics +; GCN-O2: amdgpu-always-inline +; GCN-O2: always-inline +; GCN-O2: amdgpu-export-kernel-runtime-handles +; GCN-O2: amdgpu-sw-lower-lds +; GCN-O2: amdgpu-lower-module-lds +; GCN-O2: function(infer-address-spaces +; GCN-O2: amdgpu-atomic-optimizer +; GCN-O2: atomic-expand +; GCN-O2: amdgpu-promote-alloca +; GCN-O2: separate-const-offset-from-gep<> +; GCN-O2: slsr +; GCN-O2: early-cse<> +; GCN-O2: nary-reassociate +; GCN-O2: early-cse<> +; GCN-O2: amdgpu-codegenprepare +; GCN-O2: loop-mssa(loop-reduce) +; GCN-O2: mergeicmps +; GCN-O2: expand-memcmp +; GCN-O2: gc-lowering +; GCN-O2: lower-constant-intrinsics +; GCN-O2: UnreachableBlockElimPass +; GCN-O2: consthoist +; GCN-O2: ReplaceWithVeclib +; GCN-O2: partially-inline-libcalls +; GCN-O2: ee-instrument +; GCN-O2: scalarize-masked-mem-intrin +; GCN-O2: ExpandReductionsPass +; GCN-O2: early-cse<> +; GCN-O2: amdgpu-lower-kernel-arguments) +; GCN-O2: amdgpu-lower-buffer-fat-pointers +; GCN-O2: cgscc(function(codegenprepare +; GCN-O2: load-store-vectorizer +; GCN-O2: lower-switch +; GCN-O2: lower-invoke +; GCN-O2: UnreachableBlockElimPass +; GCN-O2: flatten-cfg +; GCN-O2: sink +; GCN-O2: amdgpu-late-codegenprepare +; GCN-O2: amdgpu-unify-divergent-exit-nodes +; GCN-O2: fix-irreducible +; GCN-O2: unify-loop-exits +; GCN-O2: StructurizeCFGPass +; GCN-O2: amdgpu-annotate-uniform +; GCN-O2: si-annotate-control-flow +; GCN-O2: amdgpu-rewrite-undef-for-phi +; GCN-O2: lcssa)) +; GCN-O2: amdgpu-perf-hint +; GCN-O2: cgscc(function(require +; GCN-O2: callbr-prepare +; GCN-O2: safe-stack +; GCN-O2: stack-protector)) +; GCN-O2: cgscc(function(machine-function(amdgpu-isel +; GCN-O2: si-fix-sgpr-copies +; GCN-O2: si-i1-copies +; GCN-O2: finalize-isel +; GCN-O2: early-tailduplication +; GCN-O2: opt-phis +; GCN-O2: stack-coloring +; GCN-O2: localstackalloc +; GCN-O2: dead-mi-elimination +; GCN-O2: early-machinelicm +; GCN-O2: machine-cse +; GCN-O2: machine-sink +; GCN-O2: peephole-opt +; GCN-O2: dead-mi-elimination +; GCN-O2: si-fold-operands +; GCN-O2: gcn-dpp-combine +; GCN-O2: si-load-store-opt +; GCN-O2: si-peephole-sdwa +; GCN-O2: early-machinelicm +; GCN-O2: machine-cse +; GCN-O2: si-fold-operands +; GCN-O2: dead-mi-elimination +; GCN-O2: si-shrink-instructions +; GCN-O2: detect-dead-lanes +; GCN-O2: InitUndefPass +; GCN-O2: ProcessImplicitDefsPass +; GCN-O2: unreachable-mbb-elimination +; GCN-O2: require +; GCN-O2: require +; GCN-O2: phi-node-elimination +; GCN-O2: two-address-instruction +; GCN-O2: register-coalescer +; GCN-O2: rename-independent-subregs +; GCN-O2: machine-scheduler +; GCN-O2: greedy +; GCN-O2: amdgpu-nsa-reassign +; GCN-O2: VirtRegRewriterPass +; GCN-O2: stack-slot-coloring +; GCN-O2: machine-cp +; GCN-O2: machinelicm +; GCN-O2: si-fix-vgpr-copies +; GCN-O2: si-optimize-exec-masking +; GCN-O2: remove-redundant-debug-values +; GCN-O2: fixup-statepoint-caller-saved +; GCN-O2: PostRAMachineSinkingPass +; GCN-O2: ShrinkWrapPass +; GCN-O2: PrologEpilogInserterPass +; GCN-O2: branch-folder +; GCN-O2: tailduplication +; GCN-O2: machine-latecleanup +; GCN-O2: machine-cp +; GCN-O2: post-ra-pseudos +; GCN-O2: postmisched +; GCN-O2: block-placement +; GCN-O2: fentry-insert +; GCN-O2: xray-instrumentation +; GCN-O2: patchable-function +; GCN-O2: gcn-create-vopd +; GCN-O2: si-memory-legalizer +; GCN-O2: si-insert-waitcnts +; GCN-O2: si-late-branch-lowering +; GCN-O2: si-pre-emit-peephole +; GCN-O2: post-RA-hazard-rec +; GCN-O2: AMDGPUWaitSGPRHazardsPass +; GCN-O2: amdgpu-insert-delay-alu +; GCN-O2: branch-relaxation +; GCN-O2: remove-loads-into-fake-uses +; GCN-O2: live-debug-values +; GCN-O2: machine-sanmd +; GCN-O2: stack-frame-layout) +; GCN-O2: invalidate)) ; GCN-O3: require ; GCN-O3: require From 7d8439580874cf99cbcf4a762b8a2d7b4e308c25 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 5 May 2025 06:49:51 +0000 Subject: [PATCH 4/7] condense test into one line pipeline is printed on a single line, so having CHECK lines on separate lines can allow extra characters in between (and will not error out on extra passes being in the pipeline) --- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 279 +------------------ 1 file changed, 11 insertions(+), 268 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index fbfb850cd7776..e9b57515e71e0 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -1,276 +1,19 @@ -; RUN: llc -enable-new-pm -disable-verify -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ +; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -O0 -print-pipeline-passes < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=GCN-O0 %s + +; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O2 %s -; RUN: llc -O3 -enable-new-pm -disable-verify -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ +; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O2: require -; GCN-O2: require -; GCN-O2: require -; GCN-O2: pre-isel-intrinsic-lowering -; GCN-O2: function(expand-large-div-rem -; GCN-O2: expand-fp) -; GCN-O2: amdgpu-remove-incompatible-functions -; GCN-O2: amdgpu-printf-runtime-binding -; GCN-O2: amdgpu-lower-ctor-dtor -; GCN-O2: function(amdgpu-image-intrinsic-opt) -; GCN-O2: expand-variadics -; GCN-O2: amdgpu-always-inline -; GCN-O2: always-inline -; GCN-O2: amdgpu-export-kernel-runtime-handles -; GCN-O2: amdgpu-sw-lower-lds -; GCN-O2: amdgpu-lower-module-lds -; GCN-O2: function(infer-address-spaces -; GCN-O2: amdgpu-atomic-optimizer -; GCN-O2: atomic-expand -; GCN-O2: amdgpu-promote-alloca -; GCN-O2: separate-const-offset-from-gep<> -; GCN-O2: slsr -; GCN-O2: early-cse<> -; GCN-O2: nary-reassociate -; GCN-O2: early-cse<> -; GCN-O2: amdgpu-codegenprepare -; GCN-O2: loop-mssa(loop-reduce) -; GCN-O2: mergeicmps -; GCN-O2: expand-memcmp -; GCN-O2: gc-lowering -; GCN-O2: lower-constant-intrinsics -; GCN-O2: UnreachableBlockElimPass -; GCN-O2: consthoist -; GCN-O2: ReplaceWithVeclib -; GCN-O2: partially-inline-libcalls -; GCN-O2: ee-instrument -; GCN-O2: scalarize-masked-mem-intrin -; GCN-O2: ExpandReductionsPass -; GCN-O2: early-cse<> -; GCN-O2: amdgpu-lower-kernel-arguments) -; GCN-O2: amdgpu-lower-buffer-fat-pointers -; GCN-O2: cgscc(function(codegenprepare -; GCN-O2: load-store-vectorizer -; GCN-O2: lower-switch -; GCN-O2: lower-invoke -; GCN-O2: UnreachableBlockElimPass -; GCN-O2: flatten-cfg -; GCN-O2: sink -; GCN-O2: amdgpu-late-codegenprepare -; GCN-O2: amdgpu-unify-divergent-exit-nodes -; GCN-O2: fix-irreducible -; GCN-O2: unify-loop-exits -; GCN-O2: StructurizeCFGPass -; GCN-O2: amdgpu-annotate-uniform -; GCN-O2: si-annotate-control-flow -; GCN-O2: amdgpu-rewrite-undef-for-phi -; GCN-O2: lcssa)) -; GCN-O2: amdgpu-perf-hint -; GCN-O2: cgscc(function(require -; GCN-O2: callbr-prepare -; GCN-O2: safe-stack -; GCN-O2: stack-protector)) -; GCN-O2: cgscc(function(machine-function(amdgpu-isel -; GCN-O2: si-fix-sgpr-copies -; GCN-O2: si-i1-copies -; GCN-O2: finalize-isel -; GCN-O2: early-tailduplication -; GCN-O2: opt-phis -; GCN-O2: stack-coloring -; GCN-O2: localstackalloc -; GCN-O2: dead-mi-elimination -; GCN-O2: early-machinelicm -; GCN-O2: machine-cse -; GCN-O2: machine-sink -; GCN-O2: peephole-opt -; GCN-O2: dead-mi-elimination -; GCN-O2: si-fold-operands -; GCN-O2: gcn-dpp-combine -; GCN-O2: si-load-store-opt -; GCN-O2: si-peephole-sdwa -; GCN-O2: early-machinelicm -; GCN-O2: machine-cse -; GCN-O2: si-fold-operands -; GCN-O2: dead-mi-elimination -; GCN-O2: si-shrink-instructions -; GCN-O2: detect-dead-lanes -; GCN-O2: InitUndefPass -; GCN-O2: ProcessImplicitDefsPass -; GCN-O2: unreachable-mbb-elimination -; GCN-O2: require -; GCN-O2: require -; GCN-O2: phi-node-elimination -; GCN-O2: two-address-instruction -; GCN-O2: register-coalescer -; GCN-O2: rename-independent-subregs -; GCN-O2: machine-scheduler -; GCN-O2: greedy -; GCN-O2: amdgpu-nsa-reassign -; GCN-O2: VirtRegRewriterPass -; GCN-O2: stack-slot-coloring -; GCN-O2: machine-cp -; GCN-O2: machinelicm -; GCN-O2: si-fix-vgpr-copies -; GCN-O2: si-optimize-exec-masking -; GCN-O2: remove-redundant-debug-values -; GCN-O2: fixup-statepoint-caller-saved -; GCN-O2: PostRAMachineSinkingPass -; GCN-O2: ShrinkWrapPass -; GCN-O2: PrologEpilogInserterPass -; GCN-O2: branch-folder -; GCN-O2: tailduplication -; GCN-O2: machine-latecleanup -; GCN-O2: machine-cp -; GCN-O2: post-ra-pseudos -; GCN-O2: postmisched -; GCN-O2: block-placement -; GCN-O2: fentry-insert -; GCN-O2: xray-instrumentation -; GCN-O2: patchable-function -; GCN-O2: gcn-create-vopd -; GCN-O2: si-memory-legalizer -; GCN-O2: si-insert-waitcnts -; GCN-O2: si-late-branch-lowering -; GCN-O2: si-pre-emit-peephole -; GCN-O2: post-RA-hazard-rec -; GCN-O2: AMDGPUWaitSGPRHazardsPass -; GCN-O2: amdgpu-insert-delay-alu -; GCN-O2: branch-relaxation -; GCN-O2: remove-loads-into-fake-uses -; GCN-O2: live-debug-values -; GCN-O2: machine-sanmd -; GCN-O2: stack-frame-layout) -; GCN-O2: invalidate)) -; GCN-O3: require -; GCN-O3: require -; GCN-O3: require -; GCN-O3: pre-isel-intrinsic-lowering -; GCN-O3: function(expand-large-div-rem -; GCN-O3: expand-fp) -; GCN-O3: amdgpu-remove-incompatible-functions -; GCN-O3: amdgpu-printf-runtime-binding -; GCN-O3: amdgpu-lower-ctor-dtor -; GCN-O3: function(amdgpu-image-intrinsic-opt) -; GCN-O3: expand-variadics -; GCN-O3: amdgpu-always-inline -; GCN-O3: always-inline -; GCN-O3: amdgpu-export-kernel-runtime-handles -; GCN-O3: amdgpu-sw-lower-lds -; GCN-O3: amdgpu-lower-module-lds -; GCN-O3: function(infer-address-spaces -; GCN-O3: amdgpu-atomic-optimizer -; GCN-O3: atomic-expand -; GCN-O3: amdgpu-promote-alloca -; GCN-O3: separate-const-offset-from-gep<> -; GCN-O3: slsr -; GCN-O3: gvn<> -; GCN-O3: nary-reassociate -; GCN-O3: early-cse<> -; GCN-O3: amdgpu-codegenprepare -; GCN-O3: loop-mssa(loop-reduce) -; GCN-O3: mergeicmps -; GCN-O3: expand-memcmp -; GCN-O3: gc-lowering -; GCN-O3: lower-constant-intrinsics -; GCN-O3: UnreachableBlockElimPass -; GCN-O3: consthoist -; GCN-O3: ReplaceWithVeclib -; GCN-O3: partially-inline-libcalls -; GCN-O3: ee-instrument -; GCN-O3: scalarize-masked-mem-intrin -; GCN-O3: ExpandReductionsPass -; GCN-O3: gvn<> -; GCN-O3: amdgpu-lower-kernel-arguments) -; GCN-O3: amdgpu-lower-buffer-fat-pointers -; GCN-O3: cgscc(function(codegenprepare -; GCN-O3: load-store-vectorizer -; GCN-O3: lower-switch -; GCN-O3: lower-invoke -; GCN-O3: UnreachableBlockElimPass -; GCN-O3: flatten-cfg -; GCN-O3: sink -; GCN-O3: amdgpu-late-codegenprepare -; GCN-O3: amdgpu-unify-divergent-exit-nodes -; GCN-O3: fix-irreducible -; GCN-O3: unify-loop-exits -; GCN-O3: StructurizeCFGPass -; GCN-O3: amdgpu-annotate-uniform -; GCN-O3: si-annotate-control-flow -; GCN-O3: amdgpu-rewrite-undef-for-phi -; GCN-O3: lcssa)) -; GCN-O3: amdgpu-perf-hint -; GCN-O3: cgscc(function(require -; GCN-O3: callbr-prepare -; GCN-O3: safe-stack -; GCN-O3: stack-protector)) -; GCN-O3: cgscc(function(machine-function(amdgpu-isel -; GCN-O3: si-fix-sgpr-copies -; GCN-O3: si-i1-copies -; GCN-O3: finalize-isel -; GCN-O3: early-tailduplication -; GCN-O3: opt-phis -; GCN-O3: stack-coloring -; GCN-O3: localstackalloc -; GCN-O3: dead-mi-elimination -; GCN-O3: early-machinelicm -; GCN-O3: machine-cse -; GCN-O3: machine-sink -; GCN-O3: peephole-opt -; GCN-O3: dead-mi-elimination -; GCN-O3: si-fold-operands -; GCN-O3: gcn-dpp-combine -; GCN-O3: si-load-store-opt -; GCN-O3: si-peephole-sdwa -; GCN-O3: early-machinelicm -; GCN-O3: machine-cse -; GCN-O3: si-fold-operands -; GCN-O3: dead-mi-elimination -; GCN-O3: si-shrink-instructions -; GCN-O3: detect-dead-lanes -; GCN-O3: InitUndefPass -; GCN-O3: ProcessImplicitDefsPass -; GCN-O3: unreachable-mbb-elimination -; GCN-O3: require -; GCN-O3: require -; GCN-O3: phi-node-elimination -; GCN-O3: two-address-instruction -; GCN-O3: register-coalescer -; GCN-O3: rename-independent-subregs -; GCN-O3: machine-scheduler -; GCN-O3: greedy -; GCN-O3: amdgpu-nsa-reassign -; GCN-O3: VirtRegRewriterPass -; GCN-O3: stack-slot-coloring -; GCN-O3: machine-cp -; GCN-O3: machinelicm -; GCN-O3: si-fix-vgpr-copies -; GCN-O3: si-optimize-exec-masking -; GCN-O3: remove-redundant-debug-values -; GCN-O3: fixup-statepoint-caller-saved -; GCN-O3: PostRAMachineSinkingPass -; GCN-O3: ShrinkWrapPass -; GCN-O3: PrologEpilogInserterPass -; GCN-O3: branch-folder -; GCN-O3: tailduplication -; GCN-O3: machine-latecleanup -; GCN-O3: machine-cp -; GCN-O3: post-ra-pseudos -; GCN-O3: postmisched -; GCN-O3: block-placement -; GCN-O3: fentry-insert -; GCN-O3: xray-instrumentation -; GCN-O3: patchable-function -; GCN-O3: gcn-create-vopd -; GCN-O3: si-memory-legalizer -; GCN-O3: si-insert-waitcnts -; GCN-O3: si-late-branch-lowering -; GCN-O3: si-pre-emit-peephole -; GCN-O3: post-RA-hazard-rec -; GCN-O3: AMDGPUWaitSGPRHazardsPass -; GCN-O3: amdgpu-insert-delay-alu -; GCN-O3: branch-relaxation -; GCN-O3: remove-loads-into-fake-uses -; GCN-O3: live-debug-values -; GCN-O3: machine-sanmd -; GCN-O3: stack-frame-layout) -; GCN-O3: invalidate)) +; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) + + +; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) + +; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) define void @empty() { From cb75a3d033b2b138c2d1dfe9a31af4d6fa09d736 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 6 May 2025 09:21:32 +0000 Subject: [PATCH 5/7] early return --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 67f4a36511c5b..fe3a41b814a89 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -329,18 +329,18 @@ template class CodeGenPassBuilder { private: void flushMFPMToMPM() { - if (!MFPM.isEmpty()) { - if (PB.AddInCGSCCOrder) { - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - createCGSCCToFunctionPassAdaptor( - createFunctionToMachineFunctionPassAdaptor( - std::move(MFPM))))); - } else { - MPM.addPass(createModuleToFunctionPassAdaptor( - createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); - } - MFPM = MachineFunctionPassManager(); + if (MFPM.isEmpty()) + return; + + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); } + MFPM = MachineFunctionPassManager(); } ModulePassManager &MPM; From 7cda52fbb1eb7d028cfd3b4be1b2018bf1ddfb55 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 12 May 2025 08:08:45 +0000 Subject: [PATCH 6/7] more early returns --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index fe3a41b814a89..a3b19af4adc39 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -253,15 +253,15 @@ template class CodeGenPassBuilder { private: void flushFPMToMPM() { - if (!FPM.isEmpty()) { - if (PB.AddInCGSCCOrder) { - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - createCGSCCToFunctionPassAdaptor(std::move(FPM)))); - } else { - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - FPM = FunctionPassManager(); + if (FPM.isEmpty()) + return; + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + FPM = FunctionPassManager(); } ModulePassManager &MPM; FunctionPassManager FPM; @@ -274,17 +274,17 @@ template class CodeGenPassBuilder { AddMachinePass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {} ~AddMachinePass() { - if (!MFPM.isEmpty()) { - FunctionPassManager FPM; - FPM.addPass( - createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); - FPM.addPass(InvalidateAnalysisPass()); - if (this->PB.AddInCGSCCOrder) { - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - createCGSCCToFunctionPassAdaptor(std::move(FPM)))); - } else - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } + if (MFPM.isEmpty()) + return; + + FunctionPassManager FPM; + FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); + FPM.addPass(InvalidateAnalysisPass()); + if (this->PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } template From 032da65bcedeab3e69ea46699cbc518cdefe5ac3 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 7 Jul 2025 09:47:06 +0000 Subject: [PATCH 7/7] update test --- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index e9b57515e71e0..5155ec212c12f 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,14 +7,9 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s - ; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) - - -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) - -; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) - +; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) define void @empty() { ret void