Skip to content

Commit d23c5c2

Browse files
authored
[CGData] Global Merge Functions (#112671)
This implements a global function merging pass. Unlike traditional function merging passes that use IR comparators, this pass employs a structurally stable hash to identify similar functions while ignoring certain constant operands. These ignored constants are tracked and encoded into a stable function summary. When merging, instead of explicitly folding similar functions and their call sites, we form a merging instance by supplying different parameters via thunks. The actual size reduction occurs when identically created merging instances are folded by the linker. Currently, this pass is wired in as a pre-codegen pass, enabled by the `-enable-global-merge-func` flag.

In a local merging mode, the analysis and merging steps occur sequentially within a module:
- `analyze`: Collects stable function hashes and tracks locations of ignored constant operands.
- `finalize`: Identifies merge candidates with matching hashes and computes the set of parameters that point to different constants.
- `merge`: Uses the stable function map to optimistically create a merged function.

We can enable a global merging mode similar to the global function outliner (https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753/), which will perform the above steps separately:
- `-codegen-data-generate`: During the first round of code generation, we analyze local merging instances and publish their summaries.
- Offline using `llvm-cgdata` or at link-time, we can finalize all these merging summaries that are combined to determine parameters.
- `-codegen-data-use`: During the second round of code generation, we optimistically create merging instances within each module, and finally, the linker folds identically created merging instances.

Depends on #112664. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent 6e614e1 commit d23c5c2

29 files changed

+1197
-11
lines changed

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ class CodeGenData {
145145
const OutlinedHashTree *getOutlinedHashTree() {
146146
return PublishedHashTree.get();
147147
}
148+
/// Returns the pointer obtained from PublishedStableFunctionMap.get().
/// NOTE(review): presumably null when no stable function map has been
/// published — confirm against the member's declaration.
const StableFunctionMap *getStableFunctionMap() {
149+
return PublishedStableFunctionMap.get();
150+
}
148151

149152
/// Returns true if we should write codegen data.
150153
bool emitCGData() { return EmitCGData; }
@@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
169172
return CodeGenData::getInstance().hasOutlinedHashTree();
170173
}
171174

175+
/// Free-function shorthand: forwards to hasStableFunctionMap() on the
/// CodeGenData object returned by CodeGenData::getInstance().
inline bool hasStableFunctionMap() {
176+
return CodeGenData::getInstance().hasStableFunctionMap();
177+
}
178+
172179
inline const OutlinedHashTree *getOutlinedHashTree() {
173180
return CodeGenData::getInstance().getOutlinedHashTree();
174181
}
175182

183+
/// Free-function shorthand: forwards to getStableFunctionMap() on the
/// CodeGenData object returned by CodeGenData::getInstance() and returns the
/// resulting pointer unchanged.
inline const StableFunctionMap *getStableFunctionMap() {
184+
return CodeGenData::getInstance().getStableFunctionMap();
185+
}
186+
176187
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
177188

178189
inline void

llvm/include/llvm/CGData/StableFunctionMap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ struct StableFunctionMap {
110110
size_t size(SizeType Type = UniqueHashCount) const;
111111

112112
/// Finalize the stable function map by trimming content.
113-
void finalize();
113+
void finalize(bool SkipTrim = false);
114114

115115
private:
116116
/// Insert a `StableFunctionEntry` into the function map directly. This

llvm/include/llvm/CGData/StableFunctionMapRecord.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct StableFunctionMapRecord {
4949
void deserializeYAML(yaml::Input &YIS);
5050

5151
/// Finalize the stable function map by trimming content.
52-
void finalize() { FunctionMap->finalize(); }
52+
void finalize(bool SkipTrim = false) { FunctionMap->finalize(SkipTrim); }
5353

5454
/// Merge the stable function map into this one.
5555
void merge(const StableFunctionMapRecord &Other) {
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass defines the implementation of a function merging mechanism
10+
// that utilizes a stable function hash to track differences in constants and
11+
// identify potential merge candidates. The process involves two rounds:
12+
// 1. The first round collects stable function hashes and identifies merge
13+
// candidates with matching hashes. It also computes the set of parameters
14+
// that point to different constants during the stable function merge.
15+
// 2. The second round leverages this collected global function information to
16+
// optimistically create a merged function in each module context, ensuring
17+
// correct transformation.
18+
// Similar to the global outliner, this approach uses the linker's deduplication
19+
// (ICF) to fold identical merged functions, thereby reducing the final binary
20+
// size. The work is inspired by the concepts discussed in the following paper:
21+
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
22+
//
23+
//===----------------------------------------------------------------------===//
24+
25+
#ifndef LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
26+
#define LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
27+
28+
#include "llvm/CGData/StableFunctionMap.h"
29+
#include "llvm/IR/Module.h"
30+
#include "llvm/IR/PassManager.h"
31+
#include "llvm/Pass.h"
32+
33+
// Mode in which the function merger operates. GlobalMergeFunc::MergerMode
// holds one of these values and is initialized to Local.
// NOTE(review): this enum is declared before `namespace llvm` is opened, so it
// lives in the global namespace — confirm that this is intended.
enum class HashFunctionMode {
34+
Local, // presumably: analyze and merge within a single module — confirm
35+
// NOTE(review): "Funcion" is misspelled; renaming would affect every user.
BuildingHashFuncion, // presumably: first round, publishing summaries — confirm
36+
UsingHashFunction, // presumably: second round, consuming summaries — confirm
37+
};
38+
39+
namespace llvm {
40+
41+
// A vector of locations (the pair of (instruction, operand) indices) reachable
42+
// from a parameter.
43+
using ParamLocs = SmallVector<IndexPair, 4>;
44+
// A vector of parameters
45+
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
46+
47+
/// GlobalMergeFunc implements a function merging mechanism
48+
/// using stable function hashes. It identifies and merges functions with
49+
/// matching hashes across modules to optimize binary size.
/// Note that this class has no base class; it is not itself a pass type.
50+
class GlobalMergeFunc {
51+
/// Current merger mode; initialized to Local.
/// NOTE(review): presumably updated by initializeMergerMode() — confirm.
HashFunctionMode MergerMode = HashFunctionMode::Local;
52+
53+
/// Stable function map owned by this object through the unique_ptr.
std::unique_ptr<StableFunctionMap> LocalFunctionMap;
54+
55+
/// Module summary index pointer as handed to the constructor. This class
/// declares no destructor, so the pointee is not released here.
const ModuleSummaryIndex *Index;
56+
57+
public:
58+
/// The suffix used to identify the merged function that parameterizes
59+
/// the constant values. Note that the original function, without this suffix,
60+
/// becomes a thunk supplying contexts to the merged function via parameters.
61+
static constexpr const char MergingInstanceSuffix[] = ".Tgm";
62+
63+
/// Stores \p Index in the Index member; the other members keep their
/// in-class defaults.
/// NOTE(review): this single-argument constructor is not `explicit`, and the
/// `;` after its body is redundant.
GlobalMergeFunc(const ModuleSummaryIndex *Index) : Index(Index) {};
64+
65+
/// NOTE(review): presumably selects MergerMode for module \p M — confirm
/// against the implementation.
void initializeMergerMode(const Module &M);
66+
67+
/// NOTE(review): presumably the entry point that drives the steps below and
/// returns whether \p M was changed — confirm against the implementation.
bool run(Module &M);
68+
69+
/// Analyze the module and record its stable functions in LocalFunctionMap.
70+
void analyze(Module &M);
71+
72+
/// Emit LocalFunctionMap into the __llvm_merge section.
73+
void emitFunctionMap(Module &M);
74+
75+
/// Merge functions in the module using the given function map.
/// NOTE(review): the bool result presumably reports whether anything was
/// merged — confirm against the implementation.
76+
bool merge(Module &M, const StableFunctionMap *FunctionMap);
77+
};
78+
79+
/// Global function merging pass for the new pass manager. It derives from
/// PassInfoMixin<GlobalMergeFuncPass> and exposes a module-level run().
80+
struct GlobalMergeFuncPass : public PassInfoMixin<GlobalMergeFuncPass> {
81+
/// Runs the pass on \p M; the analysis manager parameter is left unnamed in
/// this declaration.
PreservedAnalyses run(Module &M, AnalysisManager<Module> &);
82+
};
83+
84+
} // end namespace llvm
85+
#endif // LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,9 @@ namespace llvm {
507507
/// This pass frees the memory occupied by the MachineFunction.
508508
FunctionPass *createFreeMachineFunctionPass();
509509

510+
/// This pass merges similar functions globally.
511+
ModulePass *createGlobalMergeFuncPass();
512+
510513
/// This pass performs outlining on machine instructions directly before
511514
/// printing assembly.
512515
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
123123
void initializeGCMachineCodeAnalysisPass(PassRegistry &);
124124
void initializeGCModuleInfoPass(PassRegistry &);
125125
void initializeGVNLegacyPassPass(PassRegistry &);
126+
void initializeGlobalMergeFuncPassWrapperPass(PassRegistry &);
126127
void initializeGlobalMergePass(PassRegistry &);
127128
void initializeGlobalsAAWrapperPassPass(PassRegistry &);
128129
void initializeHardwareLoopsLegacyPass(PassRegistry &);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct ForcePassLinking {
7979
(void)llvm::createDomOnlyViewerWrapperPassPass();
8080
(void)llvm::createDomViewerWrapperPassPass();
8181
(void)llvm::createAlwaysInlinerLegacyPass();
82+
(void)llvm::createGlobalMergeFuncPass();
8283
(void)llvm::createGlobalsAAWrapperPass();
8384
(void)llvm::createInstSimplifyLegacyPass();
8485
(void)llvm::createInstructionCombiningPass();

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/FinalizeISel.h"
3636
#include "llvm/CodeGen/GCMetadata.h"
3737
#include "llvm/CodeGen/GlobalMerge.h"
38+
#include "llvm/CodeGen/GlobalMergeFunctions.h"
3839
#include "llvm/CodeGen/IndirectBrExpand.h"
3940
#include "llvm/CodeGen/InterleavedAccess.h"
4041
#include "llvm/CodeGen/InterleavedLoadCombine.h"
@@ -713,6 +714,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses(
713714
// Convert conditional moves to conditional jumps when profitable.
714715
if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize)
715716
addPass(SelectOptimizePass(&TM));
717+
718+
if (Opt.EnableGlobalMergeFunc)
719+
addPass(GlobalMergeFuncPass());
716720
}
717721

718722
/// Turn exception handling constructs into something the code generators can

llvm/include/llvm/Passes/MachinePassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
2929
MODULE_PASS("lower-emutls", LowerEmuTLSPass())
3030
MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
3131
MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass())
32+
MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
3233
#undef MODULE_PASS
3334

3435
#ifndef FUNCTION_ANALYSIS

llvm/include/llvm/Target/CGPassBuilderOption.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ struct CGPassBuilderOption {
3131
bool DisableVerify = false;
3232
bool EnableImplicitNullChecks = false;
3333
bool EnableBlockPlacementStats = false;
34+
bool EnableGlobalMergeFunc = false;
3435
bool EnableMachineFunctionSplitter = false;
3536
bool MISchedPostRA = false;
3637
bool EarlyLiveIntervals = false;

0 commit comments

Comments
 (0)