Skip to content

Commit 9825b96

Browse files
committed
LLVM and SPIRV-LLVM-Translator pulldown (WW30)
LLVM: llvm/llvm-project@190518d SPIRV-LLVM-Translator: KhronosGroup/SPIRV-LLVM-Translator@6904b38
2 parents 1ccb99e + b335b7a commit 9825b96

File tree

2,759 files changed

+149821
-75197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,759 files changed

+149821
-75197
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "llvm/Support/ErrorOr.h"
4040
#include "llvm/Support/raw_ostream.h"
4141
#include <functional>
42+
#include <list>
4243
#include <map>
4344
#include <set>
4445
#include <shared_mutex>
@@ -199,7 +200,7 @@ class BinaryContext {
199200
uint32_t DuplicatedJumpTables{0x10000000};
200201

201202
/// Function fragments to skip.
202-
std::vector<BinaryFunction *> FragmentsToSkip;
203+
std::unordered_set<BinaryFunction *> FragmentsToSkip;
203204

204205
/// The runtime library.
205206
std::unique_ptr<RuntimeLibrary> RtLibrary;
@@ -235,6 +236,18 @@ class BinaryContext {
235236
MIB = std::move(TargetBuilder);
236237
}
237238

239+
/// Return function fragments to skip.
240+
const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
241+
return FragmentsToSkip;
242+
}
243+
244+
/// Add function fragment to skip.
245+
void addFragmentsToSkip(BinaryFunction *Function) {
246+
FragmentsToSkip.insert(Function);
247+
}
248+
249+
void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
250+
238251
/// Given DWOId returns CU if it exists in DWOCUs.
239252
Optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
240253

@@ -475,15 +488,15 @@ class BinaryContext {
475488
/// If \p NextJTAddress is different from zero, it is used as an upper
476489
/// bound for jump table memory layout.
477490
///
478-
/// Optionally, populate \p Offsets with jump table entries. The entries
491+
/// Optionally, populate \p EntriesAsAddress with jump table entries. The entries
479492
/// could be partially populated if the jump table detection fails.
480493
bool analyzeJumpTable(const uint64_t Address,
481494
const JumpTable::JumpTableType Type, BinaryFunction &BF,
482495
const uint64_t NextJTAddress = 0,
483-
JumpTable::OffsetsType *Offsets = nullptr);
496+
JumpTable::AddressesType *EntriesAsAddress = nullptr);
484497

485498
/// After jump table locations are established, this function will populate
486-
/// their OffsetEntries based on memory contents.
499+
/// their EntriesAsAddress based on memory contents.
487500
void populateJumpTables();
488501

489502
/// Returns a jump table ID and label pointing to the duplicated jump table.
@@ -498,12 +511,12 @@ class BinaryContext {
498511
/// to function \p BF.
499512
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
500513

501-
/// Free memory used by jump table offsets
502-
void clearJumpTableOffsets() {
514+
/// Free memory used by JumpTable's EntriesAsAddress
515+
void clearJumpTableTempData() {
503516
for (auto &JTI : JumpTables) {
504517
JumpTable &JT = *JTI.second;
505-
JumpTable::OffsetsType Temp;
506-
Temp.swap(JT.OffsetEntries);
518+
JumpTable::AddressesType Temp;
519+
Temp.swap(JT.EntriesAsAddress);
507520
}
508521
}
509522
/// Return true if the array of bytes represents a valid code padding.
@@ -641,6 +654,10 @@ class BinaryContext {
641654
/// special linux kernel sections
642655
std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
643656

657+
/// List of external addresses in the code that are not a function start
658+
/// and are referenced from BinaryFunction.
659+
std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
660+
644661
/// PseudoProbe decoder
645662
MCPseudoProbeDecoder ProbeDecoder;
646663

@@ -884,8 +901,23 @@ class BinaryContext {
884901
bool registerFragment(BinaryFunction &TargetFunction,
885902
BinaryFunction &Function) const;
886903

887-
/// Resolve inter-procedural dependencies from \p Function.
888-
void processInterproceduralReferences(BinaryFunction &Function);
904+
/// Add interprocedural reference for \p Function to \p Address
905+
void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
906+
InterproceduralReferences.push_back({Function, Address});
907+
}
908+
909+
/// Used to fix the target of linker-generated AArch64 adrp + add
910+
/// sequence with no relocation info.
911+
void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
912+
MCInst &LoadHiBits, uint64_t Target);
913+
914+
/// Return true if AArch64 veneer was successfully matched at a given
915+
/// \p Address and register veneer binary function if \p MatchOnly
916+
/// argument is false.
917+
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
918+
919+
/// Resolve inter-procedural dependencies.
920+
void processInterproceduralReferences();
889921

890922
/// Skip functions with all parent and child fragments transitively.
891923
void skipMarkedFragments();

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,6 @@ class BinaryFunction {
253253

254254
std::unique_ptr<BinaryLoopInfo> BLI;
255255

256-
/// Set of external addresses in the code that are not a function start
257-
/// and are referenced from this function.
258-
std::set<uint64_t> InterproceduralReferences;
259-
260256
/// All labels in the function that are referenced via relocations from
261257
/// data objects. Typically these are jump table destinations and computed
262258
/// goto labels.
@@ -337,9 +333,9 @@ class BinaryFunction {
337333
/// True if the original entry point was patched.
338334
bool IsPatched{false};
339335

340-
/// True if the function contains jump table with entries pointing to
341-
/// locations in fragments.
342-
bool HasSplitJumpTable{false};
336+
/// True if the function contains an explicit or implicit indirect branch to
337+
/// its split fragments, e.g., split jump table, landing pad in split fragment.
338+
bool HasIndirectTargetToSplitFragment{false};
343339

344340
/// True if there are no control-flow edges with successors in other functions
345341
/// (i.e. if tail calls have edges to function-local basic blocks).
@@ -1161,8 +1157,8 @@ class BinaryFunction {
11611157
/// Return the number of emitted instructions for this function.
11621158
uint32_t getNumNonPseudos() const {
11631159
uint32_t N = 0;
1164-
for (BinaryBasicBlock *const &BB : layout())
1165-
N += BB->getNumNonPseudos();
1160+
for (const BinaryBasicBlock &BB : blocks())
1161+
N += BB.getNumNonPseudos();
11661162
return N;
11671163
}
11681164

@@ -1441,9 +1437,12 @@ class BinaryFunction {
14411437
/// otherwise processed.
14421438
bool isPseudo() const { return IsPseudo; }
14431439

1444-
/// Return true if the function contains a jump table with entries pointing
1445-
/// to split fragments.
1446-
bool hasSplitJumpTable() const { return HasSplitJumpTable; }
1440+
/// Return true if the function contains explicit or implicit indirect branch
1441+
/// to its split fragments, e.g., split jump table, landing pad in split
1442+
/// fragment.
1443+
bool hasIndirectTargetToSplitFragment() const {
1444+
return HasIndirectTargetToSplitFragment;
1445+
}
14471446

14481447
/// Return true if all CFG edges have local successors.
14491448
bool hasCanonicalCFG() const { return HasCanonicalCFG; }
@@ -1838,7 +1837,9 @@ class BinaryFunction {
18381837

18391838
void setIsPatched(bool V) { IsPatched = V; }
18401839

1841-
void setHasSplitJumpTable(bool V) { HasSplitJumpTable = V; }
1840+
void setHasIndirectTargetToSplitFragment(bool V) {
1841+
HasIndirectTargetToSplitFragment = V;
1842+
}
18421843

18431844
void setHasCanonicalCFG(bool V) { HasCanonicalCFG = V; }
18441845

@@ -2334,13 +2335,13 @@ class BinaryFunction {
23342335
size_t estimateHotSize(const bool UseSplitSize = true) const {
23352336
size_t Estimate = 0;
23362337
if (UseSplitSize && isSplit()) {
2337-
for (const BinaryBasicBlock *BB : BasicBlocksLayout)
2338-
if (!BB->isCold())
2339-
Estimate += BC.computeCodeSize(BB->begin(), BB->end());
2338+
for (const BinaryBasicBlock &BB : blocks())
2339+
if (!BB.isCold())
2340+
Estimate += BC.computeCodeSize(BB.begin(), BB.end());
23402341
} else {
2341-
for (const BinaryBasicBlock *BB : BasicBlocksLayout)
2342-
if (BB->getKnownExecutionCount() != 0)
2343-
Estimate += BC.computeCodeSize(BB->begin(), BB->end());
2342+
for (const BinaryBasicBlock &BB : blocks())
2343+
if (BB.getKnownExecutionCount() != 0)
2344+
Estimate += BC.computeCodeSize(BB.begin(), BB.end());
23442345
}
23452346
return Estimate;
23462347
}
@@ -2349,16 +2350,16 @@ class BinaryFunction {
23492350
if (!isSplit())
23502351
return estimateSize();
23512352
size_t Estimate = 0;
2352-
for (const BinaryBasicBlock *BB : BasicBlocksLayout)
2353-
if (BB->isCold())
2354-
Estimate += BC.computeCodeSize(BB->begin(), BB->end());
2353+
for (const BinaryBasicBlock &BB : blocks())
2354+
if (BB.isCold())
2355+
Estimate += BC.computeCodeSize(BB.begin(), BB.end());
23552356
return Estimate;
23562357
}
23572358

23582359
size_t estimateSize() const {
23592360
size_t Estimate = 0;
2360-
for (const BinaryBasicBlock *BB : BasicBlocksLayout)
2361-
Estimate += BC.computeCodeSize(BB->begin(), BB->end());
2361+
for (const BinaryBasicBlock &BB : blocks())
2362+
Estimate += BC.computeCodeSize(BB.begin(), BB.end());
23622363
return Estimate;
23632364
}
23642365

bolt/include/bolt/Core/JumpTable.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ class JumpTable : public BinaryData {
6969
/// All the entries as labels.
7070
std::vector<MCSymbol *> Entries;
7171

72-
/// All the entries as offsets into a function. Invalid after CFG is built.
73-
using OffsetsType = std::vector<uint64_t>;
74-
OffsetsType OffsetEntries;
72+
/// All the entries as absolute addresses. Invalid after disassembly is done.
73+
using AddressesType = std::vector<uint64_t>;
74+
AddressesType EntriesAsAddress;
7575

7676
/// Map <Offset> -> <Label> used for embedded jump tables. Label at 0 offset
7777
/// is the main label for the jump table.
@@ -87,18 +87,17 @@ class JumpTable : public BinaryData {
8787
uint64_t Count{0};
8888

8989
/// BinaryFunctions this jump table belongs to.
90-
BinaryFunction *Parent{nullptr};
90+
SmallVector<BinaryFunction *, 1> Parents;
9191

9292
private:
9393
/// Constructor should only be called by a BinaryContext.
9494
JumpTable(MCSymbol &Symbol, uint64_t Address, size_t EntrySize,
95-
JumpTableType Type, LabelMapType &&Labels, BinaryFunction &BF,
96-
BinarySection &Section);
95+
JumpTableType Type, LabelMapType &&Labels, BinarySection &Section);
9796

9897
public:
9998
/// Return the size of the jump table.
10099
uint64_t getSize() const {
101-
return std::max(OffsetEntries.size(), Entries.size()) * EntrySize;
100+
return std::max(EntriesAsAddress.size(), Entries.size()) * EntrySize;
102101
}
103102

104103
const MCSymbol *getFirstLabel() const {

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,7 +1384,7 @@ class MCPlusBuilder {
13841384
llvm_unreachable("not implemented");
13851385
}
13861386

1387-
/// Return true if the instruction CurInst, in combination with the recent
1387+
/// Return a non-zero value if the instruction CurInst, in combination with the recent
13881388
/// history of disassembled instructions supplied by [Begin, End), is a linker
13891389
/// generated veneer/stub that needs patching. This happens in AArch64 when
13901390
/// the code is large and the linker needs to generate stubs, but it does
@@ -1394,11 +1394,14 @@ class MCPlusBuilder {
13941394
/// is put in TgtLowBits, and its pair in TgtHiBits. If the instruction in
13951395
/// TgtHiBits does not have an immediate operand, but an expression, then
13961396
/// this expression is put in TgtHiSym and Tgt only contains the lower bits.
1397-
virtual bool matchLinkerVeneer(InstructionIterator Begin,
1398-
InstructionIterator End, uint64_t Address,
1399-
const MCInst &CurInst, MCInst *&TargetHiBits,
1400-
MCInst *&TargetLowBits,
1401-
uint64_t &Target) const {
1397+
/// The return value is the total number of instructions that were used to
1398+
/// create the veneer.
1399+
virtual uint64_t matchLinkerVeneer(InstructionIterator Begin,
1400+
InstructionIterator End, uint64_t Address,
1401+
const MCInst &CurInst,
1402+
MCInst *&TargetHiBits,
1403+
MCInst *&TargetLowBits,
1404+
uint64_t &Target) const {
14021405
llvm_unreachable("not implemented");
14031406
}
14041407

bolt/include/bolt/Passes/AllocCombiner.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ namespace bolt {
1717
class AllocCombinerPass : public BinaryFunctionPass {
1818
/// Stats aggregating variables
1919
uint64_t NumCombined{0};
20+
uint64_t DynamicCountCombined{0};
2021
DenseSet<const BinaryFunction *> FuncsChanged;
2122

2223
void combineAdjustments(BinaryFunction &BF);

bolt/include/bolt/Passes/DataflowAnalysis.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,8 @@ class DataflowAnalysis {
315315
void run() {
316316
derived().preflight();
317317

318+
if (Func.begin() == Func.end())
319+
return;
318320
// Initialize state for all points of the function
319321
for (BinaryBasicBlock &BB : Func) {
320322
StateTy &St = getOrCreateStateAt(BB);
@@ -324,7 +326,6 @@ class DataflowAnalysis {
324326
St = derived().getStartingStateAtPoint(Inst);
325327
}
326328
}
327-
assert(Func.begin() != Func.end() && "Unexpected empty function");
328329

329330
std::queue<BinaryBasicBlock *> Worklist;
330331
// TODO: Pushing this in a DFS ordering will greatly speed up the dataflow

bolt/include/bolt/Passes/FrameAnalysis.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@ class FrameAnalysis {
121121
/// Set of functions that require the stack to be 16B aligned
122122
DenseSet<const BinaryFunction *> FunctionsRequireAlignment;
123123

124+
/// Set of functions that perform computations with stack addresses and
125+
/// complicate our understanding of aliasing of stack spaces.
126+
DenseSet<const BinaryFunction *> FunctionsWithStackArithmetic;
127+
124128
/// Owns ArgAccesses for all instructions. References to elements are
125129
/// attached to instructions as indexes to this vector, in MCAnnotations.
126130
std::vector<ArgAccesses> ArgAccessesVector;
@@ -130,7 +134,6 @@ class FrameAnalysis {
130134
/// Analysis stats counters
131135
uint64_t NumFunctionsNotOptimized{0};
132136
uint64_t NumFunctionsFailedRestoreFI{0};
133-
uint64_t CountFunctionsNotOptimized{0};
134137
uint64_t CountFunctionsFailedRestoreFI{0};
135138
uint64_t CountDenominator{0};
136139

@@ -184,6 +187,12 @@ class FrameAnalysis {
184187
return FunctionsRequireAlignment.count(&Func);
185188
}
186189

190+
/// Return true if \p Func does computation with the address of any stack
191+
/// position, meaning we have limited alias analysis on this function.
192+
bool hasStackArithmetic(const BinaryFunction &Func) const {
193+
return FunctionsWithStackArithmetic.count(&Func);
194+
}
195+
187196
/// Functions for retrieving our specific MCAnnotation data from instructions
188197
ErrorOr<ArgAccesses &> getArgAccessesFor(const MCInst &Inst);
189198

bolt/include/bolt/Passes/FrameOptimizer.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,12 @@ class FrameOptimizerPass : public BinaryFunctionPass {
7777
/// Stats aggregating variables
7878
uint64_t NumRedundantLoads{0};
7979
uint64_t NumRedundantStores{0};
80-
uint64_t NumLoadsChangedToReg{0};
81-
uint64_t NumLoadsChangedToImm{0};
80+
uint64_t FreqRedundantLoads{0};
81+
uint64_t FreqRedundantStores{0};
82+
uint64_t FreqLoadsChangedToReg{0};
83+
uint64_t FreqLoadsChangedToImm{0};
8284
uint64_t NumLoadsDeleted{0};
85+
uint64_t FreqLoadsDeleted{0};
8386

8487
DenseSet<const BinaryFunction *> FuncsChanged;
8588

bolt/include/bolt/Passes/MCF.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ namespace llvm {
1313
namespace bolt {
1414

1515
class BinaryFunction;
16+
class DataflowInfoManager;
1617

1718
enum MCFCostFunction : char {
1819
MCF_DISABLE = 0,
@@ -22,6 +23,12 @@ enum MCFCostFunction : char {
2223
MCF_BLAMEFTS
2324
};
2425

26+
/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
27+
/// without the Usability Burden" by Diego Novillo to make basic block counts
28+
/// equal if we show that A dominates B, B post-dominates A and they are in the
29+
/// same loop and same loop nesting level.
30+
void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF);
31+
2532
/// Fill edge counts based on the basic block count. Used in nonLBR mode when
2633
/// we only have bb count.
2734
void estimateEdgeCounts(BinaryFunction &BF);

bolt/include/bolt/Passes/ShrinkWrapping.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,16 @@ class ShrinkWrapping {
303303
std::vector<int64_t> PopOffsetByReg;
304304
std::vector<MCPhysReg> DomOrder;
305305
CalleeSavedAnalysis CSA;
306-
std::vector<SmallSetVector<MCInst *, 4>> SavePos;
307-
std::vector<uint64_t> BestSaveCount;
308-
std::vector<MCInst *> BestSavePos;
306+
std::vector<std::vector<uint64_t>> BestSaveCount;
307+
std::vector<std::vector<MCInst *>> BestSavePos;
309308

310309
/// Pass stats
311310
static std::atomic_uint64_t SpillsMovedRegularMode;
312311
static std::atomic_uint64_t SpillsMovedPushPopMode;
312+
static std::atomic_uint64_t SpillsMovedDynamicCount;
313+
static std::atomic_uint64_t SpillsFailedDynamicCount;
314+
static std::atomic_uint64_t InstrDynamicCount;
315+
static std::atomic_uint64_t StoreDynamicCount;
313316

314317
Optional<unsigned> AnnotationIndex;
315318

@@ -515,7 +518,7 @@ class ShrinkWrapping {
515518
BC.MIB->removeAnnotation(Inst, getAnnotationIndex());
516519
}
517520

518-
bool perform();
521+
bool perform(bool HotOnly = false);
519522

520523
static void printStats();
521524
};

0 commit comments

Comments
 (0)