Skip to content

Commit 6b8d04c

Browse files
committed
[CodeLayout] Refactor std::vector uses, namespace, and EdgeCountT. NFC
* Place types and functions in the llvm::codelayout namespace.
* Change EdgeCountT from pair<pair<uint64_t, uint64_t>, uint64_t> to a struct and utilize structured bindings. It is not conventional to use the "T" suffix for structure types.
* Remove a redundant copy in ChainT::merge.
* Change {ExtTSPImpl,CDSortImpl}::run to use a return value instead of an output parameter.
* Rename applyCDSLayout to computeCacheDirectedLayout: (a) avoid the rare abbreviation "CDS" (cache-directed sort); (b) "compute" is more conventional for the specific use case.
* Change the parameter types from std::vector to ArrayRef so that SmallVector arguments can be used.
* Similarly, rename applyExtTspLayout to computeExtTspLayout.

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D159526
1 parent e6ebd28 commit 6b8d04c

File tree

5 files changed

+106
-121
lines changed

5 files changed

+106
-121
lines changed

bolt/lib/Passes/ReorderAlgorithm.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -531,21 +531,21 @@ void ExtTSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF,
531531
}
532532

533533
// Initialize CFG edges
534-
using JumpT = std::pair<uint64_t, uint64_t>;
535-
std::vector<std::pair<JumpT, uint64_t>> JumpCounts;
534+
std::vector<codelayout::EdgeCount> JumpCounts;
536535
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
537536
auto BI = BB->branch_info_begin();
538537
for (BinaryBasicBlock *SuccBB : BB->successors()) {
539538
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
540539
"missing profile for a jump");
541-
auto It = std::make_pair(BB->getLayoutIndex(), SuccBB->getLayoutIndex());
542-
JumpCounts.push_back(std::make_pair(It, BI->Count));
540+
JumpCounts.push_back(
541+
{BB->getLayoutIndex(), SuccBB->getLayoutIndex(), BI->Count});
543542
++BI;
544543
}
545544
}
546545

547546
// Run the layout algorithm
548-
auto Result = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
547+
auto Result =
548+
codelayout::computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
549549
Order.reserve(BF.getLayout().block_size());
550550
for (uint64_t R : Result)
551551
Order.push_back(OrigOrder[R]);

bolt/lib/Passes/ReorderFunctions.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -331,23 +331,21 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
331331
// Initialize CFG nodes and their data
332332
std::vector<uint64_t> FuncSizes;
333333
std::vector<uint64_t> FuncCounts;
334-
using JumpT = std::pair<uint64_t, uint64_t>;
335-
std::vector<std::pair<JumpT, uint64_t>> CallCounts;
334+
std::vector<codelayout::EdgeCount> CallCounts;
336335
std::vector<uint64_t> CallOffsets;
337336
for (NodeId F = 0; F < Cg.numNodes(); ++F) {
338337
FuncSizes.push_back(Cg.size(F));
339338
FuncCounts.push_back(Cg.samples(F));
340339
for (NodeId Succ : Cg.successors(F)) {
341340
const Arc &Arc = *Cg.findArc(F, Succ);
342-
auto It = std::make_pair(F, Succ);
343-
CallCounts.push_back(std::make_pair(It, Arc.weight()));
341+
CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
344342
CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
345343
}
346344
}
347345

348346
// Run the layout algorithm.
349-
std::vector<uint64_t> Result =
350-
applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets);
347+
std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
348+
FuncSizes, FuncCounts, CallCounts, CallOffsets);
351349

352350
// Create a single cluster from the computed order of hot functions.
353351
std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());

llvm/include/llvm/Transforms/Utils/CodeLayout.h

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,21 @@
1414
#ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
1515
#define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
1616

17+
#include "llvm/ADT/ArrayRef.h"
1718
#include "llvm/ADT/DenseMap.h"
1819

20+
#include <utility>
1921
#include <vector>
2022

21-
namespace llvm {
23+
namespace llvm::codelayout {
2224

2325
using EdgeT = std::pair<uint64_t, uint64_t>;
24-
using EdgeCountT = std::pair<EdgeT, uint64_t>;
26+
27+
struct EdgeCount {
28+
uint64_t src;
29+
uint64_t dst;
30+
uint64_t count;
31+
};
2532

2633
/// Find a layout of nodes (basic blocks) of a given CFG optimizing jump
2734
/// locality and thus processor I-cache utilization. This is achieved via
@@ -34,24 +41,22 @@ using EdgeCountT = std::pair<EdgeT, uint64_t>;
3441
/// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The
3542
/// map also defines the edges in CFG and should include 0-count edges.
3643
/// \returns The best block order found.
37-
std::vector<uint64_t>
38-
applyExtTspLayout(const std::vector<uint64_t> &NodeSizes,
39-
const std::vector<uint64_t> &NodeCounts,
40-
const std::vector<EdgeCountT> &EdgeCounts);
44+
std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
45+
ArrayRef<uint64_t> NodeCounts,
46+
ArrayRef<EdgeCount> EdgeCounts);
4147

4248
/// Estimate the "quality" of a given node order in CFG. The higher the score,
4349
/// the better the order is. The score is designed to reflect the locality of
4450
/// the given order, which is anti-correlated with the number of I-cache misses
4551
/// in a typical execution of the function.
46-
double calcExtTspScore(const std::vector<uint64_t> &Order,
47-
const std::vector<uint64_t> &NodeSizes,
48-
const std::vector<uint64_t> &NodeCounts,
49-
const std::vector<EdgeCountT> &EdgeCounts);
52+
double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes,
53+
ArrayRef<uint64_t> NodeCounts,
54+
ArrayRef<EdgeCount> EdgeCounts);
5055

5156
/// Estimate the "quality" of the current node order in CFG.
52-
double calcExtTspScore(const std::vector<uint64_t> &NodeSizes,
53-
const std::vector<uint64_t> &NodeCounts,
54-
const std::vector<EdgeCountT> &EdgeCounts);
57+
double calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
58+
ArrayRef<uint64_t> NodeCounts,
59+
ArrayRef<EdgeCount> EdgeCounts);
5560

5661
/// Algorithm-specific params for Cache-Directed Sort. The values are tuned for
5762
/// the best performance of large-scale front-end bound binaries.
@@ -75,18 +80,16 @@ struct CDSortConfig {
7580
/// map also defines the edges in CFG and should include 0-count edges.
7681
/// \p CallOffsets: The offsets of the calls from their source nodes.
7782
/// \returns The best function order found.
78-
std::vector<uint64_t> applyCDSLayout(const std::vector<uint64_t> &FuncSizes,
79-
const std::vector<uint64_t> &FuncCounts,
80-
const std::vector<EdgeCountT> &CallCounts,
81-
const std::vector<uint64_t> &CallOffsets);
83+
std::vector<uint64_t> computeCacheDirectedLayout(
84+
ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
85+
ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets);
8286

8387
/// Apply a Cache-Directed Sort with a custom config.
84-
std::vector<uint64_t> applyCDSLayout(const CDSortConfig &Config,
85-
const std::vector<uint64_t> &FuncSizes,
86-
const std::vector<uint64_t> &FuncCounts,
87-
const std::vector<EdgeCountT> &CallCounts,
88-
const std::vector<uint64_t> &CallOffsets);
88+
std::vector<uint64_t> computeCacheDirectedLayout(
89+
const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
90+
ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
91+
ArrayRef<uint64_t> CallOffsets);
8992

90-
} // end namespace llvm
93+
} // namespace llvm::codelayout
9194

9295
#endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3501,7 +3501,7 @@ void MachineBlockPlacement::applyExtTsp() {
35013501

35023502
auto BlockSizes = std::vector<uint64_t>(F->size());
35033503
auto BlockCounts = std::vector<uint64_t>(F->size());
3504-
std::vector<EdgeCountT> JumpCounts;
3504+
std::vector<codelayout::EdgeCount> JumpCounts;
35053505
for (MachineBasicBlock &MBB : *F) {
35063506
// Getting the block frequency.
35073507
BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
@@ -3520,8 +3520,8 @@ void MachineBlockPlacement::applyExtTsp() {
35203520
for (MachineBasicBlock *Succ : MBB.successors()) {
35213521
auto EP = MBPI->getEdgeProbability(&MBB, Succ);
35223522
BlockFrequency JumpFreq = BlockFreq * EP;
3523-
auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
3524-
JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency()));
3523+
JumpCounts.push_back(
3524+
{BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});
35253525
}
35263526
}
35273527

@@ -3534,7 +3534,7 @@ void MachineBlockPlacement::applyExtTsp() {
35343534
calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
35353535

35363536
// Run the layout algorithm.
3537-
auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
3537+
auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
35383538
std::vector<const MachineBasicBlock *> NewBlockOrder;
35393539
NewBlockOrder.reserve(F->size());
35403540
for (uint64_t Node : NewOrder) {

0 commit comments

Comments
 (0)