7
7
// ===----------------------------------------------------------------------===//
8
8
//
9
9
// A simple pass that looks at local memory arrays that are statically
10
- // sized and sets an appropriate alignment for them . This enables vectorization
10
+ // sized and potentially increases their alignment. This enables vectorization
11
11
// of loads/stores to these arrays if not explicitly specified by the client.
12
12
//
13
13
// TODO: Ideally we should do a bin-packing of local arrays to maximize
16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " NVPTX.h"
19
+ #include " llvm/Analysis/TargetTransformInfo.h"
19
20
#include " llvm/IR/DataLayout.h"
20
21
#include " llvm/IR/Instructions.h"
21
22
#include " llvm/IR/Module.h"
23
+ #include " llvm/IR/PassManager.h"
22
24
#include " llvm/Pass.h"
23
25
#include " llvm/Support/CommandLine.h"
24
26
#include " llvm/Support/MathExtras.h"
27
+ #include " llvm/Support/NVPTXAddrSpace.h"
25
28
26
29
using namespace llvm ;
27
30
@@ -30,16 +33,35 @@ static cl::opt<bool>
30
33
cl::init (false ), cl::Hidden,
31
34
cl::desc(" Use maximum alignment for local memory" ));
32
35
33
- static constexpr Align MaxPTXArrayAlignment = Align::Constant<16 >();
36
+ static Align getMaxLocalArrayAlignment (const TargetTransformInfo &TTI) {
37
+ const unsigned MaxBitWidth =
38
+ TTI.getLoadStoreVecRegBitWidth (NVPTXAS::ADDRESS_SPACE_LOCAL);
39
+ return Align (MaxBitWidth / 8 );
40
+ }
41
+
42
+ namespace {
43
+ struct NVPTXIncreaseLocalAlignment {
44
+ const Align MaxAlign;
45
+
46
+ NVPTXIncreaseLocalAlignment (const TargetTransformInfo &TTI)
47
+ : MaxAlign(getMaxLocalArrayAlignment(TTI)) {}
48
+
49
+ bool run (Function &F);
50
+ bool updateAllocaAlignment (AllocaInst *Alloca, const DataLayout &DL);
51
+ Align getAggressiveArrayAlignment (unsigned ArraySize);
52
+ Align getConservativeArrayAlignment (unsigned ArraySize);
53
+ };
54
+ } // namespace
34
55
35
56
// / Get the maximum useful alignment for an array. This is more likely to
36
57
// / produce holes in the local memory.
37
58
// /
38
59
// / Choose an alignment large enough that the entire array could be loaded with
39
60
// / a single vector load (if possible). Cap the alignment at
40
61
/// MaxAlign, the maximum alignment derived from the target.
41
- static Align getAggressiveArrayAlignment (const unsigned ArraySize) {
42
- return std::min (MaxPTXArrayAlignment, Align (PowerOf2Ceil (ArraySize)));
62
+ Align NVPTXIncreaseLocalAlignment::getAggressiveArrayAlignment (
63
+ const unsigned ArraySize) {
64
+ return std::min (MaxAlign, Align (PowerOf2Ceil (ArraySize)));
43
65
}
44
66
45
67
// / Get the alignment of arrays that reduces the chances of leaving holes when
@@ -49,20 +71,18 @@ static Align getAggressiveArrayAlignment(const unsigned ArraySize) {
49
71
// / Choose the largest alignment such that the array size is a multiple of the
50
72
// / alignment. If all elements of the buffer are allocated in order of
51
73
// / alignment (higher to lower) no holes will be left.
52
- static Align getConservativeArrayAlignment (const unsigned ArraySize) {
53
- return commonAlignment (MaxPTXArrayAlignment, ArraySize);
74
+ Align NVPTXIncreaseLocalAlignment::getConservativeArrayAlignment (
75
+ const unsigned ArraySize) {
76
+ return commonAlignment (MaxAlign, ArraySize);
54
77
}
55
78
56
79
// / Find a better alignment for local arrays
57
- static bool updateAllocaAlignment (const DataLayout &DL, AllocaInst *Alloca) {
80
+ bool NVPTXIncreaseLocalAlignment::updateAllocaAlignment (AllocaInst *Alloca,
81
+ const DataLayout &DL) {
58
82
// Looking for statically sized local arrays
59
83
if (!Alloca->isStaticAlloca ())
60
84
return false ;
61
85
62
- // For now, we only support array allocas
63
- if (!(Alloca->isArrayAllocation () || Alloca->getAllocatedType ()->isArrayTy ()))
64
- return false ;
65
-
66
86
const auto ArraySize = Alloca->getAllocationSize (DL);
67
87
if (!(ArraySize && ArraySize->isFixed ()))
68
88
return false ;
@@ -80,14 +100,14 @@ static bool updateAllocaAlignment(const DataLayout &DL, AllocaInst *Alloca) {
80
100
return false ;
81
101
}
82
102
83
- static bool runSetLocalArrayAlignment (Function &F) {
103
+ bool NVPTXIncreaseLocalAlignment::run (Function &F) {
84
104
bool Changed = false ;
85
- const DataLayout &DL = F.getParent ()->getDataLayout ();
105
+ const auto &DL = F.getParent ()->getDataLayout ();
86
106
87
107
BasicBlock &EntryBB = F.getEntryBlock ();
88
108
for (Instruction &I : EntryBB)
89
109
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(&I))
90
- Changed |= updateAllocaAlignment (DL, Alloca );
110
+ Changed |= updateAllocaAlignment (Alloca, DL );
91
111
92
112
return Changed;
93
113
}
@@ -98,6 +118,9 @@ struct NVPTXIncreaseLocalAlignmentLegacyPass : public FunctionPass {
98
118
// Default constructor: forwards the pass identifier ID to the FunctionPass base.
NVPTXIncreaseLocalAlignmentLegacyPass () : FunctionPass(ID) {}
99
119
100
120
bool runOnFunction (Function &F) override ;
121
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
122
+ AU.addRequired <TargetTransformInfoWrapperPass>();
123
+ }
101
124
// Returns the human-readable name of this legacy pass.
StringRef getPassName () const override {
102
125
return " NVPTX Increase Local Alignment" ;
103
126
}
@@ -115,12 +138,15 @@ FunctionPass *llvm::createNVPTXIncreaseLocalAlignmentPass() {
115
138
}
116
139
117
140
/// Legacy pass manager entry point: look up the TargetTransformInfo for \p F
/// and hand the function to the shared NVPTXIncreaseLocalAlignment worker.
/// \returns the worker's result, i.e. whether \p F was reported as changed.
bool NVPTXIncreaseLocalAlignmentLegacyPass::runOnFunction(Function &F) {
  auto &TTIWrapper = getAnalysis<TargetTransformInfoWrapperPass>();
  const TargetTransformInfo &TTI = TTIWrapper.getTTI(F);

  NVPTXIncreaseLocalAlignment Worker(TTI);
  return Worker.run(F);
}
120
144
121
145
PreservedAnalyses
122
- NVPTXIncreaseLocalAlignmentPass::run (Function &F, FunctionAnalysisManager &AM) {
123
- bool Changed = runSetLocalArrayAlignment (F);
146
+ NVPTXIncreaseLocalAlignmentPass::run (Function &F,
147
+ FunctionAnalysisManager &FAM) {
148
+ const auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
149
+ bool Changed = NVPTXIncreaseLocalAlignment (TTI).run (F);
124
150
125
151
if (!Changed)
126
152
return PreservedAnalyses::all ();
0 commit comments