7
7
// ===----------------------------------------------------------------------===//
8
8
//
9
9
// A simple pass that looks at local memory arrays that are statically
10
- // sized and sets an appropriate alignment for them . This enables vectorization
10
+ // sized and potentially increases their alignment. This enables vectorization
11
11
// of loads/stores to these arrays if not explicitly specified by the client.
12
12
//
13
13
// TODO: Ideally we should do a bin-packing of local arrays to maximize
16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " NVPTX.h"
19
+ #include " llvm/Analysis/TargetTransformInfo.h"
19
20
#include " llvm/IR/DataLayout.h"
20
21
#include " llvm/IR/Instructions.h"
21
22
#include " llvm/IR/Module.h"
23
+ #include " llvm/IR/PassManager.h"
22
24
#include " llvm/Pass.h"
23
25
#include " llvm/Support/CommandLine.h"
24
26
#include " llvm/Support/MathExtras.h"
27
+ #include " llvm/Support/NVPTXAddrSpace.h"
25
28
26
29
using namespace llvm ;
27
30
@@ -30,16 +33,30 @@ static cl::opt<bool>
30
33
cl::init (false ), cl::Hidden,
31
34
cl::desc(" Use maximum alignment for local memory" ));
32
35
33
- static constexpr Align MaxPTXArrayAlignment = Align::Constant<16 >();
36
+ namespace {
37
+ struct NVPTXIncreaseLocalAlignment {
38
+ const Align MaxAlign;
39
+
40
+ NVPTXIncreaseLocalAlignment (const TargetTransformInfo &TTI)
41
+ : MaxAlign(TTI.getLoadStoreVecRegBitWidth(NVPTXAS::ADDRESS_SPACE_LOCAL)) {
42
+ }
43
+
44
+ bool run (Function &F);
45
+ bool updateAllocaAlignment (AllocaInst *Alloca, const DataLayout &DL);
46
+ Align getAggressiveArrayAlignment (unsigned ArraySize);
47
+ Align getConservativeArrayAlignment (unsigned ArraySize);
48
+ };
49
+ } // namespace
34
50
35
51
// / Get the maximum useful alignment for an array. This is more likely to
36
52
// / produce holes in the local memory.
37
53
// /
38
54
// / Choose an alignment large enough that the entire array could be loaded with
39
55
// / a single vector load (if possible). Cap the alignment at
40
56
/// MaxAlign.
41
- static Align getAggressiveArrayAlignment (const unsigned ArraySize) {
42
- return std::min (MaxPTXArrayAlignment, Align (PowerOf2Ceil (ArraySize)));
57
+ Align NVPTXIncreaseLocalAlignment::getAggressiveArrayAlignment (
58
+ const unsigned ArraySize) {
59
+ return std::min (MaxAlign, Align (PowerOf2Ceil (ArraySize)));
43
60
}
44
61
45
62
// / Get the alignment of arrays that reduces the chances of leaving holes when
@@ -49,20 +66,18 @@ static Align getAggressiveArrayAlignment(const unsigned ArraySize) {
49
66
// / Choose the largest alignment such that the array size is a multiple of the
50
67
// / alignment. If all elements of the buffer are allocated in order of
51
68
// / alignment (higher to lower) no holes will be left.
52
- static Align getConservativeArrayAlignment (const unsigned ArraySize) {
53
- return commonAlignment (MaxPTXArrayAlignment, ArraySize);
69
+ Align NVPTXIncreaseLocalAlignment::getConservativeArrayAlignment (
70
+ const unsigned ArraySize) {
71
+ return commonAlignment (MaxAlign, ArraySize);
54
72
}
55
73
56
74
/// Find a better alignment for a statically sized local alloca (no longer
/// restricted to array allocas).
57
- static bool updateAllocaAlignment (const DataLayout &DL, AllocaInst *Alloca) {
75
+ bool NVPTXIncreaseLocalAlignment::updateAllocaAlignment (AllocaInst *Alloca,
76
+ const DataLayout &DL) {
58
77
// Looking for statically sized local arrays
59
78
if (!Alloca->isStaticAlloca ())
60
79
return false ;
61
80
62
- // For now, we only support array allocas
63
- if (!(Alloca->isArrayAllocation () || Alloca->getAllocatedType ()->isArrayTy ()))
64
- return false ;
65
-
66
81
const auto ArraySize = Alloca->getAllocationSize (DL);
67
82
if (!(ArraySize && ArraySize->isFixed ()))
68
83
return false ;
@@ -80,14 +95,14 @@ static bool updateAllocaAlignment(const DataLayout &DL, AllocaInst *Alloca) {
80
95
return false ;
81
96
}
82
97
83
- static bool runSetLocalArrayAlignment (Function &F) {
98
+ bool NVPTXIncreaseLocalAlignment::run (Function &F) {
84
99
bool Changed = false ;
85
- const DataLayout &DL = F.getParent ()->getDataLayout ();
100
+ const auto &DL = F.getParent ()->getDataLayout ();
86
101
87
102
BasicBlock &EntryBB = F.getEntryBlock ();
88
103
for (Instruction &I : EntryBB)
89
104
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(&I))
90
- Changed |= updateAllocaAlignment (DL, Alloca );
105
+ Changed |= updateAllocaAlignment (Alloca, DL );
91
106
92
107
return Changed;
93
108
}
@@ -115,12 +130,15 @@ FunctionPass *llvm::createNVPTXIncreaseLocalAlignmentPass() {
115
130
}
116
131
117
132
bool NVPTXIncreaseLocalAlignmentLegacyPass::runOnFunction(Function &F) {
  // Legacy pass-manager entry point: fetch the per-function TTI and delegate
  // to the shared implementation.
  // NOTE(review): getAnalysis<> requires this pass to declare
  // TargetTransformInfoWrapperPass in getAnalysisUsage; that declaration is
  // not visible in this chunk -- confirm it exists.
  auto &TTIWrapper = getAnalysis<TargetTransformInfoWrapperPass>();
  const TargetTransformInfo &TTI = TTIWrapper.getTTI(F);
  NVPTXIncreaseLocalAlignment Impl(TTI);
  return Impl.run(F);
}
120
136
121
137
PreservedAnalyses
122
- NVPTXIncreaseLocalAlignmentPass::run (Function &F, FunctionAnalysisManager &AM) {
123
- bool Changed = runSetLocalArrayAlignment (F);
138
+ NVPTXIncreaseLocalAlignmentPass::run (Function &F,
139
+ FunctionAnalysisManager &FAM) {
140
+ const auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
141
+ bool Changed = NVPTXIncreaseLocalAlignment (TTI).run (F);
124
142
125
143
if (!Changed)
126
144
return PreservedAnalyses::all ();
0 commit comments