Skip to content

Commit 755ef49

Browse files
committed
address comments
1 parent 791bb1d commit 755ef49

File tree

5 files changed

+735
-16
lines changed

5 files changed

+735
-16
lines changed

llvm/lib/Target/NVPTX/NVPTXIncreaseAlignment.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,18 @@ static cl::opt<bool>
3333
cl::init(false), cl::Hidden,
3434
cl::desc("Use maximum alignment for local memory"));
3535

36+
static Align getMaxLocalArrayAlignment(const TargetTransformInfo &TTI) {
37+
const unsigned MaxBitWidth =
38+
TTI.getLoadStoreVecRegBitWidth(NVPTXAS::ADDRESS_SPACE_LOCAL);
39+
return Align(MaxBitWidth / 8);
40+
}
41+
3642
namespace {
3743
struct NVPTXIncreaseLocalAlignment {
3844
const Align MaxAlign;
3945

4046
NVPTXIncreaseLocalAlignment(const TargetTransformInfo &TTI)
41-
: MaxAlign(TTI.getLoadStoreVecRegBitWidth(NVPTXAS::ADDRESS_SPACE_LOCAL)) {
42-
}
47+
: MaxAlign(getMaxLocalArrayAlignment(TTI)) {}
4348

4449
bool run(Function &F);
4550
bool updateAllocaAlignment(AllocaInst *Alloca, const DataLayout &DL);
@@ -113,6 +118,9 @@ struct NVPTXIncreaseLocalAlignmentLegacyPass : public FunctionPass {
113118
NVPTXIncreaseLocalAlignmentLegacyPass() : FunctionPass(ID) {}
114119

115120
bool runOnFunction(Function &F) override;
121+
void getAnalysisUsage(AnalysisUsage &AU) const override {
122+
AU.addRequired<TargetTransformInfoWrapperPass>();
123+
}
116124
StringRef getPassName() const override {
117125
return "NVPTX Increase Local Alignment";
118126
}

llvm/test/CodeGen/NVPTX/increase-local-align.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ define void @test6() {
6767

6868
define void @test7() {
6969
; COMMON-LABEL: define void @test7() {
70-
; COMMON-NEXT: [[A:%.*]] = alloca i32, align 2
70+
; COMMON-NEXT: [[A:%.*]] = alloca i32, align 4
7171
; COMMON-NEXT: ret void
7272
;
7373
%a = alloca i32, align 2

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
135135
;
136136
; PTX-LABEL: escape_ptr(
137137
; PTX: {
138-
; PTX-NEXT: .local .align 4 .b8 __local_depot2[8];
138+
; PTX-NEXT: .local .align 8 .b8 __local_depot2[8];
139139
; PTX-NEXT: .reg .b64 %SP;
140140
; PTX-NEXT: .reg .b64 %SPL;
141141
; PTX-NEXT: .reg .b32 %r<3>;
@@ -179,7 +179,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
179179
;
180180
; PTX-LABEL: escape_ptr_gep(
181181
; PTX: {
182-
; PTX-NEXT: .local .align 4 .b8 __local_depot3[8];
182+
; PTX-NEXT: .local .align 8 .b8 __local_depot3[8];
183183
; PTX-NEXT: .reg .b64 %SP;
184184
; PTX-NEXT: .reg .b64 %SPL;
185185
; PTX-NEXT: .reg .b32 %r<3>;
@@ -194,7 +194,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
194194
; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
195195
; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_gep_param_1];
196196
; PTX-NEXT: st.local.b32 [%rd2], %r2;
197-
; PTX-NEXT: add.s64 %rd3, %rd1, 4;
197+
; PTX-NEXT: or.b64 %rd3, %rd1, 4;
198198
; PTX-NEXT: { // callseq 1, 0
199199
; PTX-NEXT: .param .b64 param0;
200200
; PTX-NEXT: st.param.b64 [param0], %rd3;
@@ -224,7 +224,7 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
224224
;
225225
; PTX-LABEL: escape_ptr_store(
226226
; PTX: {
227-
; PTX-NEXT: .local .align 4 .b8 __local_depot4[8];
227+
; PTX-NEXT: .local .align 8 .b8 __local_depot4[8];
228228
; PTX-NEXT: .reg .b64 %SP;
229229
; PTX-NEXT: .reg .b64 %SPL;
230230
; PTX-NEXT: .reg .b32 %r<3>;
@@ -262,7 +262,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
262262
;
263263
; PTX-LABEL: escape_ptr_gep_store(
264264
; PTX: {
265-
; PTX-NEXT: .local .align 4 .b8 __local_depot5[8];
265+
; PTX-NEXT: .local .align 8 .b8 __local_depot5[8];
266266
; PTX-NEXT: .reg .b64 %SP;
267267
; PTX-NEXT: .reg .b64 %SPL;
268268
; PTX-NEXT: .reg .b32 %r<3>;
@@ -279,7 +279,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
279279
; PTX-NEXT: st.local.b32 [%rd4+4], %r1;
280280
; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_gep_store_param_1];
281281
; PTX-NEXT: st.local.b32 [%rd4], %r2;
282-
; PTX-NEXT: add.s64 %rd5, %rd3, 4;
282+
; PTX-NEXT: or.b64 %rd5, %rd3, 4;
283283
; PTX-NEXT: st.global.b64 [%rd2], %rd5;
284284
; PTX-NEXT: ret;
285285
entry:
@@ -302,7 +302,7 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
302302
;
303303
; PTX-LABEL: escape_ptrtoint(
304304
; PTX: {
305-
; PTX-NEXT: .local .align 4 .b8 __local_depot6[8];
305+
; PTX-NEXT: .local .align 8 .b8 __local_depot6[8];
306306
; PTX-NEXT: .reg .b64 %SP;
307307
; PTX-NEXT: .reg .b64 %SPL;
308308
; PTX-NEXT: .reg .b32 %r<3>;

llvm/test/CodeGen/NVPTX/variadics-backend.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
208208
define dso_local i32 @bar() {
209209
; CHECK-PTX-LABEL: bar(
210210
; CHECK-PTX: {
211-
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
211+
; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot3[32];
212212
; CHECK-PTX-NEXT: .reg .b64 %SP;
213213
; CHECK-PTX-NEXT: .reg .b64 %SPL;
214214
; CHECK-PTX-NEXT: .reg .b16 %rs<5>;
@@ -226,12 +226,12 @@ define dso_local i32 @bar() {
226226
; CHECK-PTX-NEXT: ld.global.nc.b8 %rs3, [__const_$_bar_$_s1+5];
227227
; CHECK-PTX-NEXT: st.local.b8 [%rd2], %rs3;
228228
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
229-
; CHECK-PTX-NEXT: st.b32 [%SP+8], %r1;
229+
; CHECK-PTX-NEXT: st.b32 [%SP+16], %r1;
230230
; CHECK-PTX-NEXT: mov.b16 %rs4, 1;
231-
; CHECK-PTX-NEXT: st.b8 [%SP+12], %rs4;
231+
; CHECK-PTX-NEXT: st.b8 [%SP+20], %rs4;
232232
; CHECK-PTX-NEXT: mov.b64 %rd3, 1;
233-
; CHECK-PTX-NEXT: st.b64 [%SP+16], %rd3;
234-
; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 8;
233+
; CHECK-PTX-NEXT: st.b64 [%SP+24], %rd3;
234+
; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 16;
235235
; CHECK-PTX-NEXT: { // callseq 1, 0
236236
; CHECK-PTX-NEXT: .param .b32 param0;
237237
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
@@ -371,7 +371,7 @@ entry:
371371
define dso_local void @qux() {
372372
; CHECK-PTX-LABEL: qux(
373373
; CHECK-PTX: {
374-
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
374+
; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot7[32];
375375
; CHECK-PTX-NEXT: .reg .b64 %SP;
376376
; CHECK-PTX-NEXT: .reg .b64 %SPL;
377377
; CHECK-PTX-NEXT: .reg .b32 %r<3>;

0 commit comments

Comments
 (0)