Skip to content

Commit 07e2ba4

Browse files
authored
[AMDGPU] Set AS8 address width to 48 bits
Of the 128 bits of a buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128, and I had to update SelectionDAGBuilder to correctly extend the mask. Reviewed By: krzysz00 Pull Request: #139419
1 parent 90daed3 commit 07e2ba4

34 files changed

+111
-90
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
3333

3434
static const char *const DataLayoutStringAMDGCN =
3535
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36-
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
37-
"32-v48:64-v96:128"
38-
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
39-
"-ni:7:8:9";
36+
"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
37+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
38+
"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
4039

4140
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
4241
llvm::AMDGPUAS::FLAT_ADDRESS, // Default

clang/test/CodeGen/target-data.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,12 @@
176176

177177
// RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
178178
// RUN: | FileCheck %s -check-prefix=R600SI
179-
// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
179+
// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
180180

181181
// Test default -target-cpu
182182
// RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
183183
// RUN: | FileCheck %s -check-prefix=R600SIDefault
184-
// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
184+
// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
185185

186186
// RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
187187
// RUN: FileCheck %s -check-prefix=AARCH64
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
22
// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
33

4-
// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
4+
// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
55
void foo(void) {}

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7966,17 +7966,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
79667966

79677967
// On arm64_32, pointers are 32 bits when stored in memory, but
79687968
// zero-extended to 64 bits when in registers. Thus the mask is 32 bits to
7969-
// match the index type, but the pointer is 64 bits, so the the mask must be
7969+
// match the index type, but the pointer is 64 bits, so the mask must be
79707970
// zero-extended up to 64 bits to match the pointer.
79717971
EVT PtrVT =
79727972
TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
79737973
EVT MemVT =
79747974
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
79757975
assert(PtrVT == Ptr.getValueType());
7976-
assert(MemVT == Mask.getValueType());
7977-
if (MemVT != PtrVT)
7976+
if (Mask.getValueType().getFixedSizeInBits() < MemVT.getFixedSizeInBits()) {
7977+
// For AMDGPU buffer descriptors the mask is 48 bits, but the pointer is
7978+
// 128-bit, so we have to pad the mask with ones for unused bits.
7979+
auto HighOnes = DAG.getNode(
7980+
ISD::SHL, sdl, PtrVT, DAG.getAllOnesConstant(sdl, PtrVT),
7981+
DAG.getShiftAmountConstant(Mask.getValueType().getFixedSizeInBits(),
7982+
PtrVT, sdl));
7983+
Mask = DAG.getNode(ISD::OR, sdl, PtrVT,
7984+
DAG.getZExtOrTrunc(Mask, sdl, PtrVT), HighOnes);
7985+
} else if (Mask.getValueType() != PtrVT)
79787986
Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT);
79797987

7988+
assert(Mask.getValueType() == PtrVT);
79807989
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
79817990
return;
79827991
}

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5781,7 +5781,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
57815781
if (!DL.contains("-p7") && !DL.starts_with("p7"))
57825782
Res.append("-p7:160:256:256:32");
57835783
if (!DL.contains("-p8") && !DL.starts_with("p8"))
5784-
Res.append("-p8:128:128");
5784+
Res.append("-p8:128:128:128:48");
5785+
constexpr StringRef OldP8("-p8:128:128-");
5786+
if (DL.contains(OldP8))
5787+
Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
57855788
if (!DL.contains("-p9") && !DL.starts_with("p9"))
57865789
Res.append("-p9:192:256:256:32");
57875790

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -688,10 +688,9 @@ static StringRef computeDataLayout(const Triple &TT) {
688688
// space 8) which cannot be non-trivially accessed by LLVM memory operations
689689
// like getelementptr.
690690
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
691-
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
692-
"v32:32-v48:64-v96:"
693-
"128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
694-
"G1-ni:7:8:9";
691+
"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
692+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-"
693+
"v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
695694
}
696695

697696
LLVM_READNONE

llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; CHECK-NEXT: x[]: full-set
99
; CHECK-NEXT: allocas uses:
1010

11-
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
11+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
1212

1313
define void @a(ptr addrspace(5) %x) {
1414
entry:
Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s
22

3-
; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s128)
3+
; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s48)
44

55

66
define float @gep_on_rsrc(ptr addrspace(8) %rsrc) {
77
body:
8-
%next = getelementptr float, ptr addrspace(8) %rsrc, i128 1
8+
%next = getelementptr float, ptr addrspace(8) %rsrc, i48 1
99
%res = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %next, i32 0, i32 0, i32 0)
1010
ret float %res
1111
}
1212

1313
declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg)
14-

llvm/test/CodeGen/AMDGPU/ptrmask.ll

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
33
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
44
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
@@ -145,79 +145,79 @@ define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspa
145145
ret ptr addrspace(7) %masked
146146
}
147147

148-
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) {
149-
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
148+
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) %ptr, i48 %mask) {
149+
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48:
150150
; GCN: ; %bb.0:
151151
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+
; GCN-NEXT: v_or_b32_e32 v5, 0xffff0000, v5
152153
; GCN-NEXT: v_and_b32_e32 v1, v1, v5
153154
; GCN-NEXT: v_and_b32_e32 v0, v0, v4
154-
; GCN-NEXT: v_and_b32_e32 v3, v3, v7
155-
; GCN-NEXT: v_and_b32_e32 v2, v2, v6
156155
; GCN-NEXT: s_setpc_b64 s[30:31]
157156
;
158-
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
157+
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48:
159158
; GFX10PLUS: ; %bb.0:
160159
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160+
; GFX10PLUS-NEXT: v_or_b32_e32 v5, 0xffff0000, v5
161161
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
162162
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
163-
; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
164-
; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
165163
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
166-
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
164+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask)
167165
ret ptr addrspace(8) %masked
168166
}
169167

170-
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) {
171-
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
168+
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) %ptr) {
169+
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8:
172170
; GCN: ; %bb.0:
173171
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174172
; GCN-NEXT: v_and_b32_e32 v0, -8, v0
175173
; GCN-NEXT: s_setpc_b64 s[30:31]
176174
;
177-
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
175+
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8:
178176
; GFX10PLUS: ; %bb.0:
179177
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180178
; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0
181179
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182-
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
180+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8)
183181
ret ptr addrspace(8) %masked
184182
}
185183

186-
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) {
187-
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
184+
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) inreg %ptr, i48 inreg %mask) {
185+
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48:
188186
; GCN: ; %bb.0:
189-
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
187+
; GCN-NEXT: s_or_b32 s7, s7, 0xffff0000
190188
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
191189
; GCN-NEXT: s_mov_b32 s2, s4
192190
; GCN-NEXT: s_mov_b32 s3, s5
193191
; GCN-NEXT: ; return to shader part epilog
194192
;
195-
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128:
193+
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48:
196194
; GFX10PLUS: ; %bb.0:
195+
; GFX10PLUS-NEXT: s_or_b32 s7, s7, 0xffff0000
197196
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
198-
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
197+
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
198+
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
199199
; GFX10PLUS-NEXT: ; return to shader part epilog
200-
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
200+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask)
201201
ret ptr addrspace(8) %masked
202202
}
203203

204-
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) inreg %ptr) {
205-
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
204+
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) inreg %ptr) {
205+
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8:
206206
; GCN: ; %bb.0:
207207
; GCN-NEXT: s_mov_b32 s1, s3
208208
; GCN-NEXT: s_and_b32 s0, s2, -8
209209
; GCN-NEXT: s_mov_b32 s2, s4
210210
; GCN-NEXT: s_mov_b32 s3, s5
211211
; GCN-NEXT: ; return to shader part epilog
212212
;
213-
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
213+
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8:
214214
; GFX10PLUS: ; %bb.0:
215215
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
216216
; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8
217217
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
218218
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
219219
; GFX10PLUS-NEXT: ; return to shader part epilog
220-
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
220+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8)
221221
ret ptr addrspace(8) %masked
222222
}
223223

llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; Test that we don't crash.
22
; RUN: opt < %s -passes=alignment-from-assumptions -S
33

4-
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8"
4+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8"
55

66
%"core::str::CharIndices.29.66.90.114.138.149.165.173.181.197.205.213.229.387.398" = type { [0 x i64], i64, [0 x i64], { ptr, ptr }, [0 x i64] }
77
%"unwind::libunwind::_Unwind_Exception.9.51.75.99.123.147.163.171.179.195.203.211.227.385.396" = type { [0 x i64], i64, [0 x i64], ptr, [0 x i64], [6 x i64], [0 x i64] }

0 commit comments

Comments
 (0)