Skip to content

Commit 15d36aa

Browse files
authored
[clang][CodeGen] Preserve addrspace of enqueue_kernel builtin. (#148062)
__enqueue_kernel_varargs' last parameter is in addrspace(5), but CodeGen currently misses this qualifier. This commit fixes the code to preserve the qualifier by referencing Alloca, which has its casts removed, rather than TmpPtr.
1 parent 9e90788 commit 15d36aa

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5987,8 +5987,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59875987
auto *Zero = llvm::ConstantInt::get(IntTy, 0);
59885988
for (unsigned I = First; I < NumArgs; ++I) {
59895989
auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5990-
auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5991-
{Zero, Index});
5990+
auto *GEP =
5991+
Builder.CreateGEP(Tmp.getElementType(), Alloca, {Zero, Index});
59925992
if (I == First)
59935993
ElemPtr = GEP;
59945994
auto *V =

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,9 @@ kernel void test_target_features_kernel(global int *i) {
209209
// NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
210210
// NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
211211
// NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8
212-
// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
213-
// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8
214-
// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
212+
// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0
213+
// NOCPU-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8
214+
// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]])
215215
// NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
216216
// NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
217217
// NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
@@ -587,9 +587,9 @@ kernel void test_target_features_kernel(global int *i) {
587587
// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
588588
// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
589589
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]]
590-
// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
591-
// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8
592-
// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
590+
// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0
591+
// GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8
592+
// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]])
593593
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]]
594594
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]]
595595
// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0

0 commit comments

Comments
 (0)