Skip to content

Commit 3b170e3

Browse files
committed
[SYCL][AMD] Fix kernel stub calling convention for AMD
We need to ensure that the entry point uses the amdgpu_kernel calling convention; otherwise, the kernel is not properly registered and will be removed from the module.
1 parent 6ad1847 commit 3b170e3

File tree

3 files changed

+12
-2
lines changed

3 files changed

+12
-2
lines changed

clang/include/clang/AST/GlobalDecl.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,8 @@ class GlobalDecl {
164164
}
165165

166166
static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) {
167-
return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice)
167+
return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice ||
168+
D->hasAttr<SYCLKernelAttr>())
168169
? KernelReferenceKind::Kernel
169170
: KernelReferenceKind::Stub;
170171
}

clang/lib/AST/Decl.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5515,7 +5515,8 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
55155515
}
55165516

55175517
bool FunctionDecl::isReferenceableKernel() const {
5518-
return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
5518+
return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>() ||
5519+
hasAttr<SYCLKernelAttr>();
55195520
}
55205521

55215522
BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
304304
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
305305
CodeGen::CodeGenModule &M) const override;
306306
unsigned getOpenCLKernelCallingConv() const override;
307+
void
308+
setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
307309

308310
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
309311
llvm::PointerType *T, QualType QT) const override;
@@ -434,6 +436,12 @@ unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
434436
return llvm::CallingConv::AMDGPU_KERNEL;
435437
}
436438

439+
void AMDGPUTargetCodeGenInfo::setOCLKernelStubCallingConvention(
440+
const FunctionType *&FT) const {
441+
FT = getABIInfo().getContext().adjustFunctionType(
442+
FT, FT->getExtInfo().withCallingConv(CC_AMDGPUKernelCall));
443+
}
444+
437445
// Currently LLVM assumes null pointers always have value 0,
438446
// which results in incorrectly transformed IR. Therefore, instead of
439447
// emitting null pointers in private and local address spaces, a null

0 commit comments

Comments
 (0)