Skip to content

Commit 11d3e31

Browse files
committed
[CUDA][HIP] Fix mangling number for local struct
MSVC and Itanium mangling use different mangling numbers for function-scope structs, which causes inconsistent mangled kernel names in device and host compilations. This patch uses the Itanium mangling number for structs when mangling device-side names in CUDA/HIP host compilation on Windows to fix this issue. A state is added to ASTContext to indicate whether the current name mangling is for device-side names in host compilation. Device and host mangling numbers are encoded/decoded as the upper and lower halves of a 32-bit unsigned integer to fit into the original mangling number field of the AST. A diagnostic will be emitted if a mangling number exceeds the limit. Reviewed by: Artem Belevich, Reid Kleckner Differential Revision: https://reviews.llvm.org/D122734 Fixes: SWDEV-328515
1 parent 981ed72 commit 11d3e31

File tree

5 files changed

+96
-1
lines changed

5 files changed

+96
-1
lines changed

clang/include/clang/AST/ASTContext.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
677677
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
678678
};
679679

680+
/// Current CUDA name mangling is for device name in host compilation.
681+
bool CUDAMangleDeviceNameInHostCompilation = false;
682+
680683
/// Returns the dynamic AST node parent map context.
681684
ParentMapContext &getParentMapContext();
682685

clang/lib/AST/ASTContext.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11762,7 +11762,14 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
1176211762

1176311763
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
1176411764
auto I = MangleNumbers.find(ND);
11765-
return I != MangleNumbers.end() ? I->second : 1;
11765+
unsigned Res = I != MangleNumbers.end() ? I->second : 1;
11766+
if (!LangOpts.CUDA || LangOpts.CUDAIsDevice)
11767+
return Res;
11768+
11769+
// CUDA/HIP host compilation encodes host and device mangling numbers
11770+
// as lower and upper half of 32 bit integer.
11771+
Res = CUDAMangleDeviceNameInHostCompilation ? Res >> 16 : Res & 0xFFFF;
11772+
return Res > 1 ? Res : 1;
1176611773
}
1176711774

1176811775
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {

clang/lib/AST/MicrosoftCXXABI.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,20 @@ class MSHIPNumberingContext : public MicrosoftNumberingContext {
7676
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
7777
return DeviceCtx->getManglingNumber(CallOperator);
7878
}
79+
80+
unsigned getManglingNumber(const TagDecl *TD,
81+
unsigned MSLocalManglingNumber) override {
82+
unsigned DeviceN = DeviceCtx->getManglingNumber(TD, MSLocalManglingNumber);
83+
unsigned HostN =
84+
MicrosoftNumberingContext::getManglingNumber(TD, MSLocalManglingNumber);
85+
if (DeviceN > 0xFFFF || HostN > 0xFFFF) {
86+
DiagnosticsEngine &Diags = TD->getASTContext().getDiagnostics();
87+
unsigned DiagID = Diags.getCustomDiagID(
88+
DiagnosticsEngine::Error, "Mangling number exceeds limit (65535)");
89+
Diags.Report(TD->getLocation(), DiagID);
90+
}
91+
return (DeviceN << 16) | HostN;
92+
}
7993
};
8094

8195
class MSSYCLNumberingContext : public MicrosoftNumberingContext {

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/IR/DerivedTypes.h"
2525
#include "llvm/IR/ReplaceConstant.h"
2626
#include "llvm/Support/Format.h"
27+
#include "llvm/Support/SaveAndRestore.h"
2728

2829
using namespace clang;
2930
using namespace CodeGen;
@@ -260,6 +261,8 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
260261
}
261262

262263
std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
264+
llvm::SaveAndRestore<bool> MangleAsDevice(
265+
CGM.getContext().CUDAMangleDeviceNameInHostCompilation, true);
263266
GlobalDecl GD;
264267
// D could be either a kernel or a variable.
265268
if (auto *FD = dyn_cast<FunctionDecl>(ND))
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// RUN: %clang_cc1 -emit-llvm -o - -aux-triple x86_64-pc-windows-msvc \
2+
// RUN: -fms-extensions -triple amdgcn-amd-amdhsa \
3+
// RUN: -target-cpu gfx1030 -fcuda-is-device -x hip %s \
4+
// RUN: | FileCheck -check-prefix=DEV %s
5+
6+
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
7+
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
8+
// RUN: -aux-target-cpu gfx1030 -x hip %s \
9+
// RUN: | FileCheck -check-prefix=HOST %s
10+
11+
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
12+
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
13+
// RUN: -aux-target-cpu gfx1030 -x hip %s \
14+
// RUN: | FileCheck -check-prefix=HOST-NEG %s
15+
16+
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
17+
// RUN: -fms-extensions -x c++ %s \
18+
// RUN: | FileCheck -check-prefix=CPP %s
19+
20+
#if __HIP__
21+
#include "Inputs/cuda.h"
22+
#endif
23+
24+
// Check local struct 'Op' uses Itanium mangling number instead of MSVC mangling
25+
// number in device side name mangling. It is the same in device and host
26+
// compilation.
27+
28+
// DEV: define amdgpu_kernel void @_Z6kernelIZN4TestIiE3runEvE2OpEvv(
29+
30+
// HOST-DAG: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2OpEvv\00"
31+
32+
// HOST-NEG-NOT: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2Op_1Evv\00"
33+
#if __HIP__
34+
template<typename T>
35+
__attribute__((global)) void kernel()
36+
{
37+
}
38+
#endif
39+
40+
// Check local struct 'Op' uses MSVC mangling number in host function name mangling.
41+
// It is the same when compiled as HIP or C++ program.
42+
43+
// HOST-DAG: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
44+
// CPP: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
45+
template<typename T>
46+
void fun()
47+
{
48+
}
49+
50+
template <typename T>
51+
class Test {
52+
public:
53+
void run()
54+
{
55+
struct Op
56+
{
57+
};
58+
#if __HIP__
59+
kernel<Op><<<1, 1>>>();
60+
#endif
61+
fun<Op>();
62+
}
63+
};
64+
65+
int main() {
66+
Test<int> A;
67+
A.run();
68+
}

0 commit comments

Comments
 (0)