Skip to content

Commit 17ef973

Browse files
feat: Initial code nav support for CUDA kernel call expressions (#435)
1 parent 68fabc3 commit 17ef973

File tree

9 files changed

+426
-1
lines changed

9 files changed

+426
-1
lines changed

docs/Development.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@ clang -Xclang -ast-dump=json file.c
182182
183183
Another option is to use clang-query ([tutorial](https://devblogs.microsoft.com/cppblog/exploring-clang-tooling-part-2-examining-the-clang-ast-with-clang-query/)).
184184
185+
**NOTE:** If running the above on CUDA code
186+
leads to a Clang error suggesting that CUDA could not be found,
187+
it's likely that the code is ill-formed. Adding flags like
188+
`-nocudainc` or `-nocudalib` (sometimes suggested by Clang) will
189+
lead to `CUDAKernelCallExpr` values not being parsed properly.
190+
185191
### Automated test case reduction
186192
187193
In case of a crash, it may be possible to automatically reduce

indexer/ClangAstMacros.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
F(TemplateTypeParm)
2828

2929
#define FOR_EACH_EXPR_TO_BE_INDEXED(F) \
30+
F(CUDAKernelCall) \
3031
F(CXXConstruct) \
3132
F(CXXDependentScopeMember) \
3233
F(DeclRef) \

indexer/Indexer.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,21 @@ void TuIndexer::saveVarTemplateDecl(const clang::VarTemplateDecl &) {
946946
// Skip emitting a definition here, as we'll emit one for the inner VarDecl.
947947
}
948948

949+
void TuIndexer::saveCUDAKernelCallExpr(
950+
const clang::CUDAKernelCallExpr &cudaKernelCallExpr) {
951+
if (auto *cudaConfig = cudaKernelCallExpr.getConfig()) {
952+
if (auto *calleeDecl = cudaConfig->getCalleeDecl()) {
953+
if (auto *namedDecl = llvm::dyn_cast<clang::NamedDecl>(calleeDecl)) {
954+
if (auto optSymbolName =
955+
this->symbolFormatter.getNamedDeclSymbol(*namedDecl)) {
956+
auto symbolName = optSymbolName.value();
957+
this->saveReference(symbolName, cudaConfig->getExprLoc());
958+
}
959+
}
960+
}
961+
}
962+
}
963+
949964
void TuIndexer::saveCXXConstructExpr(
950965
const clang::CXXConstructExpr &cxxConstructExpr) {
951966
if (auto *cxxConstructorDecl = cxxConstructExpr.getConstructor()) {

test/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ scip_clang_test_suite(
5454
index_data = glob([
5555
"index/**/*.c",
5656
"index/**/*.cc",
57+
"index/**/*.cu",
58+
"index/**/*.cuh",
5759
"index/**/*.h",
5860
"index/**/package-map.json",
5961
]),

test/Snapshot.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ bool isTuMainFilePath(std::string_view p) {
116116
return false;
117117
}
118118
auto ext = p.substr(dotIndex);
119-
return ext == ".cc" || ext == ".cpp" || ext == ".cxx" || ext == ".c";
119+
return ext == ".cc" || ext == ".cpp" || ext == ".cxx" || ext == ".c"
120+
|| ext == ".cu";
120121
}
121122

122123
// static

test/index/cuda/cuda_stub.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Based off https://sourcegraph.com/github.com/llvm/llvm-project/-/blob/clang/test/SemaCUDA/Inputs/cuda.h
2+
//
3+
// Add common stuff for CUDA headers here.
4+
5+
using size_t = unsigned long long;
6+
7+
// Make this file work with nvcc, for testing compatibility.
8+
9+
#ifndef __NVCC__
10+
#define __constant__ __attribute__((constant))
11+
#define __device__ __attribute__((device))
12+
#define __global__ __attribute__((global))
13+
#define __host__ __attribute__((host))
14+
#define __shared__ __attribute__((shared))
15+
#define __managed__ __attribute__((managed))
16+
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
17+
18+
struct dim3 {
19+
unsigned x, y, z;
20+
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
21+
};
22+
23+
#ifdef __HIP__
24+
typedef struct hipStream *hipStream_t;
25+
typedef enum hipError {} hipError_t;
26+
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
27+
hipStream_t stream = 0);
28+
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
29+
size_t sharedSize = 0,
30+
hipStream_t stream = 0);
31+
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
32+
dim3 blockDim, void **args,
33+
size_t sharedMem,
34+
hipStream_t stream);
35+
#else
36+
typedef struct cudaStream *cudaStream_t;
37+
typedef enum cudaError {} cudaError_t;
38+
39+
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
40+
size_t sharedSize = 0,
41+
cudaStream_t stream = 0);
42+
extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
43+
size_t sharedSize = 0,
44+
cudaStream_t stream = 0);
45+
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
46+
dim3 blockDim, void **args,
47+
size_t sharedMem, cudaStream_t stream);
48+
#endif
49+
50+
// Host- and device-side placement new overloads.
51+
void *operator new(size_t, void *p) { return p; }
52+
void *operator new[](size_t, void *p) { return p; }
53+
__device__ void *operator new(size_t, void *p) { return p; }
54+
__device__ void *operator new[](size_t, void *p) { return p; }
55+
56+
#endif // !__NVCC__
57+

test/index/cuda/cuda_stub.snapshot.h

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// Based off https://sourcegraph.com/github.com/llvm/llvm-project/-/blob/clang/test/SemaCUDA/Inputs/cuda.h
2+
//^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ definition [..] `<file>/cuda_stub.h`/
3+
//
4+
// Add common stuff for CUDA headers here.
5+
6+
using size_t = unsigned long long;
7+
// ^^^^^^ definition [..] size_t#
8+
9+
// Make this file work with nvcc, for testing compatibility.
10+
11+
#ifndef __NVCC__
12+
#define __constant__ __attribute__((constant))
13+
// ^^^^^^^^^^^^ definition [..] `cuda_stub.h:10:9`!
14+
#define __device__ __attribute__((device))
15+
// ^^^^^^^^^^ definition [..] `cuda_stub.h:11:9`!
16+
#define __global__ __attribute__((global))
17+
// ^^^^^^^^^^ definition [..] `cuda_stub.h:12:9`!
18+
#define __host__ __attribute__((host))
19+
// ^^^^^^^^ definition [..] `cuda_stub.h:13:9`!
20+
#define __shared__ __attribute__((shared))
21+
// ^^^^^^^^^^ definition [..] `cuda_stub.h:14:9`!
22+
#define __managed__ __attribute__((managed))
23+
// ^^^^^^^^^^^ definition [..] `cuda_stub.h:15:9`!
24+
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
25+
// ^^^^^^^^^^^^^^^^^ definition [..] `cuda_stub.h:16:9`!
26+
27+
struct dim3 {
28+
// ^^^^ definition [..] dim3#
29+
unsigned x, y, z;
30+
// ^ definition [..] dim3#x.
31+
// ^ definition [..] dim3#y.
32+
// ^ definition [..] dim3#z.
33+
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
34+
// ^^^^^^^^ reference [..] `cuda_stub.h:13:9`!
35+
// ^^^^^^^^^^ reference [..] `cuda_stub.h:11:9`!
36+
// ^^^^ definition [..] dim3#dim3(6df00707c193238d).
37+
// ^ definition local 0
38+
// ^ definition local 1
39+
// ^ definition local 2
40+
// ^ reference [..] dim3#x.
41+
// ^ reference local 0
42+
// ^ reference [..] dim3#y.
43+
// ^ reference local 1
44+
// ^ reference [..] dim3#z.
45+
// ^ reference local 2
46+
};
47+
48+
#ifdef __HIP__
49+
typedef struct hipStream *hipStream_t;
50+
typedef enum hipError {} hipError_t;
51+
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
52+
hipStream_t stream = 0);
53+
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
54+
size_t sharedSize = 0,
55+
hipStream_t stream = 0);
56+
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
57+
dim3 blockDim, void **args,
58+
size_t sharedMem,
59+
hipStream_t stream);
60+
#else
61+
typedef struct cudaStream *cudaStream_t;
62+
// ^^^^^^^^^^ reference [..] cudaStream#
63+
// ^^^^^^^^^^ reference [..] cudaStream#
64+
// ^^^^^^^^^^^^ definition [..] cudaStream_t#
65+
typedef enum cudaError {} cudaError_t;
66+
// ^^^^^^^^^ definition [..] cudaError#
67+
// ^^^^^^^^^^^ definition [..] cudaError_t#
68+
69+
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
70+
// ^^^^ reference [..] dim3#
71+
// ^^^^^^^^ definition local 3
72+
// ^^^^ reference [..] dim3#
73+
// ^^^^^^^^^ definition local 4
74+
size_t sharedSize = 0,
75+
// ^^^^^^ reference [..] size_t#
76+
// ^^^^^^^^^^ definition local 5
77+
cudaStream_t stream = 0);
78+
// ^^^^^^^^^^^^ reference [..] cudaStream_t#
79+
// ^^^^^^ definition local 6
80+
extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
81+
// ^^^^ reference [..] dim3#
82+
// ^^^^^^^^ definition local 7
83+
// ^^^^ reference [..] dim3#
84+
// ^^^^^^^^^ definition local 8
85+
size_t sharedSize = 0,
86+
// ^^^^^^ reference [..] size_t#
87+
// ^^^^^^^^^^ definition local 9
88+
cudaStream_t stream = 0);
89+
// ^^^^^^^^^^^^ reference [..] cudaStream_t#
90+
// ^^^^^^ definition local 10
91+
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
92+
// ^^^^^^^^^^^ reference [..] cudaError_t#
93+
// ^^^^ definition local 11
94+
// ^^^^ reference [..] dim3#
95+
// ^^^^^^^ definition local 12
96+
dim3 blockDim, void **args,
97+
// ^^^^ reference [..] dim3#
98+
// ^^^^^^^^ definition local 13
99+
// ^^^^ definition local 14
100+
size_t sharedMem, cudaStream_t stream);
101+
// ^^^^^^ reference [..] size_t#
102+
// ^^^^^^^^^ definition local 15
103+
// ^^^^^^^^^^^^ reference [..] cudaStream_t#
104+
// ^^^^^^ definition local 16
105+
#endif
106+
107+
// Host- and device-side placement new overloads.
108+
void *operator new(size_t, void *p) { return p; }
109+
// ^^^^^^^^ definition [..] `operator new`(ecd71fefd6822377).
110+
// ^^^^^^ reference [..] size_t#
111+
// ^ definition local 17
112+
// ^ reference local 17
113+
void *operator new[](size_t, void *p) { return p; }
114+
// ^^^^^^^^ definition [..] `operator new[]`(ecd71fefd6822377).
115+
// ^^^^^^ reference [..] size_t#
116+
// ^ definition local 18
117+
// ^ reference local 18
118+
__device__ void *operator new(size_t, void *p) { return p; }
119+
//^^^^^^^^^^ reference [..] `cuda_stub.h:11:9`!
120+
// ^^^^^^^^ definition [..] `operator new`(ecd71fefd6822377).
121+
// ^^^^^^ reference [..] size_t#
122+
// ^ definition local 19
123+
// ^ reference local 19
124+
__device__ void *operator new[](size_t, void *p) { return p; }
125+
//^^^^^^^^^^ reference [..] `cuda_stub.h:11:9`!
126+
// ^^^^^^^^ definition [..] `operator new[]`(ecd71fefd6822377).
127+
// ^^^^^^ reference [..] size_t#
128+
// ^ definition local 20
129+
// ^ reference local 20
130+
131+
#endif // !__NVCC__
132+

test/index/cuda/kernelcall.cu

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Initially based off kernel-call.cu in the Clang tests
2+
// https://sourcegraph.com/github.com/llvm/llvm-project/-/blob/clang/test/SemaCUDA/kernel-call.cu
3+
4+
#include "cuda_stub.h"
5+
6+
__global__ void g1(int x) {}
7+
8+
template <typename T> void t1(T arg) {
9+
g1<<<arg, arg>>>(1);
10+
}
11+
12+
void h1(int x) {}
13+
int h2(int x) { return 1; }
14+
15+
int main(void) {
16+
g1<<<1, 1>>>(42);
17+
g1(42); // expected-error {{call to global function 'g1' not configured}}
18+
g1<<<1>>>(42); // expected-error {{too few execution configuration arguments to kernel function call}}
19+
g1<<<1, 1, 0, 0, 0>>>(42); // expected-error {{too many execution configuration arguments to kernel function call}}
20+
21+
t1(1);
22+
23+
h1<<<1, 1>>>(42); // expected-error {{kernel call to non-global function 'h1'}}
24+
25+
int (*fp)(int) = h2;
26+
fp<<<1, 1>>>(42); // expected-error {{must have void return type}}
27+
28+
g1<<<undeclared, 1>>>(42); // expected-error {{use of undeclared identifier 'undeclared'}}
29+
}
30+
31+
// Make sure we can call static member kernels.
32+
template <typename > struct a0 {
33+
template <typename T> static __global__ void Call(T);
34+
};
35+
struct a1 {
36+
template <typename T> static __global__ void Call(T);
37+
};
38+
template <typename T> struct a2 {
39+
static __global__ void Call(T);
40+
};
41+
struct a3 {
42+
static __global__ void Call(int);
43+
static __global__ void Call(void*);
44+
};
45+
46+
struct b {
47+
template <typename c> void d0(c arg) {
48+
a0<c>::Call<<<0, 0>>>(arg);
49+
a1::Call<<<0,0>>>(arg);
50+
a2<c>::Call<<<0,0>>>(arg);
51+
a3::Call<<<0, 0>>>(arg);
52+
}
53+
void d1(void* arg) {
54+
a0<void*>::Call<<<0, 0>>>(arg);
55+
a1::Call<<<0,0>>>(arg);
56+
a2<void*>::Call<<<0,0>>>(arg);
57+
a3::Call<<<0, 0>>>(arg);
58+
}
59+
void e() { d0(1); }
60+
};

0 commit comments

Comments
 (0)