Skip to content

Commit 9c24777

Browse files
authored
Work around a CUPTI bug in CUDA 12.4 Update 1. (#2330)
1 parent a011e73 commit 9c24777

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

src/compiler/reflection.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false)
5151
return
5252
end
5353

54+
# NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture profiled events
55+
# unless the activity API is first activated
56+
if runtime_version() == v"12.4"
57+
cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
58+
CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API])
59+
CUPTI.enable!(cfg) do
60+
# intentionally empty: enabling the activity API is all the workaround needs
61+
end
62+
end
63+
5464
cfg = CUPTI.CallbackConfig([CUPTI.CUPTI_CB_DOMAIN_RESOURCE]) do domain, id, data
5565
# only process relevant callbacks
5666
id == CUPTI.CUPTI_CBID_RESOURCE_MODULE_LOADED || return

test/core/execution.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ end
7777
CUDA.code_warntype(devnull, dummy, Tuple{})
7878
CUDA.code_llvm(devnull, dummy, Tuple{})
7979
CUDA.code_ptx(devnull, dummy, Tuple{})
80-
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
80+
if can_use_cupti()
8181
# functions defined in Julia
8282
sass = sprint(io->CUDA.code_sass(io, dummy, Tuple{}))
8383
@test occursin(".text._Z5dummy", sass)
@@ -96,7 +96,7 @@ end
9696
@device_code_warntype io=devnull @cuda dummy()
9797
@device_code_llvm io=devnull @cuda dummy()
9898
@device_code_ptx io=devnull @cuda dummy()
99-
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
99+
if can_use_cupti()
100100
# functions defined in Julia
101101
sass = sprint(io->@device_code_sass io=io @cuda dummy())
102102
@test occursin(".text._Z5dummy", sass)
@@ -120,7 +120,7 @@ end
120120
@test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy())))
121121
@test occursin("dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
122122
@test occursin("dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
123-
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
123+
if can_use_cupti()
124124
@test occursin("dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
125125
end
126126

0 commit comments

Comments
 (0)