File tree Expand file tree Collapse file tree 1 file changed +7
-6
lines changed Expand file tree Collapse file tree 1 file changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -128,10 +128,8 @@ using Printf
128
128
#
129
129
130
130
function profile_externally (f)
131
- # wait for the device to become idle (and trigger a GC to avoid interference)
131
+ # wait for the device to become idle
132
132
CUDA. cuCtxSynchronize ()
133
- GC. gc (false )
134
- GC. gc (true )
135
133
136
134
start ()
137
135
try
@@ -293,11 +291,13 @@ function profile_internally(f; concurrent=true, kwargs...)
293
291
end
294
292
cfg = CUPTI. ActivityConfig (activity_kinds)
295
293
296
- # wait for the device to become idle (and trigger a GC to avoid interference)
294
+ # wait for the device to become idle
297
295
CUDA. cuCtxSynchronize ()
298
296
299
297
CUPTI. enable! (cfg) do
300
- # sink the initial profiler overhead into a synchronization call
298
+ # perform dummy operations to "warm up" the profiler, and avoid slow first calls.
299
+ # we'll skip everything up until the synchronization call during processing
300
+ CuArray ([1 ])
301
301
CUDA. cuCtxSynchronize ()
302
302
303
303
f ()
@@ -710,7 +710,8 @@ function Base.show(io::IO, results::ProfileResults)
710
710
# called a lot during compilation
711
711
" cuDeviceGetAttribute" ,
712
712
# done before every memory operation
713
- " cuPointerGetAttribute" , " cuDeviceGetMemPool" ])
713
+ " cuPointerGetAttribute" , " cuDeviceGetMemPool" ,
714
+ " cuStreamGetCaptureInfo" ])
714
715
end
715
716
end
716
717
You can’t perform that action at this time.
0 commit comments