@@ -400,7 +400,7 @@ function render_traces(host_trace, device_trace, nvtx_trace;
400
400
trace_last_sync = findlast (host_trace. name .== " cuCtxSynchronize" )
401
401
trace_first_sync == trace_last_sync && error (" Could not find the end of the profiling trace." )
402
402
# # truncate the trace
403
- if ! raw
403
+ if ! raw || ! trace
404
404
trace_begin = host_trace. stop[trace_first_sync]
405
405
trace_end = host_trace. stop[trace_last_sync]
406
406
@@ -409,10 +409,12 @@ function render_traces(host_trace, device_trace, nvtx_trace;
409
409
for df in (host_trace, device_trace)
410
410
filter! (row -> trace_first_call. id <= row. id <= trace_last_call. id, df)
411
411
end
412
- body_hlines = Int[]
412
+ trace_divisions = Int[]
413
413
else
414
- # in raw mode, we display the entire trace, but highlight the relevant part
415
- body_hlines = [trace_first_sync, trace_last_sync- 1 ]
414
+ # in raw mode, we display the entire trace, but highlight the relevant part.
415
+ # note that we only do so when tracing, because otherwise the summary would
416
+ # be skewed by the expensive initial API call used to sink the profiler overhead.
417
+ trace_divisions = [trace_first_sync, trace_last_sync- 1 ]
416
418
417
419
# inclusive bounds
418
420
trace_begin = host_trace. start[begin ]
@@ -557,8 +559,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
557
559
" cuGetProcAddress" ,
558
560
# called a lot during compilation
559
561
" cuDeviceGetAttribute" ,
560
- # pointer attribute query, done before every memory operation
561
- " cuPointerGetAttribute" ])
562
+ # done before every memory operation
563
+ " cuPointerGetAttribute" , " cuDeviceGetMemPool " ])
562
564
end
563
565
end
564
566
@@ -611,7 +613,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
611
613
end
612
614
end
613
615
highlighters = time_highlighters (df)
614
- pretty_table (io, df; header, alignment, formatters, highlighters, crop, body_hlines)
616
+ pretty_table (io, df; header, alignment, formatters, highlighters, crop,
617
+ body_hlines= trace_divisions)
615
618
else
616
619
df = summarize_trace (df)
617
620
@@ -693,7 +696,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
693
696
end
694
697
end
695
698
highlighters = time_highlighters (df)
696
- pretty_table (io, df; header, alignment, formatters, highlighters, crop, body_hlines)
699
+ pretty_table (io, df; header, alignment, formatters, highlighters, crop,
700
+ body_hlines= trace_divisions)
697
701
else
698
702
df = summarize_trace (device_trace)
699
703
0 commit comments