Skip to content

Commit a08b759

Browse files
committed
Minor improvements to integrated profiler.
Hide the memory pool getter queries (`cuDeviceGetMemPool`), and don't include the initial API call in summaries, even when profiling with raw=true.
1 parent 5cc0ff2 commit a08b759

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

src/profile.jl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ function render_traces(host_trace, device_trace, nvtx_trace;
400400
trace_last_sync = findlast(host_trace.name .== "cuCtxSynchronize")
401401
trace_first_sync == trace_last_sync && error("Could not find the end of the profiling trace.")
402402
## truncate the trace
403-
if !raw
403+
if !raw || !trace
404404
trace_begin = host_trace.stop[trace_first_sync]
405405
trace_end = host_trace.stop[trace_last_sync]
406406

@@ -409,10 +409,12 @@ function render_traces(host_trace, device_trace, nvtx_trace;
409409
for df in (host_trace, device_trace)
410410
filter!(row -> trace_first_call.id <= row.id <= trace_last_call.id, df)
411411
end
412-
body_hlines = Int[]
412+
trace_divisions = Int[]
413413
else
414-
# in raw mode, we display the entire trace, but highlight the relevant part
415-
body_hlines = [trace_first_sync, trace_last_sync-1]
414+
# in raw mode, we display the entire trace, but highlight the relevant part.
415+
# note that we only do so when tracing, because otherwise the summary would
416+
# be skewed by the expensive initial API call used to sink the profiler overhead.
417+
trace_divisions = [trace_first_sync, trace_last_sync-1]
416418

417419
# inclusive bounds
418420
trace_begin = host_trace.start[begin]
@@ -557,8 +559,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
557559
"cuGetProcAddress",
558560
# called a lot during compilation
559561
"cuDeviceGetAttribute",
560-
# pointer attribute query, done before every memory operation
561-
"cuPointerGetAttribute"])
562+
# done before every memory operation
563+
"cuPointerGetAttribute", "cuDeviceGetMemPool"])
562564
end
563565
end
564566

@@ -611,7 +613,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
611613
end
612614
end
613615
highlighters = time_highlighters(df)
614-
pretty_table(io, df; header, alignment, formatters, highlighters, crop, body_hlines)
616+
pretty_table(io, df; header, alignment, formatters, highlighters, crop,
617+
body_hlines=trace_divisions)
615618
else
616619
df = summarize_trace(df)
617620

@@ -693,7 +696,8 @@ function render_traces(host_trace, device_trace, nvtx_trace;
693696
end
694697
end
695698
highlighters = time_highlighters(df)
696-
pretty_table(io, df; header, alignment, formatters, highlighters, crop, body_hlines)
699+
pretty_table(io, df; header, alignment, formatters, highlighters, crop,
700+
body_hlines=trace_divisions)
697701
else
698702
df = summarize_trace(device_trace)
699703

0 commit comments

Comments
 (0)