Skip to content

Commit 762c657

Browse files
IanButterworth authored and LilithHafner committed
Profile: Thread and task-specific profiling (JuliaLang#41742)
- Adds thread and task ids to profile samples
- Implements thread and task selection for Profile.print()
- Implements thread and task groupby options for Profile.print()
- Adds include_meta to Profile.fetch(), which defaults to false to ensure backwards compatibility with external profiling tooling
- Stores the time of each profile sample (cycleclock)
- Adds sleep_check_state to the metadata and shows % utilization
1 parent 29a5e80 commit 762c657

File tree

9 files changed

+361
-82
lines changed

9 files changed

+361
-82
lines changed

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ Standard library changes
6767
#### Printf
6868
* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).
6969

70+
#### Profile
71+
* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
72+
grouping by thread, task, or nested thread/task or task/thread, as well as new `threads` and `tasks` kwargs that allow filtering.
73+
Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
74+
each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
75+
profiling data consumers, but the metadata can be included with the `include_meta` kwarg. ([#41742])
76+
7077
#### Random
7178

7279
#### REPL

contrib/generate_precompile.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,11 @@ Profile = get(Base.loaded_modules,
222222
nothing)
223223
if Profile !== nothing
224224
hardcoded_precompile_statements *= """
225-
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol})
225+
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
226+
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
227+
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
228+
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
229+
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
226230
"""
227231
end
228232

src/signal-handling.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64
3737

3838
JL_DLLEXPORT int jl_profile_is_buffer_full(void)
3939
{
40-
// the latter `+ 1` is for the block terminator `0`.
41-
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 1 > bt_size_max;
40+
// the `+ 5` is for the block terminator `0` plus 4 metadata entries
41+
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 5 > bt_size_max;
4242
}
4343

4444
static uint64_t jl_last_sigint_trigger = 0;

src/signals-mach.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,19 @@ void *mach_profile_listener(void *arg)
588588
#else
589589
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
590590
#endif
591+
jl_ptls_t ptls = jl_all_tls_states[i];
592+
593+
// store threadid, but add 1 because 0 is reserved to indicate end of block
594+
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
595+
596+
// store task id
597+
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
598+
599+
// store cpu cycle clock
600+
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
601+
602+
// store whether thread is sleeping, but add 1 because 0 is reserved to indicate end of block
603+
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
591604

592605
// Mark the end of this block with 0
593606
bt_data_prof[bt_size_cur++].uintptr = 0;

src/signals-unix.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,20 @@ static void *signal_listener(void *arg)
786786
}
787787
jl_set_safe_restore(old_buf);
788788

789+
jl_ptls_t ptls = jl_all_tls_states[i];
790+
791+
// store threadid, but add 1 because 0 is reserved to indicate end of block
792+
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
793+
794+
// store task id
795+
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
796+
797+
// store cpu cycle clock
798+
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
799+
800+
// store whether thread is sleeping, but add 1 because 0 is reserved to indicate end of block
801+
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
802+
789803
// Mark the end of this block with 0
790804
bt_data_prof[bt_size_cur++].uintptr = 0;
791805
}

src/signals-win.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,21 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
360360
// Get backtrace data
361361
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
362362
bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
363+
364+
jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread
365+
366+
// store threadid, but add 1 because 0 is reserved to indicate end of block
367+
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
368+
369+
// store task id
370+
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
371+
372+
// store cpu cycle clock
373+
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
374+
375+
// store whether thread is sleeping, but add 1 because 0 is reserved to indicate end of block
376+
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
377+
363378
// Mark the end of this block with 0
364379
bt_data_prof[bt_size_cur++].uintptr = 0;
365380
}

stdlib/Profile/Project.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
55
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
66

77
[extras]
8-
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8+
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
99
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
10+
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1011

1112
[targets]
12-
test = ["Test", "Serialization"]
13+
test = ["Logging", "Serialization", "Test"]

0 commit comments

Comments
 (0)