Skip to content

Commit f8a7c55

Browse files
committed
add probes for GC phases
1 parent 86ad7c4 commit f8a7c55

File tree

8 files changed

+275
-1
lines changed

8 files changed

+275
-1
lines changed

contrib/bpftrace/gc_all.bt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bpftrace
2+
3+
BEGIN
4+
{
5+
printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
6+
}
7+
8+
usdt:usr/lib/libjulia-internal.so:julia:gc__begin
9+
{
10+
$now = nsecs;
11+
@time[pid] = $now;
12+
@start[pid] = $now;
13+
}
14+
15+
usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
16+
/@start[pid]/
17+
{
18+
$now = nsecs;
19+
@stop_the_world[pid] = hist(($now - @time[pid]) / 1000);
20+
@time[pid] = $now;
21+
}
22+
23+
usdt:usr/lib/libjulia-internal.so:julia:gc__end
24+
/@start[pid]/
25+
{
26+
$now = nsecs;
27+
@gc_total[pid] = hist(($now - @start[pid]) / 1000);
28+
@gc_phase[pid] = hist(($now - @time[pid]) / 1000);
29+
@time[pid] = $now;
30+
delete(@start[pid]);
31+
}
32+
33+
usdt:usr/lib/libjulia-internal.so:julia:gc__finalizer
34+
/@time[pid]/
35+
{
36+
@finalizer[pid] = hist((nsecs - @time[pid]) / 1000);
37+
delete(@time[pid]);
38+
}
39+
40+
END
41+
{
42+
clear(@start);
43+
clear(@time);
44+
}

contrib/bpftrace/gc_simple.bt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bpftrace
2+
3+
BEGIN
4+
{
5+
printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
6+
}
7+
8+
usdt:usr/lib/libjulia-internal.so:julia:gc__begin
9+
{
10+
@start[pid] = nsecs;
11+
}
12+
13+
usdt:usr/lib/libjulia-internal.so:julia:gc__end
14+
/@start[pid]/
15+
{
16+
@usecs[pid] = hist((nsecs - @start[pid]) / 1000);
17+
delete(@start[pid]);
18+
}
19+
20+
END
21+
{
22+
clear(@start);
23+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bpftrace
2+
3+
BEGIN
4+
{
5+
printf("Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.\n");
6+
}
7+
8+
usdt:usr/lib/libjulia-internal.so:julia:gc__begin
9+
{
10+
@start[pid] = nsecs;
11+
}
12+
13+
usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
14+
/@start[pid]/
15+
{
16+
@usecs[pid] = hist((nsecs - @start[pid]) / 1000);
17+
delete(@start[pid]);
18+
}
19+
20+
END
21+
{
22+
clear(@start);
23+
}

doc/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ DevDocs = [
155155
"devdocs/debuggingtips.md",
156156
"devdocs/valgrind.md",
157157
"devdocs/sanitizers.md",
158+
"devdocs/probes.md"
158159
]
159160
]
160161

doc/src/devdocs/probes.md

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Instrumenting Julia with DTrace, and bpftrace
2+
3+
DTrace and bpftrace are tools that enable lightweight instrumentation of processes. You can turn the instrumentation on and off while the process is running, and with instrumentation is off the overhead is minimal.
4+
5+
!!! compat "Julia 1.8"
6+
Support for probes was added in Julia 1.8
7+
8+
## Enabling support
9+
10+
On Linux install the `systemtap` package that has a version of `dtrace`.
11+
12+
```
13+
WITH_DTRACE=1
14+
```
15+
16+
### Verifying
17+
18+
```
19+
> readelf -n usr/lib/libjulia-internal.so.1
20+
21+
Displaying notes found in: .note.gnu.build-id
22+
Owner Data size Description
23+
GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring)
24+
Build ID: 57161002f35548772a87418d2385c284ceb3ead8
25+
26+
Displaying notes found in: .note.stapsdt
27+
Owner Data size Description
28+
stapsdt 0x00000029 NT_STAPSDT (SystemTap probe descriptors)
29+
Provider: julia
30+
Name: gc__begin
31+
Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
32+
Arguments:
33+
stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors)
34+
Provider: julia
35+
Name: gc__stop_the_world
36+
Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
37+
Arguments:
38+
stapsdt 0x00000027 NT_STAPSDT (SystemTap probe descriptors)
39+
Provider: julia
40+
Name: gc__end
41+
Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
42+
Arguments:
43+
stapsdt 0x0000002d NT_STAPSDT (SystemTap probe descriptors)
44+
Provider: julia
45+
Name: gc__finalizer
46+
Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
47+
Arguments:
48+
```
49+
50+
## Adding probes in libjulia
51+
52+
Probes are declared in dtraces format in the file `src/uprobes.d`. The generated
53+
header file is included in `src/julia_internal.h` and if you add probes you should
54+
provide a noop implementation there.
55+
56+
The header will contain a semaphore `*_ENABLED` and the actual call to the probe.
57+
If the probe arguments are expensive to compute you should first check if the
58+
probe is enable and then compute the arguments and call the probe.
59+
60+
```c
61+
if (JL_PROBE_GC_STOP_THE_WORLD_ENABLED())
62+
JL_PROBE_GC_STOP_THE_WORLD();
63+
```
64+
65+
If your probe has no arguments it is preferred to not include the semaphore check.
66+
67+
68+
69+
## Available probes
70+
71+
### GC probes
72+
73+
1. `julia:gc__begin`: GC begins running on one thread and triggers stop-the-world.
74+
2. `julia:gc__stop_the_world`: All threads have reached a safepoint and GC runs.
75+
3. `julia:gc__mark__begin`: Beginning the mark phase
76+
4. `julia:gc__mark_end(scanned_bytes, perm_scanned)`: Mark phase ended
77+
5. `julia:gc__sweep_begin(full)`: Starting sweep
78+
6. `julia:gc__sweep_end()`: Sweep phase finished
79+
7. `julia:gc__end`: GC is finished, other threads continue work
80+
8. `julia:gc__finalizer`: Initial GC thread has finished running finalizers
81+
82+
#### GC stop-the-world latency
83+
84+
An example `bpftrace` script is given in `contrib/gc_stop_the_world_latency.bt`
85+
and it creates a histogram of the latency for all threads to reach a safepoint.
86+
87+
Running this Julia code, with `julia -t 2`
88+
89+
```
90+
using Base.Threads
91+
92+
fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
93+
94+
beaver = @spawn begin
95+
while true
96+
fib(30)
97+
ccall(:jl_gcsafepoint, Cvoid, ())
98+
end
99+
end
100+
101+
allocator = @spawn begin
102+
while true
103+
zeros(1024)
104+
end
105+
end
106+
107+
wait(allocator)
108+
```
109+
110+
and in a second terminal
111+
112+
```
113+
> sudo contrib/bpftrace/gc_stop_the_world_latency.bt
114+
Attaching 4 probes...
115+
Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.
116+
^C
117+
118+
119+
@usecs[1743412]:
120+
[4, 8) 971 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
121+
[8, 16) 837 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
122+
[16, 32) 129 |@@@@@@ |
123+
[32, 64) 10 | |
124+
[64, 128) 1 | |
125+
```
126+
127+
We can see the latency distribution of the stop-the-world phase in the executed Julia process.
128+
129+
## Notes on using `bpftrace`
130+
131+
An example probe in the bpftrace format looks like:
132+
133+
```
134+
usdt:usr/lib/libjulia-internal.so:julia:gc__begin
135+
{
136+
@start[pid] = nsecs;
137+
}
138+
```
139+
140+
The probe declaration takes the kind `usdt`, then either the
141+
path to the library or the PID, the provide name `julia`
142+
and the probe name `gc__begin`. Note that I am using a
143+
relative path to the `libjulia-internal.so`, but this might
144+
need to be an absolute path on a production system.

src/gc.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3004,6 +3004,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
30043004

30053005
uint64_t t0 = jl_hrtime();
30063006
int64_t last_perm_scanned_bytes = perm_scanned_bytes;
3007+
JL_PROBE_GC_MARK_BEGIN();
30073008

30083009
// 1. fix GC bits of objects in the remset.
30093010
for (int t_i = 0; t_i < jl_n_threads; t_i++)
@@ -3030,6 +3031,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
30303031
gc_mark_loop(ptls, sp);
30313032
gc_mark_sp_init(gc_cache, &sp);
30323033
gc_num.since_sweep += gc_num.allocd;
3034+
JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
30333035
gc_settime_premark_end();
30343036
gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes);
30353037
int64_t actual_allocd = gc_num.since_sweep;
@@ -3135,6 +3137,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
31353137
perm_scanned_bytes = 0;
31363138
scanned_bytes = 0;
31373139
// 5. start sweeping
3140+
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
31383141
sweep_weak_refs();
31393142
sweep_stack_pools();
31403143
gc_sweep_foreign_objs();
@@ -3144,6 +3147,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
31443147
gc_sweep_pool(sweep_full);
31453148
if (sweep_full)
31463149
gc_sweep_perm_alloc();
3150+
JL_PROBE_GC_SWEEP_END();
31473151
// sweeping is over
31483152
// 6. if it is a quick sweep, put back the remembered objects in queued state
31493153
// so that we don't trigger the barrier again on them.
@@ -3201,6 +3205,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
32013205

32023206
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
32033207
{
3208+
JL_PROBE_GC_BEGIN(collection);
3209+
32043210
jl_task_t *ct = jl_current_task;
32053211
jl_ptls_t ptls = ct->ptls;
32063212
if (jl_gc_disable_counter) {
@@ -3230,6 +3236,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
32303236
// TODO (concurrently queue objects)
32313237
// no-op for non-threading
32323238
jl_gc_wait_for_the_world();
3239+
JL_PROBE_GC_STOP_THE_WORLD();
3240+
32333241
gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
32343242
gc_cblist_pre_gc, (collection));
32353243

@@ -3247,6 +3255,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
32473255
// no-op for non-threading
32483256
jl_safepoint_end_gc();
32493257
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
3258+
JL_PROBE_GC_END();
32503259

32513260
// Only disable finalizers on current thread
32523261
// Doing this on all threads is racy (it's impossible to check
@@ -3257,6 +3266,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
32573266
run_finalizers(ct);
32583267
ptls->in_finalizer = was_in_finalizer;
32593268
}
3269+
JL_PROBE_GC_FINALIZER();
3270+
32603271
gc_invoke_callbacks(jl_gc_cb_post_gc_t,
32613272
gc_cblist_post_gc, (collection));
32623273
#ifdef _OS_WINDOWS_

src/julia_internal.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1457,10 +1457,30 @@ uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
14571457
//
14581458
// If the arguments to `JL_PROBE_{PROBE}` are expensive to compute, the call to
14591459
// these functions must be guarded by a JL_PROBE_{PROBE}_ENABLED() check, to
1460-
// minimize performance impact when probing is off.
1460+
// minimize performance impact when probing is off. As an example:
1461+
//
1462+
// if (JL_PROBE_GC_STOP_THE_WORLD_ENABLED())
1463+
// JL_PROBE_GC_STOP_THE_WORLD();
14611464

14621465
#else
14631466
// define a dummy version of the probe functions
1467+
#define JL_PROBE_GC_BEGIN(type) do ; while (0)
1468+
#define JL_PROBE_GC_STOP_THE_WORLD() do ; while (0)
1469+
#define JL_PROBE_GC_MARK_BEGIN() do ; while (0)
1470+
#define JL_PROBE_GC_MARK_END() do ; while (0)
1471+
#define JL_PROBE_GC_SWEEP_BEGIN() do ; while (0)
1472+
#define JL_PROBE_GC_SWEEP_END() do ; while (0)
1473+
#define JL_PROBE_GC_END() do ; while (0)
1474+
#define JL_PROBE_GC_FINALIZER() do ; while (0)
1475+
1476+
#define JL_PROBE_GC_BEGIN_ENABLED() (0)
1477+
#define JL_PROBE_GC_STOP_THE_WORLD_ENABLED() (0)
1478+
#define JL_PROBE_GC_MARK_BEGIN_ENABLED() (0)
1479+
#define JL_PROBE_GC_MARK_END_ENABLED() (0)
1480+
#define JL_PROBE_GC_SWEEP_BEGIN_ENABLED() (0)
1481+
#define JL_PROBE_GC_SWEEP_END_ENABLED() (0)
1482+
#define JL_PROBE_GC_END_ENABLED() (0)
1483+
#define JL_PROBE_GC_FINALIZER_ENABLED() (0)
14641484
#endif
14651485

14661486
#endif

src/uprobes.d

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
/* Julia DTrace provider */
22

33
provider julia {
4+
probe gc__begin()
5+
probe gc__stop_the_world()
6+
probe gc__mark__begin()
7+
probe gc__mark__end(int64 scanned_bytes, int64 perm_scanned_bytes)
8+
probe gc__sweep__begin(int full)
9+
probe gc__sweep__end()
10+
probe gc__end()
11+
probe gc__finalizer()
412
}
513

614
#pragma D attributes Evolving/Evolving/Common provider julia provider

0 commit comments

Comments
 (0)