Skip to content

Commit 8ccc4cc

Browse files
Show any compile time spent in time and timev macros (#37678)
* show any compile time spent in at-time macro
1 parent 8a9666a commit 8ccc4cc

File tree

3 files changed

+62
-18
lines changed

3 files changed

+62
-18
lines changed

base/timing.jl

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ function gc_alloc_count(diff::GC_Diff)
5555
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
5656
end
5757

58+
# cumulative total time spent on compilation
59+
cumulative_compile_time_ns() = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
5860

5961
# total time spend in garbage collection, in nanoseconds
6062
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
@@ -102,7 +104,7 @@ function format_bytes(bytes) # also used by InteractiveUtils
102104
end
103105
end
104106

105-
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0)
107+
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0)
106108
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
107109
length(timestr) < 10 && print(" "^(10 - length(timestr)))
108110
print(timestr, " seconds")
@@ -118,15 +120,18 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0)
118120
if gctime > 0
119121
print(", ", Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
120122
end
123+
if compile_time > 0
124+
print(", ", Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
125+
end
121126
if bytes != 0 || allocs != 0
122127
print(")")
123128
end
124129
nothing
125130
end
126131

127-
function timev_print(elapsedtime, diff::GC_Diff)
132+
function timev_print(elapsedtime, diff::GC_Diff, compile_time)
128133
allocs = gc_alloc_count(diff)
129-
time_print(elapsedtime, diff.allocd, diff.total_time, allocs)
134+
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time)
130135
print("\nelapsed time (ns): $elapsedtime\n")
131136
padded_nonzero_print(diff.total_time, "gc time (ns)")
132137
padded_nonzero_print(diff.allocd, "bytes allocated")
@@ -144,7 +149,8 @@ end
144149
145150
A macro to execute an expression, printing the time it took to execute, the number of
146151
allocations, and the total number of bytes its execution caused to be allocated, before
147-
returning the value of the expression.
152+
returning the value of the expression. Any time spent garbage collecting (gc) or
153+
compiling is shown as a percentage.
148154
149155
See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
150156
[`@allocated`](@ref).
@@ -155,8 +161,13 @@ See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
155161
reduce noise.
156162
157163
```julia-repl
158-
julia> @time rand(10^6);
159-
0.001525 seconds (7 allocations: 7.630 MiB)
164+
julia> x = rand(10,10);
165+
166+
julia> @time x * x;
167+
0.606588 seconds (2.19 M allocations: 116.555 MiB, 3.75% gc time, 99.94% compilation time)
168+
169+
julia> @time x * x;
170+
0.000009 seconds (1 allocation: 896 bytes)
160171
161172
julia> @time begin
162173
sleep(0.3)
@@ -170,12 +181,14 @@ macro time(ex)
170181
quote
171182
while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
172183
local stats = gc_num()
184+
local compile_elapsedtime = cumulative_compile_time_ns()
173185
local elapsedtime = time_ns()
174186
local val = $(esc(ex))
175187
elapsedtime = time_ns() - elapsedtime
188+
compile_elapsedtime = cumulative_compile_time_ns() - compile_elapsedtime
176189
local diff = GC_Diff(gc_num(), stats)
177190
time_print(elapsedtime, diff.allocd, diff.total_time,
178-
gc_alloc_count(diff))
191+
gc_alloc_count(diff), compile_elapsedtime)
179192
println()
180193
val
181194
end
@@ -192,22 +205,36 @@ See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
192205
[`@allocated`](@ref).
193206
194207
```julia-repl
195-
julia> @timev rand(10^6);
196-
0.001006 seconds (7 allocations: 7.630 MiB)
197-
elapsed time (ns): 1005567
198-
bytes allocated: 8000256
199-
pool allocs: 6
200-
malloc() calls: 1
208+
julia> x = rand(10,10);
209+
210+
julia> @timev x * x;
211+
0.546770 seconds (2.20 M allocations: 116.632 MiB, 4.23% gc time, 99.94% compilation time)
212+
elapsed time (ns): 546769547
213+
gc time (ns): 23115606
214+
bytes allocated: 122297811
215+
pool allocs: 2197930
216+
non-pool GC allocs:1327
217+
malloc() calls: 36
218+
realloc() calls: 5
219+
GC pauses: 3
220+
221+
julia> @timev x * x;
222+
0.000010 seconds (1 allocation: 896 bytes)
223+
elapsed time (ns): 9848
224+
bytes allocated: 896
225+
pool allocs: 1
201226
```
202227
"""
203228
macro timev(ex)
204229
quote
205230
while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
206231
local stats = gc_num()
232+
local compile_elapsedtime = cumulative_compile_time_ns()
207233
local elapsedtime = time_ns()
208234
local val = $(esc(ex))
209235
elapsedtime = time_ns() - elapsedtime
210-
timev_print(elapsedtime, GC_Diff(gc_num(), stats))
236+
compile_elapsedtime = cumulative_compile_time_ns() - compile_elapsedtime
237+
timev_print(elapsedtime, GC_Diff(gc_num(), stats), compile_elapsedtime)
211238
val
212239
end
213240
end

doc/src/manual/performance-tips.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ julia> function sum_global()
7070
end;
7171
7272
julia> @time sum_global()
73-
0.017705 seconds (15.28 k allocations: 694.484 KiB)
73+
0.009639 seconds (7.36 k allocations: 300.310 KiB, 98.32% compilation time)
7474
496.84883432553846
7575
7676
julia> @time sum_global()
@@ -106,15 +106,15 @@ julia> function sum_arg(x)
106106
end;
107107
108108
julia> @time sum_arg(x)
109-
0.007701 seconds (821 allocations: 43.059 KiB)
109+
0.006202 seconds (4.18 k allocations: 217.860 KiB, 99.72% compilation time)
110110
496.84883432553846
111111
112112
julia> @time sum_arg(x)
113-
0.000006 seconds (5 allocations: 176 bytes)
113+
0.000005 seconds (1 allocation: 16 bytes)
114114
496.84883432553846
115115
```
116116

117-
The 5 allocations seen are from running the `@time` macro itself in global scope. If we instead run
117+
The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run
118118
the timing in a function, we can see that indeed no allocations are performed:
119119

120120
```jldoctest sumarg; filter = r"[0-9\.]+ seconds"

src/jitlayers.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
6767
}
6868
}
6969

70+
static uint64_t cumulative_compile_time = 0;
71+
72+
extern "C" JL_DLLEXPORT
73+
uint64_t jl_cumulative_compile_time_ns() {
74+
return cumulative_compile_time;
75+
}
76+
7077
// this generates llvm code for the lambda info
7178
// and adds the result to the jitlayers
7279
// (and the shadow module),
@@ -210,6 +217,7 @@ extern "C" JL_DLLEXPORT
210217
void jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
211218
{
212219
JL_LOCK(&codegen_lock);
220+
uint64_t compiler_start_time = jl_hrtime();
213221
jl_codegen_params_t params;
214222
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
215223
if (pparams == NULL)
@@ -228,6 +236,7 @@ void jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declr
228236
if (llvmmod == NULL)
229237
jl_add_to_ee(std::unique_ptr<Module>(into));
230238
}
239+
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
231240
JL_UNLOCK(&codegen_lock);
232241
}
233242

@@ -251,8 +260,10 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
251260
if (!jl_is_concrete_type(declrt) || jl_is_kind(declrt))
252261
jl_error("@ccallable: return type must be concrete and correspond to a C type");
253262
JL_LOCK(&codegen_lock);
263+
uint64_t compiler_start_time = jl_hrtime();
254264
if (!jl_type_mappable_to_c(declrt))
255265
jl_error("@ccallable: return type doesn't correspond to a C type");
266+
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
256267
JL_UNLOCK(&codegen_lock);
257268

258269
// validate method signature
@@ -281,6 +292,7 @@ extern "C"
281292
jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
282293
{
283294
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
295+
uint64_t compiler_start_time = jl_hrtime();
284296
// if we don't have any decls already, try to generate it now
285297
jl_code_info_t *src = NULL;
286298
JL_GC_PUSH1(&src);
@@ -317,6 +329,7 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
317329
else {
318330
codeinst = NULL;
319331
}
332+
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
320333
JL_UNLOCK(&codegen_lock);
321334
JL_GC_POP();
322335
return codeinst;
@@ -329,6 +342,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
329342
return;
330343
}
331344
JL_LOCK(&codegen_lock);
345+
uint64_t compiler_start_time = jl_hrtime();
332346
if (unspec->invoke == NULL) {
333347
jl_code_info_t *src = NULL;
334348
JL_GC_PUSH1(&src);
@@ -355,6 +369,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
355369
}
356370
JL_GC_POP();
357371
}
372+
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
358373
JL_UNLOCK(&codegen_lock); // Might GC
359374
}
360375

@@ -376,6 +391,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
376391
// (using sentinel value `1` instead)
377392
// so create an exception here so we can print pretty our lies
378393
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
394+
uint64_t compiler_start_time = jl_hrtime();
379395
specfptr = (uintptr_t)codeinst->specptr.fptr;
380396
if (specfptr == 0) {
381397
jl_code_info_t *src = jl_type_infer(mi, world, 0);
@@ -399,6 +415,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
399415
}
400416
JL_GC_POP();
401417
}
418+
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
402419
JL_UNLOCK(&codegen_lock);
403420
}
404421
if (specfptr != 0)

0 commit comments

Comments
 (0)