Skip to content

Commit 55154b3

Browse files
IanButterworthKristofferC
authored andcommitted
Measure compile time only when using time macros pt.2: handling when code under test throws and make compile timing thread-local (#38915)
* ensure compile timing disables in time & timev macros * make comp measurement switching threadsafe (cherry picked from commit 158e22f)
1 parent 327f298 commit 55154b3

File tree

6 files changed

+83
-64
lines changed

6 files changed

+83
-64
lines changed

base/timing.jl

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -115,41 +115,44 @@ function format_bytes(bytes) # also used by InteractiveUtils
115115
end
116116
end
117117

118-
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0)
118+
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false)
119119
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
120-
length(timestr) < 10 && print(" "^(10 - length(timestr)))
121-
print(timestr, " seconds")
122-
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
123-
parens && print(" (")
124-
if bytes != 0 || allocs != 0
125-
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
126-
if ma == 1
127-
print(Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
128-
else
129-
print(Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
130-
end
131-
print(format_bytes(bytes))
132-
end
133-
if gctime > 0
120+
str = sprint() do io
121+
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
122+
print(io, timestr, " seconds")
123+
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
124+
parens && print(io, " (")
134125
if bytes != 0 || allocs != 0
135-
print(", ")
126+
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
127+
if ma == 1
128+
print(io, Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
129+
else
130+
print(io, Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
131+
end
132+
print(io, format_bytes(bytes))
136133
end
137-
print(Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
138-
end
139-
if compile_time > 0
140-
if bytes != 0 || allocs != 0 || gctime > 0
141-
print(", ")
134+
if gctime > 0
135+
if bytes != 0 || allocs != 0
136+
print(io, ", ")
137+
end
138+
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
142139
end
143-
print(Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
140+
if compile_time > 0
141+
if bytes != 0 || allocs != 0 || gctime > 0
142+
print(io, ", ")
143+
end
144+
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
145+
end
146+
parens && print(io, ")")
144147
end
145-
parens && print(")")
148+
newline ? println(str) : print(str)
146149
nothing
147150
end
148151

149152
function timev_print(elapsedtime, diff::GC_Diff, compile_time)
150153
allocs = gc_alloc_count(diff)
151-
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time)
152-
print("\nelapsed time (ns): $elapsedtime\n")
154+
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true)
155+
print("elapsed time (ns): $elapsedtime\n")
153156
padded_nonzero_print(diff.total_time, "gc time (ns)")
154157
padded_nonzero_print(diff.allocd, "bytes allocated")
155158
padded_nonzero_print(diff.poolalloc, "pool allocs")
@@ -200,13 +203,14 @@ macro time(ex)
200203
local stats = gc_num()
201204
local compile_elapsedtime = cumulative_compile_time_ns_before()
202205
local elapsedtime = time_ns()
203-
local val = $(esc(ex))
204-
elapsedtime = time_ns() - elapsedtime
205-
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
206+
local val = try
207+
$(esc(ex))
208+
finally
209+
elapsedtime = time_ns() - elapsedtime
210+
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
211+
end
206212
local diff = GC_Diff(gc_num(), stats)
207-
time_print(elapsedtime, diff.allocd, diff.total_time,
208-
gc_alloc_count(diff), compile_elapsedtime)
209-
println()
213+
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true)
210214
val
211215
end
212216
end
@@ -248,10 +252,14 @@ macro timev(ex)
248252
local stats = gc_num()
249253
local compile_elapsedtime = cumulative_compile_time_ns_before()
250254
local elapsedtime = time_ns()
251-
local val = $(esc(ex))
252-
elapsedtime = time_ns() - elapsedtime
253-
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
254-
timev_print(elapsedtime, GC_Diff(gc_num(), stats), compile_elapsedtime)
255+
local val = try
256+
$(esc(ex))
257+
finally
258+
elapsedtime = time_ns() - elapsedtime
259+
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
260+
end
261+
local diff = GC_Diff(gc_num(), stats)
262+
timev_print(elapsedtime, diff, compile_elapsedtime)
255263
val
256264
end
257265
end

src/aotcompile.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
293293
JL_GC_PUSH1(&src);
294294
JL_LOCK(&codegen_lock);
295295
uint64_t compiler_start_time = 0;
296-
if (jl_measure_compile_time)
296+
int tid = jl_threadid();
297+
if (jl_measure_compile_time[tid])
297298
compiler_start_time = jl_hrtime();
298299

299300
CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -417,8 +418,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
417418
}
418419

419420
data->M = std::move(clone);
420-
if (jl_measure_compile_time)
421-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
421+
if (jl_measure_compile_time[tid])
422+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
422423
JL_UNLOCK(&codegen_lock); // Might GC
423424
return (void*)data;
424425
}
@@ -896,7 +897,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
896897
jl_llvm_functions_t decls;
897898
JL_LOCK(&codegen_lock);
898899
uint64_t compiler_start_time = 0;
899-
if (jl_measure_compile_time)
900+
int tid = jl_threadid();
901+
if (jl_measure_compile_time[tid])
900902
compiler_start_time = jl_hrtime();
901903
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
902904

@@ -921,8 +923,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
921923
m.release(); // the return object `llvmf` will be the owning pointer
922924
}
923925
JL_GC_POP();
924-
if (jl_measure_compile_time)
925-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
926+
if (jl_measure_compile_time[tid])
927+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
926928
JL_UNLOCK(&codegen_lock); // Might GC
927929
if (F)
928930
return F;

src/gf.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3139,21 +3139,20 @@ int jl_has_concrete_subtype(jl_value_t *typ)
31393139
//static jl_mutex_t typeinf_lock;
31403140
#define typeinf_lock codegen_lock
31413141

3142-
uint8_t jl_measure_compile_time = 0;
3143-
uint64_t jl_cumulative_compile_time = 0;
31443142
static uint64_t inference_start_time = 0;
31453143

31463144
JL_DLLEXPORT void jl_typeinf_begin(void)
31473145
{
31483146
JL_LOCK(&typeinf_lock);
3149-
if (jl_measure_compile_time)
3147+
if (jl_measure_compile_time[jl_threadid()])
31503148
inference_start_time = jl_hrtime();
31513149
}
31523150

31533151
JL_DLLEXPORT void jl_typeinf_end(void)
31543152
{
3155-
if (typeinf_lock.count == 1 && jl_measure_compile_time)
3156-
jl_cumulative_compile_time += (jl_hrtime() - inference_start_time);
3153+
int tid = jl_threadid();
3154+
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
3155+
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
31573156
JL_UNLOCK(&typeinf_lock);
31583157
}
31593158

src/jitlayers.cpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
7878
extern "C" JL_DLLEXPORT
7979
uint64_t jl_cumulative_compile_time_ns_before()
8080
{
81-
jl_measure_compile_time = 1;
82-
return jl_cumulative_compile_time;
81+
int tid = jl_threadid();
82+
jl_measure_compile_time[tid] = 1;
83+
return jl_cumulative_compile_time[tid];
8384
}
8485
extern "C" JL_DLLEXPORT
8586
uint64_t jl_cumulative_compile_time_ns_after()
8687
{
87-
jl_measure_compile_time = 0;
88-
return jl_cumulative_compile_time;
88+
int tid = jl_threadid();
89+
jl_measure_compile_time[tid] = 0;
90+
return jl_cumulative_compile_time[tid];
8991
}
9092

9193
// this generates llvm code for the lambda info
@@ -231,7 +233,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
231233
{
232234
JL_LOCK(&codegen_lock);
233235
uint64_t compiler_start_time = 0;
234-
if (jl_measure_compile_time)
236+
int tid = jl_threadid();
237+
if (jl_measure_compile_time[tid])
235238
compiler_start_time = jl_hrtime();
236239
jl_codegen_params_t params;
237240
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -255,8 +258,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
255258
if (success && llvmmod == NULL)
256259
jl_add_to_ee(std::unique_ptr<Module>(into));
257260
}
258-
if (codegen_lock.count == 1 && jl_measure_compile_time)
259-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
261+
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
262+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
260263
JL_UNLOCK(&codegen_lock);
261264
return success;
262265
}
@@ -314,7 +317,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
314317
{
315318
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
316319
uint64_t compiler_start_time = 0;
317-
if (jl_measure_compile_time)
320+
int tid = jl_threadid();
321+
if (jl_measure_compile_time[tid])
318322
compiler_start_time = jl_hrtime();
319323
// if we don't have any decls already, try to generate it now
320324
jl_code_info_t *src = NULL;
@@ -352,8 +356,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
352356
else {
353357
codeinst = NULL;
354358
}
355-
if (codegen_lock.count == 1 && jl_measure_compile_time)
356-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
359+
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
360+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
357361
JL_UNLOCK(&codegen_lock);
358362
JL_GC_POP();
359363
return codeinst;
@@ -367,7 +371,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
367371
}
368372
JL_LOCK(&codegen_lock);
369373
uint64_t compiler_start_time = 0;
370-
if (jl_measure_compile_time)
374+
int tid = jl_threadid();
375+
if (jl_measure_compile_time[tid])
371376
compiler_start_time = jl_hrtime();
372377
if (unspec->invoke == NULL) {
373378
jl_code_info_t *src = NULL;
@@ -395,8 +400,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
395400
}
396401
JL_GC_POP();
397402
}
398-
if (codegen_lock.count == 1 && jl_measure_compile_time)
399-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
403+
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
404+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
400405
JL_UNLOCK(&codegen_lock); // Might GC
401406
}
402407

@@ -419,7 +424,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
419424
// so create an exception here so we can print pretty our lies
420425
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
421426
uint64_t compiler_start_time = 0;
422-
if (jl_measure_compile_time)
427+
int tid = jl_threadid();
428+
if (jl_measure_compile_time[tid])
423429
compiler_start_time = jl_hrtime();
424430
specfptr = (uintptr_t)codeinst->specptr.fptr;
425431
if (specfptr == 0) {
@@ -444,8 +450,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
444450
}
445451
JL_GC_POP();
446452
}
447-
if (jl_measure_compile_time)
448-
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
453+
if (jl_measure_compile_time[tid])
454+
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
449455
JL_UNLOCK(&codegen_lock);
450456
}
451457
if (specfptr != 0)

src/julia_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ static inline uint64_t cycleclock(void)
124124

125125
#include "timing.h"
126126

127-
extern uint8_t jl_measure_compile_time;
128-
extern uint64_t jl_cumulative_compile_time;
127+
extern uint8_t *jl_measure_compile_time;
128+
extern uint64_t *jl_cumulative_compile_time;
129129

130130
#ifdef _COMPILER_MICROSOFT_
131131
# define jl_return_address() ((uintptr_t)_ReturnAddress())

src/threading.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
222222
#endif
223223

224224
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
225+
uint8_t *jl_measure_compile_time = NULL;
226+
uint64_t *jl_cumulative_compile_time = NULL;
225227

226228
// return calling thread's ID
227229
// Also update the suspended_threads list in signals-mach when changing the
@@ -399,6 +401,8 @@ void jl_init_threading(void)
399401
jl_n_threads = (uint64_t)strtol(cp, NULL, 10);
400402
if (jl_n_threads <= 0)
401403
jl_n_threads = 1;
404+
jl_measure_compile_time = realloc( jl_measure_compile_time, jl_n_threads * sizeof *jl_measure_compile_time );
405+
jl_cumulative_compile_time = realloc( jl_cumulative_compile_time, jl_n_threads * sizeof *jl_cumulative_compile_time );
402406
#ifndef __clang_analyzer__
403407
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
404408
#endif

0 commit comments

Comments
 (0)