@@ -12,7 +12,7 @@ module Timings
12
12
13
13
using .. Core
14
14
using .. Compiler: - , + , :, Vector, length, first, empty!, push!, pop!, @inline ,
15
- @inbounds , copy, backtrace
15
+ @inbounds , copy, backtrace, _time_ns
16
16
17
17
# What we record for any given frame we infer during type inference.
18
18
struct InferenceFrameInfo
53
53
Timing (mi_info, start_time, cur_start_time, time, children) = Timing (mi_info, start_time, cur_start_time, time, children, nothing )
54
54
Timing (mi_info, start_time) = Timing (mi_info, start_time, start_time, UInt64 (0 ), Timing[])
55
55
56
- _time_ns () = ccall (:jl_hrtime , UInt64, ())
57
-
58
56
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
59
57
# Since type inference currently operates via a depth-first search (during abstract
60
58
# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -93,7 +91,7 @@ If set to `true`, record per-method-instance timings within type inference in th
93
91
__set_measure_typeinf (onoff:: Bool ) = __measure_typeinf__[] = onoff
94
92
const __measure_typeinf__ = RefValue {Bool} (false )
95
93
96
- function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt )
94
+ function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt , time_before :: UInt64 )
97
95
result = caller. result
98
96
opt = result. src
99
97
if opt isa OptimizationState
@@ -139,9 +137,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
139
137
if ! @isdefined di
140
138
di = DebugInfo (result. linfo)
141
139
end
142
- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
140
+ time_now = _time_ns ()
141
+ time_self_ns = caller. time_self_ns + (time_now - time_before)
142
+ time_total = (time_now - caller. time_start - caller. time_paused) * 1e-9
143
+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
143
144
ci, inferred_result, const_flag, first (result. valid_worlds), last (result. valid_worlds), encode_effects (result. ipo_effects),
144
- result. analysis_results, di, edges)
145
+ result. analysis_results, time_total, caller . time_caches, time_self_ns * 1e-9 , di, edges)
145
146
engine_reject (interp, ci)
146
147
if ! discard_src && isdefined (interp, :codegen ) && uncompressed isa CodeInfo
147
148
# record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
@@ -182,8 +183,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
182
183
end
183
184
ccall (:jl_fill_codeinst , Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
184
185
ci, rettype, exctype, nothing , const_flags, min_world, max_world, ipo_effects, nothing , di, edges)
185
- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
186
- ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , di, edges)
186
+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
187
+ ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , 0.0 , 0.0 , 0.0 , di, edges)
187
188
code_cache (interp)[mi] = ci
188
189
if isdefined (interp, :codegen )
189
190
interp. codegen[ci] = src
@@ -192,14 +193,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
192
193
return nothing
193
194
end
194
195
195
- function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState )
196
+ function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState , time_before :: UInt64 )
196
197
finishinfer! (frame, frame. interp, frame. cycleid)
197
198
opt = frame. result. src
198
199
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
199
200
optimize (frame. interp, opt, frame. result)
200
201
end
201
202
validation_world = get_world_counter ()
202
- finish! (frame. interp, frame, validation_world)
203
+ finish! (frame. interp, frame, validation_world, time_before )
203
204
if isdefined (frame. result, :ci )
204
205
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
205
206
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -214,7 +215,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
214
215
return nothing
215
216
end
216
217
217
- function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int )
218
+ function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int , time_before :: UInt64 )
218
219
cycle_valid_worlds = WorldRange ()
219
220
cycle_valid_effects = EFFECTS_TOTAL
220
221
for frameid = cycleid: length (frames)
@@ -231,23 +232,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
231
232
caller = frames[frameid]:: InferenceState
232
233
adjust_cycle_frame! (caller, cycle_valid_worlds, cycle_valid_effects)
233
234
finishinfer! (caller, caller. interp, cycleid)
235
+ time_now = _time_ns ()
236
+ caller. time_self_ns += (time_now - time_before)
237
+ time_before = time_now
234
238
end
239
+ time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
240
+ time_paused = UInt64 (0 )
235
241
for frameid = cycleid: length (frames)
236
242
caller = frames[frameid]:: InferenceState
237
243
opt = caller. result. src
238
244
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
239
245
optimize (caller. interp, opt, caller. result)
246
+ time_now = _time_ns ()
247
+ caller. time_self_ns += (time_now - time_before)
248
+ time_before = time_now
240
249
end
250
+ time_caches += caller. time_caches
251
+ time_paused += caller. time_paused
252
+ caller. time_paused = UInt64 (0 )
253
+ caller. time_caches = 0.0
241
254
end
255
+ cycletop = frames[cycleid]:: InferenceState
256
+ time_start = cycletop. time_start
242
257
validation_world = get_world_counter ()
243
258
cis = CodeInstance[]
244
259
for frameid = cycleid: length (frames)
245
260
caller = frames[frameid]:: InferenceState
246
- finish! (caller. interp, caller, validation_world)
261
+ caller. time_start = time_start
262
+ caller. time_caches = time_caches
263
+ caller. time_paused = time_paused
264
+ finish! (caller. interp, caller, validation_world, time_before)
247
265
if isdefined (caller. result, :ci )
248
266
push! (cis, caller. result. ci)
249
267
end
250
268
end
269
+ if cycletop. parentid != 0
270
+ parent = frames[cycletop. parentid]
271
+ parent. time_caches += time_caches
272
+ parent. time_paused += time_paused
273
+ end
251
274
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
252
275
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
253
276
# validity.
@@ -778,9 +801,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
778
801
rt = cached_return_type (codeinst)
779
802
exct = codeinst. exctype
780
803
effects = ipo_effects (codeinst)
781
- edge = codeinst
782
804
update_valid_age! (caller, WorldRange (min_world (codeinst), max_world (codeinst)))
783
- return Future (MethodCallResult (interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
805
+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_total)
806
+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_cache_saved)
807
+ return Future (MethodCallResult (interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
784
808
end
785
809
786
810
function MethodCallResult (:: AbstractInterpreter , sv:: AbsIntState , method:: Method ,
@@ -876,7 +900,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
876
900
if frame === false
877
901
# completely new, but check again after reserving in the engine
878
902
if cache_mode == CACHE_MODE_GLOBAL
903
+ reserve_start = _time_ns () # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
879
904
ci_from_engine = engine_reserve (interp, mi)
905
+ caller. time_paused += (_time_ns () - reserve_start)
880
906
edge_ci = ci_from_engine
881
907
codeinst = get (code_cache (interp), mi, nothing )
882
908
if codeinst isa CodeInstance # return existing rettype if the code is already inferred
0 commit comments