Skip to content

Commit 5e8ac85

Browse files
vtjnashKristofferC
authored andcommitted
[internals] add time metrics for every CodeInstance (#57074)
Adds 4 new Float16 fields to CodeInstance to replace Compiler.Timings with continually collected and available measurements. Sample results on a novel method signature: julia> @time code_native(devnull, ÷, dump_module=false, (Int32, UInt16)); 0.006262 seconds (3.62 k allocations: 186.641 KiB, 75.53% compilation time) julia> b = which(÷, (Int32, UInt16)).specializations[6].cache CodeInstance for MethodInstance for div(::Int32, ::UInt16) julia> reinterpret(Float16, b.time_infer_self) Float16(0.0002766) julia> reinterpret(Float16, b.time_infer_total) Float16(0.00049) julia> reinterpret(Float16, b.time_infer_cache_saved) Float16(0.02774) julia> reinterpret(Float16, b.time_compile) Float16(0.003773) Closes #56115 (cherry picked from commit 18b5d8f)
1 parent 4652a09 commit 5e8ac85

18 files changed

+221
-125
lines changed

Compiler/src/abstractinterpretation.jl

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,8 @@ function const_prop_call(interp::AbstractInterpreter,
13521352
end
13531353
assign_parentchild!(frame, sv)
13541354
if !typeinf(interp, frame)
1355+
sv.time_caches += frame.time_caches
1356+
sv.time_paused += frame.time_paused
13551357
add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
13561358
@assert frame.frameid != 0 && frame.cycleid == frame.frameid
13571359
callstack = frame.callstack::Vector{AbsIntState}
@@ -4313,6 +4315,7 @@ end
43134315
# make as much progress on `frame` as possible (by handling cycles)
43144316
warnlength::Int = 2500
43154317
function typeinf(interp::AbstractInterpreter, frame::InferenceState)
4318+
time_before = _time_ns()
43164319
callstack = frame.callstack::Vector{AbsIntState}
43174320
nextstates = CurrentState[]
43184321
takenext = frame.frameid
@@ -4344,24 +4347,30 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
43444347
# get_compileable_sig), but still must be finished up since it may see and
43454348
# change the local variables of the InferenceState at currpc, we do this
43464349
# even if the nextresult status is already completed.
4347-
continue
43484350
elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
43494351
# Next make progress on this frame
43504352
prev = length(callee.tasks) + 1
43514353
nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
43524354
reverse!(callee.tasks, prev)
43534355
elseif callee.cycleid == length(callstack)
43544356
# With no active ip's and no cycles, frame is done
4355-
finish_nocycle(interp, callee)
4357+
time_now = _time_ns()
4358+
callee.time_self_ns += (time_now - time_before)
4359+
time_before = time_now
4360+
finish_nocycle(interp, callee, time_before)
43564361
callee.frameid == 0 && break
43574362
takenext = length(callstack)
43584363
nextstateid = takenext + 1 - frame.frameid
43594364
#@assert length(nextstates) == nextstateid + 1
43604365
#@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
43614366
resize!(nextstates, nextstateid)
4367+
continue
43624368
elseif callee.cycleid == callee.frameid
43634369
# If the current frame is the top part of a cycle, check if the whole cycle
43644370
# is done, and if not, pick the next item to work on.
4371+
time_now = _time_ns()
4372+
callee.time_self_ns += (time_now - time_before)
4373+
time_before = time_now
43654374
no_active_ips_in_cycle = true
43664375
for i = callee.cycleid:length(callstack)
43674376
caller = callstack[i]::InferenceState
@@ -4372,7 +4381,7 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
43724381
end
43734382
end
43744383
if no_active_ips_in_cycle
4375-
finish_cycle(interp, callstack, callee.cycleid)
4384+
finish_cycle(interp, callstack, callee.cycleid, time_before)
43764385
end
43774386
takenext = length(callstack)
43784387
nextstateid = takenext + 1 - frame.frameid
@@ -4382,10 +4391,14 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
43824391
else
43834392
#@assert length(nextstates) == nextstateid
43844393
end
4394+
continue
43854395
else
43864396
# Continue to the next frame in this cycle
43874397
takenext = takenext - 1
43884398
end
4399+
time_now = _time_ns()
4400+
callee.time_self_ns += (time_now - time_before)
4401+
time_before = time_now
43894402
end
43904403
#@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
43914404
return is_inferred(frame)

Compiler/src/inferencestate.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,10 @@ mutable struct InferenceState
302302
bestguess #::Type
303303
exc_bestguess
304304
ipo_effects::Effects
305+
time_start::UInt64
306+
time_caches::Float64
307+
time_paused::UInt64
308+
time_self_ns::UInt64
305309

306310
#= flags =#
307311
# Whether to restrict inference of abstract call sites to avoid excessive work
@@ -392,6 +396,7 @@ mutable struct InferenceState
392396
currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info,
393397
tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid,
394398
result, unreachable, bestguess, exc_bestguess, ipo_effects,
399+
_time_ns(), 0.0, 0, 0,
395400
restrict_abstract_call_sites, cache_mode, insert_coverage,
396401
interp)
397402

@@ -815,6 +820,8 @@ mutable struct IRInterpretationState
815820
const mi::MethodInstance
816821
world::WorldWithRange
817822
curridx::Int
823+
time_caches::Float64
824+
time_paused::UInt64
818825
const argtypes_refined::Vector{Bool}
819826
const sptypes::Vector{VarState}
820827
const tpdum::TwoPhaseDefUseMap
@@ -849,7 +856,8 @@ mutable struct IRInterpretationState
849856
tasks = WorkThunk[]
850857
edges = Any[]
851858
callstack = AbsIntState[]
852-
return new(spec_info, ir, mi, WorldWithRange(world, valid_worlds), curridx, argtypes_refined, ir.sptypes, tpdum,
859+
return new(spec_info, ir, mi, WorldWithRange(world, valid_worlds),
860+
curridx, 0.0, 0, argtypes_refined, ir.sptypes, tpdum,
853861
ssa_refined, lazyreachability, tasks, edges, callstack, 0, 0)
854862
end
855863
end

Compiler/src/typeinfer.jl

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module Timings
1212

1313
using ..Core
1414
using ..Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
15-
@inbounds, copy, backtrace
15+
@inbounds, copy, backtrace, _time_ns
1616

1717
# What we record for any given frame we infer during type inference.
1818
struct InferenceFrameInfo
@@ -53,8 +53,6 @@ end
5353
Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
5454
Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
5555

56-
_time_ns() = ccall(:jl_hrtime, UInt64, ())
57-
5856
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
5957
# Since type inference currently operates via a depth-first search (during abstract
6058
# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -93,7 +91,7 @@ If set to `true`, record per-method-instance timings within type inference in th
9391
__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
9492
const __measure_typeinf__ = RefValue{Bool}(false)
9593

96-
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt)
94+
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64)
9795
result = caller.result
9896
opt = result.src
9997
if opt isa OptimizationState
@@ -139,9 +137,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
139137
if !@isdefined di
140138
di = DebugInfo(result.linfo)
141139
end
142-
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
140+
time_now = _time_ns()
141+
time_self_ns = caller.time_self_ns + (time_now - time_before)
142+
time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9
143+
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
143144
ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects),
144-
result.analysis_results, di, edges)
145+
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges)
145146
engine_reject(interp, ci)
146147
if !discard_src && isdefined(interp, :codegen) && uncompressed isa CodeInfo
147148
# record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
@@ -182,8 +183,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
182183
end
183184
ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
184185
ci, rettype, exctype, nothing, const_flags, min_world, max_world, ipo_effects, nothing, di, edges)
185-
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
186-
ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, di, edges)
186+
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
187+
ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, 0.0, 0.0, 0.0, di, edges)
187188
code_cache(interp)[mi] = ci
188189
if isdefined(interp, :codegen)
189190
interp.codegen[ci] = src
@@ -192,14 +193,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
192193
return nothing
193194
end
194195

195-
function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
196+
function finish_nocycle(::AbstractInterpreter, frame::InferenceState, time_before::UInt64)
196197
finishinfer!(frame, frame.interp, frame.cycleid)
197198
opt = frame.result.src
198199
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
199200
optimize(frame.interp, opt, frame.result)
200201
end
201202
validation_world = get_world_counter()
202-
finish!(frame.interp, frame, validation_world)
203+
finish!(frame.interp, frame, validation_world, time_before)
203204
if isdefined(frame.result, :ci)
204205
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
205206
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -214,7 +215,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
214215
return nothing
215216
end
216217

217-
function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int)
218+
function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int, time_before::UInt64)
218219
cycle_valid_worlds = WorldRange()
219220
cycle_valid_effects = EFFECTS_TOTAL
220221
for frameid = cycleid:length(frames)
@@ -231,23 +232,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
231232
caller = frames[frameid]::InferenceState
232233
adjust_cycle_frame!(caller, cycle_valid_worlds, cycle_valid_effects)
233234
finishinfer!(caller, caller.interp, cycleid)
235+
time_now = _time_ns()
236+
caller.time_self_ns += (time_now - time_before)
237+
time_before = time_now
234238
end
239+
time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
240+
time_paused = UInt64(0)
235241
for frameid = cycleid:length(frames)
236242
caller = frames[frameid]::InferenceState
237243
opt = caller.result.src
238244
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
239245
optimize(caller.interp, opt, caller.result)
246+
time_now = _time_ns()
247+
caller.time_self_ns += (time_now - time_before)
248+
time_before = time_now
240249
end
250+
time_caches += caller.time_caches
251+
time_paused += caller.time_paused
252+
caller.time_paused = UInt64(0)
253+
caller.time_caches = 0.0
241254
end
255+
cycletop = frames[cycleid]::InferenceState
256+
time_start = cycletop.time_start
242257
validation_world = get_world_counter()
243258
cis = CodeInstance[]
244259
for frameid = cycleid:length(frames)
245260
caller = frames[frameid]::InferenceState
246-
finish!(caller.interp, caller, validation_world)
261+
caller.time_start = time_start
262+
caller.time_caches = time_caches
263+
caller.time_paused = time_paused
264+
finish!(caller.interp, caller, validation_world, time_before)
247265
if isdefined(caller.result, :ci)
248266
push!(cis, caller.result.ci)
249267
end
250268
end
269+
if cycletop.parentid != 0
270+
parent = frames[cycletop.parentid]
271+
parent.time_caches += time_caches
272+
parent.time_paused += time_paused
273+
end
251274
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
252275
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
253276
# validity.
@@ -778,9 +801,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
778801
rt = cached_return_type(codeinst)
779802
exct = codeinst.exctype
780803
effects = ipo_effects(codeinst)
781-
edge = codeinst
782804
update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst)))
783-
return Future(MethodCallResult(interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
805+
caller.time_caches += reinterpret(Float16, codeinst.time_infer_total)
806+
caller.time_caches += reinterpret(Float16, codeinst.time_infer_cache_saved)
807+
return Future(MethodCallResult(interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
784808
end
785809

786810
function MethodCallResult(::AbstractInterpreter, sv::AbsIntState, method::Method,
@@ -876,7 +900,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
876900
if frame === false
877901
# completely new, but check again after reserving in the engine
878902
if cache_mode == CACHE_MODE_GLOBAL
903+
reserve_start = _time_ns() # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
879904
ci_from_engine = engine_reserve(interp, mi)
905+
caller.time_paused += (_time_ns() - reserve_start)
880906
edge_ci = ci_from_engine
881907
codeinst = get(code_cache(interp), mi, nothing)
882908
if codeinst isa CodeInstance # return existing rettype if the code is already inferred

Compiler/src/utilities.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,3 +351,5 @@ function inbounds_option()
351351
end
352352

353353
is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1
354+
355+
_time_ns() = ccall(:jl_hrtime, UInt64, ())

base/Base.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ include("views.jl")
3636

3737
# numeric operations
3838
include("hashing.jl")
39-
include("rounding.jl")
4039
include("div.jl")
41-
include("float.jl")
4240
include("twiceprecision.jl")
4341
include("complex.jl")
4442
include("rational.jl")

base/Base_compiler.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ include("operators.jl")
277277
include("pointer.jl")
278278
include("refvalue.jl")
279279
include("cmem.jl")
280+
include("rounding.jl")
281+
include("float.jl")
280282

281283
include("checked.jl")
282284
using .Checked

base/essentials.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ macro _nothrow_meta()
323323
#=:consistent_overlay=#false,
324324
#=:nortcall=#false))
325325
end
326-
# can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
326+
# can be used in place of `@assume_effects :noub` (supposed to be used for bootstrapping)
327327
macro _noub_meta()
328328
return _is_internal(__module__) && Expr(:meta, Expr(:purity,
329329
#=:consistent=#false,

0 commit comments

Comments
 (0)