Skip to content

Commit e0a1611

Browse files
vtjnashKristofferC
authored andcommitted
much faster code-coverage for packages (#57988)
We already have an excellent framework for selective code reuse, so use that tool instead of a sledgehammer for inserting selective coverage and malloc instrumentation. As a small bonus, this should also be significantly more accurate by not being vulnerable to precompilation inserting incorrect (uninstrumented) contents into the caches. (cherry picked from commit 5fedf47)
1 parent 65657d1 commit e0a1611

File tree

4 files changed

+85
-64
lines changed

4 files changed

+85
-64
lines changed

Compiler/src/inferencestate.jl

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -575,21 +575,23 @@ function (::ComputeTryCatch{Handler})(code::Vector{Any}, bbs::Union{Vector{Basic
575575
end
576576

577577
# check if coverage mode is enabled
578-
function should_insert_coverage(mod::Module, debuginfo::DebugInfo)
579-
coverage_enabled(mod) && return true
580-
JLOptions().code_coverage == 3 || return false
578+
should_insert_coverage(mod::Module, debuginfo::DebugInfo) = should_instrument(mod, debuginfo, true)
579+
580+
function should_instrument(mod::Module, debuginfo::DebugInfo, only_if_affects_optimizer::Bool=false)
581+
instrumentation_enabled(mod, only_if_affects_optimizer) && return true
582+
JLOptions().code_coverage == 3 || JLOptions().malloc_log == 3 || return false
581583
# path-specific coverage mode: if any line falls in a tracked file enable coverage for all
582-
return _should_insert_coverage(debuginfo)
584+
return _should_instrument(debuginfo)
583585
end
584586

585-
_should_insert_coverage(mod::Symbol) = is_file_tracked(mod)
586-
_should_insert_coverage(mod::Method) = _should_insert_coverage(mod.file)
587-
_should_insert_coverage(mod::MethodInstance) = _should_insert_coverage(mod.def)
588-
_should_insert_coverage(mod::Module) = false
589-
function _should_insert_coverage(info::DebugInfo)
587+
_should_instrument(loc::Symbol) = is_file_tracked(loc)
588+
_should_instrument(loc::Method) = _should_instrument(loc.file)
589+
_should_instrument(loc::MethodInstance) = _should_instrument(loc.def)
590+
_should_instrument(loc::Module) = false
591+
function _should_instrument(info::DebugInfo)
590592
linetable = info.linetable
591-
linetable === nothing || (_should_insert_coverage(linetable) && return true)
592-
_should_insert_coverage(info.def) && return true
593+
linetable === nothing || (_should_instrument(linetable) && return true)
594+
_should_instrument(info.def) && return true
593595
return false
594596
end
595597

Compiler/src/utilities.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ end
329329

330330
inlining_enabled() = (JLOptions().can_inline == 1)
331331

332-
function coverage_enabled(m::Module)
332+
function instrumentation_enabled(m::Module, only_if_affects_optimizer::Bool)
333333
generating_output() && return false # don't alter caches
334334
cov = JLOptions().code_coverage
335335
if cov == 1 # user
@@ -340,6 +340,17 @@ function coverage_enabled(m::Module)
340340
elseif cov == 2 # all
341341
return true
342342
end
343+
if !only_if_affects_optimizer
344+
log = JLOptions().malloc_log
345+
if log == 1 # user
346+
m = moduleroot(m)
347+
m === Core && return false
348+
isdefined(Main, :Base) && m === Main.Base && return false
349+
return true
350+
elseif log == 2 # all
351+
return true
352+
end
353+
end
343354
return false
344355
end
345356

base/loading.jl

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,21 +1243,7 @@ const TIMING_IMPORTS = Threads.Atomic{Int}(0)
12431243
# these return either the array of modules loaded from the path / content given
12441244
# or an Exception that describes why it couldn't be loaded
12451245
# and it reconnects the Base.Docs.META
1246-
function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing; register::Bool=true)
1247-
if isnothing(ignore_native)
1248-
if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0
1249-
ignore_native = false
1250-
else
1251-
io = open(path, "r")
1252-
try
1253-
iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.")
1254-
_, (includes, _, _), _, _, _, _, _, _ = parse_cache_header(io, path)
1255-
ignore_native = pkg_tracked(includes)
1256-
finally
1257-
close(io)
1258-
end
1259-
end
1260-
end
1246+
function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}; register::Bool=true)
12611247
assert_havelock(require_lock)
12621248
timing_imports = TIMING_IMPORTS[] > 0
12631249
try
@@ -1276,6 +1262,7 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
12761262
depmods[i] = dep
12771263
end
12781264

1265+
ignore_native = false
12791266
unlock(require_lock) # temporarily _unlock_ during these operations
12801267
sv = try
12811268
if ocachepath !== nothing
@@ -1945,44 +1932,16 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
19451932
return ErrorException("Required dependency $modkey failed to load from a cache file.")
19461933
end
19471934

1948-
# returns whether the package is tracked in coverage or malloc tracking based on
1949-
# JLOptions and includes
1950-
function pkg_tracked(includes)
1951-
if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0
1952-
return false
1953-
elseif JLOptions().code_coverage == 1 || JLOptions().malloc_log == 1 # user
1954-
# Just say true. Pkgimages aren't in Base
1955-
return true
1956-
elseif JLOptions().code_coverage == 2 || JLOptions().malloc_log == 2 # all
1957-
return true
1958-
elseif JLOptions().code_coverage == 3 || JLOptions().malloc_log == 3 # tracked path
1959-
if JLOptions().tracked_path == C_NULL
1960-
return false
1961-
else
1962-
tracked_path = unsafe_string(JLOptions().tracked_path)
1963-
if isempty(tracked_path)
1964-
return false
1965-
else
1966-
return any(includes) do inc
1967-
startswith(inc.filename, tracked_path)
1968-
end
1969-
end
1970-
end
1971-
end
1972-
end
1973-
19741935
# loads a precompile cache file, ignoring stale_cachefile tests
19751936
# load all dependent modules first
19761937
function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String})
19771938
assert_havelock(require_lock)
19781939
local depmodnames
19791940
io = open(path, "r")
1980-
ignore_native = false
19811941
try
19821942
iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.")
19831943
_, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path)
19841944

1985-
ignore_native = pkg_tracked(includes)
19861945

19871946
pkgimage = !isempty(clone_targets)
19881947
if pkgimage
@@ -2009,7 +1968,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union
20091968
depmods[i] = dep
20101969
end
20111970
# then load the file
2012-
loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native; register = true)
1971+
loaded = _include_from_serialized(pkg, path, ocachepath, depmods; register = true)
20131972
return loaded
20141973
end
20151974

base/staticdata.jl

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
module StaticData
44

5-
using Core: CodeInstance, MethodInstance
6-
using Base: get_world_counter
5+
using .Core: CodeInstance, MethodInstance
6+
using .Base: JLOptions, Compiler, get_world_counter, _methods_by_ftype, get_methodtable
77

88
const WORLD_AGE_REVALIDATION_SENTINEL::UInt = 1
99
const _jl_debug_method_invalidation = Ref{Union{Nothing,Vector{Any}}}(nothing)
@@ -73,6 +73,51 @@ end
7373

7474
get_require_world() = unsafe_load(cglobal(:jl_require_world, UInt))
7575

76+
function gen_staged_sig(def::Method, mi::MethodInstance)
77+
isdefined(def, :generator) || return nothing
78+
isdispatchtuple(mi.specTypes) || return nothing
79+
gen = Core.Typeof(def.generator)
80+
return Tuple{gen, UInt, Method, Vararg}
81+
## more precise method lookup, but more costly and likely not actually better?
82+
#tts = (mi.specTypes::DataType).parameters
83+
#sps = Any[Core.Typeof(mi.sparam_vals[i]) for i in 1:length(mi.sparam_vals)]
84+
#if def.isva
85+
# return Tuple{gen, UInt, Method, sps..., tts[1:def.nargs - 1]..., Tuple{tts[def.nargs - 1:end]...}}
86+
#else
87+
# return Tuple{gen, UInt, Method, sps..., tts...}
88+
#end
89+
end
90+
91+
function needs_instrumentation(codeinst::CodeInstance, mi::MethodInstance, def::Method, validation_world::UInt)
92+
if JLOptions().code_coverage != 0 || JLOptions().malloc_log != 0
93+
# test if the code needs to run with instrumentation, in which case we cannot use existing generated code
94+
if isdefined(def, :debuginfo) ? # generated_only functions do not have debuginfo, so fall back to considering their codeinst debuginfo though this may be slower (and less accurate?)
95+
Compiler.should_instrument(def.module, def.debuginfo) :
96+
Compiler.should_instrument(def.module, codeinst.debuginfo)
97+
return true
98+
end
99+
gensig = gen_staged_sig(def, mi)
100+
if gensig !== nothing
101+
# if this is defined by a generator, try to consider forcing re-running the generators too, to add coverage for them
102+
minworld = Ref{UInt}(1)
103+
maxworld = Ref{UInt}(typemax(UInt))
104+
has_ambig = Ref{Int32}(0)
105+
result = _methods_by_ftype(gensig, nothing, -1, validation_world, #=ambig=#false, minworld, maxworld, has_ambig)
106+
if result !== nothing
107+
for k = 1:length(result)
108+
match = result[k]::Core.MethodMatch
109+
genmethod = match.method
110+
# no, I refuse to refuse to recurse into your cursed generated function generators and will only test one level deep here
111+
if isdefined(genmethod, :debuginfo) && Compiler.should_instrument(genmethod.module, genmethod.debuginfo)
112+
return true
113+
end
114+
end
115+
end
116+
end
117+
end
118+
return false
119+
end
120+
76121
# Test all edges relevant to a method:
77122
# - Visit the entire call graph, starting from edges[idx] to determine if that method is valid
78123
# - Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
@@ -84,6 +129,12 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi
84129
return 0, world, max_valid2
85130
end
86131
end
132+
mi = get_ci_mi(codeinst)
133+
def = mi.def::Method
134+
if needs_instrumentation(codeinst, mi, def, validation_world)
135+
return 0, world, UInt(0)
136+
end
137+
87138
# Implicitly referenced bindings in the current module do not get explicit edges.
88139
# If they were invalidated, they'll be in `mwis`. If they weren't, they imply a minworld
89140
# of `get_require_world`. In principle, this is only required for methods that do reference
@@ -92,8 +143,6 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi
92143
# but no implicit edges) is rare and there would be little benefit to lower the minworld for it
93144
# in any case, so we just always use `get_require_world` here.
94145
local minworld::UInt, maxworld::UInt = get_require_world(), validation_world
95-
def = get_ci_mi(codeinst).def
96-
@assert def isa Method
97146
if haskey(visiting, codeinst)
98147
return visiting[codeinst], minworld, maxworld
99148
end
@@ -225,7 +274,7 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi
225274
end
226275
@atomic :monotonic child.max_world = maxworld
227276
if maxworld == validation_world && validation_world == get_world_counter()
228-
Base.Compiler.store_backedges(child, child.edges)
277+
Compiler.store_backedges(child, child.edges)
229278
end
230279
@assert visiting[child] == length(stack) + 1
231280
delete!(visiting, child)
@@ -243,7 +292,7 @@ function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n
243292
minworld = Ref{UInt}(1)
244293
maxworld = Ref{UInt}(typemax(UInt))
245294
has_ambig = Ref{Int32}(0)
246-
result = Base._methods_by_ftype(sig, nothing, lim, world, #=ambig=#false, minworld, maxworld, has_ambig)
295+
result = _methods_by_ftype(sig, nothing, lim, world, #=ambig=#false, minworld, maxworld, has_ambig)
247296
if result === nothing
248297
maxworld[] = 0
249298
else
@@ -306,11 +355,11 @@ function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UIn
306355
else
307356
minworld = 1
308357
maxworld = typemax(UInt)
309-
mt = Base.get_methodtable(expected)
358+
mt = get_methodtable(expected)
310359
if mt === nothing
311360
maxworld = 0
312361
else
313-
matched, valid_worlds = Base.Compiler._findsup(invokesig, mt, world)
362+
matched, valid_worlds = Compiler._findsup(invokesig, mt, world)
314363
minworld, maxworld = valid_worlds.min_world, valid_worlds.max_world
315364
if matched === nothing
316365
maxworld = 0

0 commit comments

Comments
 (0)