Skip to content

Commit 4178477

Browse files
vchuravymaleadt
andauthored
Add disk cache infrastructure for Julia 1.11 (#557)
Co-authored-by: Tim Besard <tim.besard@gmail.com>
1 parent ee9077d commit 4178477

File tree

8 files changed

+208
-18
lines changed

8 files changed

+208
-18
lines changed

LocalPreferences.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[GPUCompiler]
2+
# whether caching of object files should be enabled. If the disk cache is enabled
3+
# cache files are stored in scratch space.
4+
#disk_cache = "false"

Project.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,23 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
99
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1010
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
1111
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
12+
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
1213
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
14+
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
15+
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
1316
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
1417
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
1518

1619
[compat]
1720
ExprTools = "0.1"
1821
InteractiveUtils = "1"
22+
LLVM = "8"
1923
Libdl = "1"
2024
Logging = "1"
2125
UUIDs = "1"
22-
LLVM = "8"
26+
Preferences = "1"
2327
Scratch = "1"
28+
Serialization = "1"
29+
TOML = "1"
2430
TimerOutputs = "0.5"
2531
julia = "1.8"

src/GPUCompiler.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ using ExprTools: splitdef, combinedef
99

1010
using Libdl
1111

12+
using Serialization
1213
using Scratch: @get_scratch!
14+
using Preferences
1315

1416
const CC = Core.Compiler
1517
using Core: MethodInstance, CodeInstance, CodeInfo

src/execution.jl

Lines changed: 128 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,54 @@ end
6262

6363
## cached compilation
6464

65+
### Notes on interactions with package images and disk cache.
66+
# Julia uses package images (pkgimg) to cache both the result of inference,
67+
# and the result of native code emissions. Up until Julia v1.11 neither the
68+
# inferred nor the native code of foreign abstract interpreters was cached
69+
# across sessions. Julia v1.11 allows for caching of inference results across
70+
# sessions as long as those inference results are created during precompilation.
71+
#
72+
# Julia cache hierarchy is roughly as follows:
73+
# Function (name of a thing)
74+
# -> Method (particular piece of code to dispatch to with a signature)
75+
# -> MethodInstance (A particular Method + particular signature)
76+
# -> CodeInstance (A MethodInstance compiled for a world)
77+
#
78+
# In order to cache code across sessions we need to insert CodeInstance(owner=GPUCompilerCacheToken)
79+
# into the internal cache. Once we have done so we know that a particular CodeInstance is unique in
80+
# the system. (During pkgimg loading conflicts will be resolved).
81+
#
82+
# When a pkgimg is loaded we check its validity; this means checking that all dependencies are the same,
83+
# the pkgimg was created for the right set of compiler flags, and that all source code that was used
84+
# to create this pkgimg is the same. When a CodeInstance is inside a pkgimg we can extend the chain of
85+
# validity even for GPU code; we cannot verify a "runtime" CodeInstance in the same way.
86+
#
87+
# Therefore when we see a compilation request for a CodeInstance that is originating from a pkgimg
88+
# we can use it as part of the hash for the on-disk cache. (see `cache_file`)
89+
90+
"""
91+
disk_cache_enabled()
92+
93+
Query if caching to disk is enabled.
94+
"""
95+
disk_cache_enabled() = parse(Bool, @load_preference("disk_cache", "false"))
96+
97+
"""
98+
enable_disk_cache!(state::Bool=true)
99+
100+
Activate the GPUCompiler disk cache in the current environment.
101+
You will need to restart your Julia environment for it to take effect.
102+
103+
!!! note
104+
The cache functionality requires Julia 1.11
105+
"""
106+
function enable_disk_cache!(state::Bool=true)
107+
@set_preferences!("disk_cache"=>string(state))
108+
end
109+
110+
disk_cache_path() = @get_scratch!("disk_cache")
111+
clear_disk_cache!() = rm(disk_cache_path(); recursive=true, force=true)
112+
65113
const cache_lock = ReentrantLock()
66114

67115
"""
@@ -108,6 +156,37 @@ function cached_compilation(cache::AbstractDict{<:Any,V},
108156
return obj::V
109157
end
110158

159+
@noinline function cache_file(ci::CodeInstance, cfg::CompilerConfig)
160+
h = hash(Base.objectid(ci))
161+
@static if isdefined(Base, :object_build_id)
162+
bid = Base.object_build_id(ci)
163+
if bid === nothing # CI is from a runtime compilation, not worth caching on disk
164+
return nothing
165+
else
166+
bid = bid % UInt64 # The upper 64bit are a checksum, unavailable during precompilation
167+
end
168+
h = hash(bid, h)
169+
end
170+
h = hash(cfg, h)
171+
172+
gpucompiler_buildid = Base.module_build_id(@__MODULE__)
173+
if (gpucompiler_buildid >> 64) % UInt64 == 0xffffffffffffffff
174+
return nothing # Don't cache during precompilation of GPUCompiler
175+
end
176+
177+
return joinpath(
178+
disk_cache_path(),
179+
# bifurcate the cache by build id of GPUCompiler
180+
string(gpucompiler_buildid),
181+
string(h, ".jls"))
182+
end
183+
184+
struct DiskCacheEntry
185+
src::Type # Originally MethodInstance, but upon deserialize they were not uniqued...
186+
cfg::CompilerConfig
187+
asm
188+
end
189+
111190
@noinline function actual_compilation(cache::AbstractDict, src::MethodInstance, world::UInt,
112191
cfg::CompilerConfig, compiler::Function, linker::Function)
113192
job = CompilerJob(src, cfg, world)
@@ -117,20 +196,64 @@ end
117196
ci = ci_cache_lookup(ci_cache(job), src, world, world)::Union{Nothing,CodeInstance}
118197
if ci !== nothing
119198
key = (ci, cfg)
120-
if haskey(cache, key)
121-
obj = cache[key]
122-
end
199+
obj = get(cache, key, nothing)
123200
end
124201

125202
# slow path: compile and link
126203
if obj === nothing || compile_hook[] !== nothing
127-
# TODO: consider loading the assembly from an on-disk cache here
128-
asm = compiler(job)
204+
asm = nothing
205+
path = nothing
206+
ondisk_hit = false
207+
@static if VERSION >= v"1.11.0-"
208+
# Don't try to hit the disk cache if we are here because of a *compile* hook
209+
# TODO:
210+
# - Should we hit the disk cache if Base.generating_output()?
211+
# - Should we allow backend to opt out?
212+
if ci !== nothing && obj === nothing && disk_cache_enabled()
213+
path = cache_file(ci, cfg)
214+
@debug "Looking for on-disk cache" job path
215+
if path !== nothing && isfile(path)
216+
ondisk_hit = true
217+
try
218+
@debug "Loading compiled kernel" job path
219+
# The MI we deserialize here didn't get uniqued...
220+
entry = deserialize(path)::DiskCacheEntry
221+
if entry.src == src.specTypes && entry.cfg == cfg
222+
asm = entry.asm
223+
else
224+
@show entry.src == src.specTypes
225+
@show entry.cfg == cfg
226+
@warn "Cache missmatch" src.specTypes cfg entry.src entry.cfg
227+
end
228+
catch ex
229+
@warn "Failed to load compiled kernel" job path exception=(ex, catch_backtrace())
230+
end
231+
end
232+
end
233+
end
129234

235+
if asm === nothing || compile_hook[] !== nothing
236+
# Run the compiler in case we need to hook it.
237+
asm = compiler(job)
238+
end
130239
if obj !== nothing
131240
# we got here because of a *compile* hook; don't bother linking
132241
return obj
133242
end
243+
244+
@static if VERSION >= v"1.11.0-"
245+
if !ondisk_hit && path !== nothing && disk_cache_enabled()
246+
@debug "Writing out on-disk cache" job path
247+
tmppath, io = mktemp(;cleanup=false)
248+
entry = DiskCacheEntry(src.specTypes, cfg, asm)
249+
serialize(io, entry)
250+
close(io)
251+
# atomic move
252+
mkpath(dirname(path))
253+
Base.rename(tmppath, path, force=true)
254+
end
255+
end
256+
134257
obj = linker(job, asm)
135258

136259
if ci === nothing

src/jlgen.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,27 @@ macro in_world(world, ex)
587587
end
588588
end
589589

590+
"""
591+
precompile(job::CompilerJob)
592+
593+
Compile the GPUCompiler job. In particular this will run inference using the foreign
594+
abstract interpreter.
595+
"""
596+
function Base.precompile(@nospecialize(job::CompilerJob))
597+
if job.source.def.primary_world > job.world || job.world > job.source.def.deleted_world
598+
error("Cannot compile $(job.source) for world $(job.world); method is only valid in worlds $(job.source.def.primary_world) to $(job.source.def.deleted_world)")
599+
end
600+
601+
# populate the cache
602+
interp = get_interpreter(job)
603+
cache = CC.code_cache(interp)
604+
if ci_cache_lookup(cache, job.source, job.world, job.world) === nothing
605+
ci_cache_populate(interp, cache, job.source, job.world, job.world)
606+
return ci_cache_lookup(cache, job.source, job.world, job.world) !== nothing
607+
end
608+
return true
609+
end
610+
590611
function compile_method_instance(@nospecialize(job::CompilerJob))
591612
if job.source.def.primary_world > job.world || job.world > job.source.def.deleted_world
592613
error("Cannot compile $(job.source) for world $(job.world); method is only valid in worlds $(job.source.def.primary_world) to $(job.source.def.deleted_world)")

test/native_tests.jl

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -549,20 +549,21 @@ precompile_test_harness("Inference caching") do load_path
549549
import GPUCompiler
550550
using PrecompileTools
551551

552-
function kernel()
552+
function kernel(A, x)
553+
A[1] = x
553554
return
554555
end
555556

556557
let
557-
job, _ = NativeCompiler.create_job(kernel, ())
558-
GPUCompiler.code_typed(job)
558+
job, _ = NativeCompiler.create_job(kernel, (Vector{Int}, Int))
559+
precompile(job)
559560
end
560561

561562
# identity is foreign
562563
@setup_workload begin
563564
job, _ = NativeCompiler.create_job(identity, (Int,))
564565
@compile_workload begin
565-
GPUCompiler.code_typed(job)
566+
precompile(job)
566567
end
567568
end
568569
end) |> string)
@@ -578,20 +579,35 @@ precompile_test_harness("Inference caching") do load_path
578579
job, _ = NativeCompiler.create_job(identity, (Int,))
579580
GPUCompiler.ci_cache_token(job)
580581
end
581-
ci = isdefined(identity_mi, :cache) ? identity_mi.cache : nothing
582-
while ci !== nothing
583-
@test ci.owner !== token
584-
ci = isdefined(ci, :next) ? ci.next : nothing
585-
end
582+
@test !check_presence(identity_mi, token)
586583

587584
using InferenceCaching
588585

589586
# Check that kernel survived
590-
kernel_mi = GPUCompiler.methodinstance(typeof(InferenceCaching.kernel), Tuple{})
587+
kernel_mi = GPUCompiler.methodinstance(typeof(InferenceCaching.kernel), Tuple{Vector{Int}, Int})
591588
@test check_presence(kernel_mi, token)
592589

593590
# check that identity survived
594591
@test check_presence(identity_mi, token)
592+
593+
GPUCompiler.clear_disk_cache!()
594+
@test GPUCompiler.disk_cache_enabled() == false
595+
596+
GPUCompiler.enable_disk_cache!()
597+
@test GPUCompiler.disk_cache_enabled() == true
598+
599+
job, _ = NativeCompiler.create_job(InferenceCaching.kernel, (Vector{Int}, Int))
600+
@assert job.source == kernel_mi
601+
ci = GPUCompiler.ci_cache_lookup(GPUCompiler.ci_cache(job), job.source, job.world, job.world)
602+
@assert ci !== nothing
603+
@assert ci.inferred !== nothing
604+
path = GPUCompiler.cache_file(ci, job.config)
605+
@test path !== nothing
606+
@test !ispath(path)
607+
NativeCompiler.cached_execution(InferenceCaching.kernel, (Vector{Int}, Int))
608+
@test ispath(path)
609+
GPUCompiler.clear_disk_cache!()
610+
@test !ispath(path)
595611
end
596612
end
597613

test/native_testsetup.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,22 @@ function code_execution(@nospecialize(func), @nospecialize(types); kwargs...)
7171
end
7272
end
7373

74+
const runtime_cache = Dict{Any, Any}()
75+
76+
function compiler(job)
77+
JuliaContext() do ctx
78+
GPUCompiler.compile(:asm, job, validate=false)
79+
end
80+
end
81+
82+
function linker(job, asm)
83+
asm
84+
end
85+
86+
# simulates cached codegen
87+
function cached_execution(@nospecialize(func), @nospecialize(types); kwargs...)
88+
job, kwargs = create_job(func, types; kwargs...)
89+
GPUCompiler.cached_compilation(runtime_cache, job.source, job.config, compiler, linker)
90+
end
91+
7492
end

test/ptx_tests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,14 +339,14 @@ precompile_test_harness("Inference caching") do load_path
339339

340340
let
341341
job, _ = PTXCompiler.create_job(kernel, ())
342-
GPUCompiler.code_typed(job)
342+
precompile(job)
343343
end
344344

345345
# identity is foreign
346346
@setup_workload begin
347347
job, _ = PTXCompiler.create_job(identity, (Int,))
348348
@compile_workload begin
349-
GPUCompiler.code_typed(job)
349+
precompile(job)
350350
end
351351
end
352352
end) |> string)

0 commit comments

Comments
 (0)