Skip to content

Commit 557def9

Browse files
committed
Use new functionality to cache compiler configuration.
1 parent 66dfab6 commit 557def9

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

src/compiler/execution.jl

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -296,17 +296,17 @@ The output of this function is automatically cached, i.e. you can simply call `c
296296
in a hot path without degrading performance. New code will be generated automatically, when
297297
the function changes, or when different types or keyword arguments are provided.
298298
"""
299-
function cufunction(f::F, tt::TT=Tuple{}; name=nothing, always_inline=false, kwargs...) where {F,TT}
299+
function cufunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
300300
cuda = active_state()
301+
302+
# compile the function
301303
cache = cufunction_cache(cuda.context)
302-
target = CUDACompilerTarget(cuda.device; kwargs...)
303-
params = CUDACompilerParams()
304-
config = CompilerConfig(target, params; kernel=true, name, always_inline)
304+
config = cufunction_compiler(cuda.device; kwargs...)::CUDACompilerConfig
305305
fun = GPUCompiler.cached_compilation(cache, config, F, tt,
306-
cufunction_compile,
307-
cufunction_link)
308-
# compilation is cached on the function type, so we can only create a kernel object here
309-
# (as it captures the function _instance_). this allocates, so use another cache level.
306+
cufunction_compile, cufunction_link)
307+
308+
# create a callable object that captures the function instance. we don't need to think
309+
# about world age here, as GPUCompiler already does and will return a different object
310310
h = hash(fun, hash(f, hash(tt)))
311311
kernel = get(_cufunction_kernel_cache, h, nothing)
312312
if kernel === nothing
@@ -331,6 +331,23 @@ function cufunction_cache(ctx::CuContext)
331331
return subcache
332332
end
333333

334+
const _cufunction_compiler_cache = Dict{UInt, CUDACompilerConfig}()
335+
function cufunction_compiler(dev; kwargs...)
336+
h = hash(dev, hash(kwargs))
337+
config = get(_cufunction_compiler_cache, h, nothing)
338+
if config === nothing
339+
config = cufunction_compiler_create(dev; kwargs...)
340+
_cufunction_compiler_cache[h] = config
341+
end
342+
return config
343+
end
344+
@noinline function cufunction_compiler_create(dev; name=nothing, always_inline=false, kwargs...)
345+
# TODO: merge with `device_properties`
346+
target = CUDACompilerTarget(dev; kwargs...)
347+
params = CUDACompilerParams()
348+
CompilerConfig(target, params; kernel=true, name, always_inline)
349+
end
350+
334351
const _cufunction_kernel_cache = Dict{UInt, Any}();
335352

336353
# helper to run a binary and collect all relevant output

src/compiler/gpucompiler.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ end
5151

5252
struct CUDACompilerParams <: AbstractCompilerParams end
5353

54-
CUDACompilerJob = CompilerJob{PTXCompilerTarget,CUDACompilerParams}
54+
const CUDACompilerConfig = CompilerConfig{PTXCompilerTarget, CUDACompilerParams}
55+
const CUDACompilerJob = CompilerJob{PTXCompilerTarget,CUDACompilerParams}
5556

5657
GPUCompiler.runtime_module(@nospecialize(job::CUDACompilerJob)) = CUDA
5758

0 commit comments

Comments
 (0)