Skip to content

Commit 66dfab6

Browse files
committed
Adapt to GPUCompiler changes.
1 parent 940d23d commit 66dfab6

File tree

6 files changed: 23 additions (+23) and 19 deletions (-19).

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,9 +105,9 @@ version = "0.1.4"
105105

106106
[[GPUCompiler]]
107107
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
108-
git-tree-sha1 = "19d693666a304e8c371798f4900f7435558c7cde"
108+
git-tree-sha1 = "fd6431121f31fed05a5386ac88b9bb3f97fdfa69"
109109
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
110-
version = "0.17.3"
110+
version = "0.18.0"
111111

112112
[[InteractiveUtils]]
113113
deps = ["Markdown"]

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ CUDA_Runtime_Discovery = "0.1"
3939
CUDA_Runtime_jll = "0.4"
4040
ExprTools = "0.1"
4141
GPUArrays = "8.6"
42-
GPUCompiler = "0.17"
42+
GPUCompiler = "0.18"
4343
LLVM = "4.15"
4444
Preferences = "1"
4545
Random123 = "1.2"

src/compiler/execution.jl

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -299,11 +299,10 @@ when function changes, or when different types or keyword arguments are provided
299299
function cufunction(f::F, tt::TT=Tuple{}; name=nothing, always_inline=false, kwargs...) where {F,TT}
300300
cuda = active_state()
301301
cache = cufunction_cache(cuda.context)
302-
source = FunctionSpec(f, tt, true, name)
303302
target = CUDACompilerTarget(cuda.device; kwargs...)
304303
params = CUDACompilerParams()
305-
job = CompilerJob(target, source, params; always_inline)
306-
fun = GPUCompiler.cached_compilation(cache, job,
304+
config = CompilerConfig(target, params; kernel=true, name, always_inline)
305+
fun = GPUCompiler.cached_compilation(cache, config, F, tt,
307306
cufunction_compile,
308307
cufunction_link)
309308
# compilation is cached on the function type, so we can only create a kernel object here
@@ -400,7 +399,7 @@ function cufunction_compile(@nospecialize(job::CompilerJob), ctx)
400399
push!(ptxas_opts, "--compile-only")
401400
end
402401

403-
arch = "sm_$(job.target.cap.major)$(job.target.cap.minor)"
402+
arch = "sm_$(job.config.target.cap.major)$(job.config.target.cap.minor)"
404403

405404
# compile to machine code
406405
# NOTE: we use tempname since mktemp doesn't support suffixes, and mktempdir is slow
@@ -516,7 +515,7 @@ a callable kernel object. Device-side equivalent of [`CUDA.cufunction`](@ref).
516515
No keyword arguments are supported.
517516
"""
518517
@inline function dynamic_cufunction(f::F, tt::Type=Tuple{}) where {F <: Function}
519-
fptr = GPUCompiler.deferred_codegen(Val(f), Val(tt))
518+
fptr = GPUCompiler.deferred_codegen(Val(F), Val(tt))
520519
fun = CuDeviceFunction(fptr)
521520
DeviceKernel{F,tt}(f, fun, kernel_state())
522521
end

src/compiler/gpucompiler.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::L
6767
invoke(GPUCompiler.link_libraries!,
6868
Tuple{CompilerJob{PTXCompilerTarget}, typeof(mod), typeof(undefined_fns)},
6969
job, mod, undefined_fns)
70-
link_libdevice!(mod, job.target.cap, undefined_fns)
70+
link_libdevice!(mod, job.config.target.cap, undefined_fns)
7171
end
7272

7373
GPUCompiler.method_table(@nospecialize(job::CUDACompilerJob)) = method_table

src/compiler/reflection.jl

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -44,18 +44,19 @@ See also: [`@device_code_sass`](@ref)
4444
"""
4545
function code_sass(io::IO, @nospecialize(func), @nospecialize(types), kernel::Bool=true;
4646
verbose::Bool=false, always_inline::Bool=false, kwargs...)
47-
tt = Base.to_tuple_type(types)
47+
source = FunctionSpec(typeof(func), Base.to_tuple_type(types))
4848
target = CUDACompilerTarget(device(); kwargs...)
4949
params = CUDACompilerParams()
50-
job = CompilerJob(target, FunctionSpec(func, tt, kernel), params; always_inline)
50+
config = CompilerConfig(target, params; kernel)
51+
job = CompilerJob(source, config)
5152
code_sass(io, job; verbose=verbose)
5253
end
5354

5455
# multiple subscribers aren't supported, so make sure we only call CUPTI once
5556
const cupti_lock = ReentrantLock()
5657

5758
function code_sass(io::IO, job::CUDACompilerJob; verbose::Bool=false)
58-
if !job.source.kernel
59+
if !job.config.kernel
5960
error("Can only generate SASS code for kernel functions")
6061
end
6162

@@ -126,10 +127,11 @@ for method in (:code_typed, :code_warntype, :code_llvm, :code_native)
126127
kernel::Bool=false, minthreads=nothing, maxthreads=nothing,
127128
blocks_per_sm=nothing, maxregs=nothing, always_inline::Bool=false,
128129
kwargs...)
129-
source = FunctionSpec(func, Base.to_tuple_type(types), kernel)
130+
source = FunctionSpec(typeof(func), Base.to_tuple_type(types))
130131
target = CUDACompilerTarget(device(); minthreads, maxthreads, blocks_per_sm, maxregs)
131132
params = CUDACompilerParams()
132-
job = CompilerJob(target, source, params; always_inline)
133+
config = CompilerConfig(target, params; kernel, always_inline)
134+
job = CompilerJob(source, config)
133135
GPUCompiler.$method($(args...); kwargs...)
134136
end
135137
$method(@nospecialize(func), @nospecialize(types); kwargs...) =
@@ -145,10 +147,11 @@ const code_ptx = code_native
145147
Return a type `r` such that `f(args...)::r` where `args::tt`.
146148
"""
147149
function return_type(@nospecialize(func), @nospecialize(tt))
148-
source = FunctionSpec(func, tt, true)
150+
source = FunctionSpec(typeof(func), tt)
149151
target = CUDACompilerTarget(device())
150152
params = CUDACompilerParams()
151-
job = CompilerJob(target, source, params)
153+
config = CompilerConfig(target, params; kernel=true)
154+
job = CompilerJob(source, config)
152155
interp = GPUCompiler.get_interpreter(job)
153156
if VERSION >= v"1.8-"
154157
sig = Base.signature_type(func, tt)

src/device/runtime.jl

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,16 @@ GPUCompiler.reset_runtime()
1010

1111
# load or build the runtime for the most likely compilation job given a compute capability
1212
function precompile_runtime(caps=CUDA.llvm_compat(LLVM.version()).cap)
13-
dummy_source = FunctionSpec(()->return, Tuple{})
13+
f = ()->return
14+
dummy_source = FunctionSpec(typeof(f), Tuple{})
1415
params = CUDACompilerParams()
1516
JuliaContext() do ctx
1617
for cap in caps
1718
# NOTE: this often runs when we don't have a functioning set-up,
1819
# so we don't use CUDACompilerTarget(...) which requires NVML
19-
target = PTXCompilerTarget(; cap=cap)
20-
job = CompilerJob(target, dummy_source, params)
20+
target = PTXCompilerTarget(; cap)
21+
config = CompilerConfig(target, params)
22+
job = CompilerJob(dummy_source, config)
2123
GPUCompiler.load_runtime(job; ctx)
2224
end
2325
end

0 commit comments

Comments
 (0)