Adapt to GPUCompiler changes.

maleadt · maleadt · commit 66dfab6b2bd7 · 2023-03-14T19:52:55.000+01:00
diff --git a/Manifest.toml b/Manifest.toml
@@ -105,9 +105,9 @@ version = "0.1.4"
 
 [[GPUCompiler]]
 deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
-git-tree-sha1 = "19d693666a304e8c371798f4900f7435558c7cde"
+git-tree-sha1 = "fd6431121f31fed05a5386ac88b9bb3f97fdfa69"
 uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
-version = "0.17.3"
+version = "0.18.0"
 
 [[InteractiveUtils]]
 deps = ["Markdown"]
diff --git a/Project.toml b/Project.toml
@@ -39,7 +39,7 @@ CUDA_Runtime_Discovery = "0.1"
 CUDA_Runtime_jll = "0.4"
 ExprTools = "0.1"
 GPUArrays = "8.6"
-GPUCompiler = "0.17"
+GPUCompiler = "0.18"
 LLVM = "4.15"
 Preferences = "1"
 Random123 = "1.2"
diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl
@@ -299,11 +299,10 @@ when function changes, or when different types or keyword arguments are provided
 function cufunction(f::F, tt::TT=Tuple{}; name=nothing, always_inline=false, kwargs...) where {F,TT}
     cuda = active_state()
     cache = cufunction_cache(cuda.context)
-    source = FunctionSpec(f, tt, true, name)
     target = CUDACompilerTarget(cuda.device; kwargs...)
     params = CUDACompilerParams()
-    job = CompilerJob(target, source, params; always_inline)
-    fun = GPUCompiler.cached_compilation(cache, job,
+    config = CompilerConfig(target, params; kernel=true, name, always_inline)
+    fun = GPUCompiler.cached_compilation(cache, config, F, tt,
                                          cufunction_compile,
                                          cufunction_link)
     # compilation is cached on the function type, so we can only create a kernel object here
@@ -400,7 +399,7 @@ function cufunction_compile(@nospecialize(job::CompilerJob), ctx)
         push!(ptxas_opts, "--compile-only")
     end
 
-    arch = "sm_$(job.target.cap.major)$(job.target.cap.minor)"
+    arch = "sm_$(job.config.target.cap.major)$(job.config.target.cap.minor)"
 
     # compile to machine code
     # NOTE: we use tempname since mktemp doesn't support suffixes, and mktempdir is slow
@@ -516,7 +515,7 @@ a callable kernel object. Device-side equivalent of [`CUDA.cufunction`](@ref).
 No keyword arguments are supported.
 """
 @inline function dynamic_cufunction(f::F, tt::Type=Tuple{}) where {F <: Function}
-    fptr = GPUCompiler.deferred_codegen(Val(f), Val(tt))
+    fptr = GPUCompiler.deferred_codegen(Val(F), Val(tt))  # pass the function *type* F, not the instance f
     fun = CuDeviceFunction(fptr)
     DeviceKernel{F,tt}(f, fun, kernel_state())
 end
diff --git a/src/compiler/gpucompiler.jl b/src/compiler/gpucompiler.jl
@@ -67,7 +67,7 @@ function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::L
     invoke(GPUCompiler.link_libraries!,
            Tuple{CompilerJob{PTXCompilerTarget}, typeof(mod), typeof(undefined_fns)},
            job, mod, undefined_fns)
-    link_libdevice!(mod, job.target.cap, undefined_fns)
+    link_libdevice!(mod, job.config.target.cap, undefined_fns)
 end
 
 GPUCompiler.method_table(@nospecialize(job::CUDACompilerJob)) = method_table
diff --git a/src/compiler/reflection.jl b/src/compiler/reflection.jl
@@ -44,18 +44,19 @@ See also: [`@device_code_sass`](@ref)
 """
 function code_sass(io::IO, @nospecialize(func), @nospecialize(types), kernel::Bool=true;
                    verbose::Bool=false, always_inline::Bool=false, kwargs...)
-    tt = Base.to_tuple_type(types)
+    source = FunctionSpec(typeof(func), Base.to_tuple_type(types))
     target = CUDACompilerTarget(device(); kwargs...)
     params = CUDACompilerParams()
-    job = CompilerJob(target, FunctionSpec(func, tt, kernel), params; always_inline)
+    config = CompilerConfig(target, params; kernel, always_inline)
+    job = CompilerJob(source, config)
     code_sass(io, job; verbose=verbose)
 end
 
 # multiple subscribers aren't supported, so make sure we only call CUPTI once
 const cupti_lock = ReentrantLock()
 
 function code_sass(io::IO, job::CUDACompilerJob; verbose::Bool=false)
-    if !job.source.kernel
+    if !job.config.kernel
         error("Can only generate SASS code for kernel functions")
     end
 
@@ -126,10 +127,11 @@ for method in (:code_typed, :code_warntype, :code_llvm, :code_native)
                          kernel::Bool=false, minthreads=nothing, maxthreads=nothing,
                          blocks_per_sm=nothing, maxregs=nothing, always_inline::Bool=false,
                          kwargs...)
-            source = FunctionSpec(func, Base.to_tuple_type(types), kernel)
+            source = FunctionSpec(typeof(func), Base.to_tuple_type(types))
             target = CUDACompilerTarget(device(); minthreads, maxthreads, blocks_per_sm, maxregs)
             params = CUDACompilerParams()
-            job = CompilerJob(target, source, params; always_inline)
+            config = CompilerConfig(target, params; kernel, always_inline)
+            job = CompilerJob(source, config)
             GPUCompiler.$method($(args...); kwargs...)
         end
         $method(@nospecialize(func), @nospecialize(types); kwargs...) =
@@ -145,10 +147,11 @@ const code_ptx = code_native
 Return a type `r` such that `f(args...)::r` where `args::tt`.
 """
 function return_type(@nospecialize(func), @nospecialize(tt))
-    source = FunctionSpec(func, tt, true)
+    source = FunctionSpec(typeof(func), tt)
     target = CUDACompilerTarget(device())
     params = CUDACompilerParams()
-    job = CompilerJob(target, source, params)
+    config = CompilerConfig(target, params; kernel=true)
+    job = CompilerJob(source, config)
     interp = GPUCompiler.get_interpreter(job)
     if VERSION >= v"1.8-"
         sig = Base.signature_type(func, tt)
diff --git a/src/device/runtime.jl b/src/device/runtime.jl
@@ -10,14 +10,16 @@ GPUCompiler.reset_runtime()
 
 # load or build the runtime for the most likely compilation job given a compute capability
 function precompile_runtime(caps=CUDA.llvm_compat(LLVM.version()).cap)
-    dummy_source = FunctionSpec(()->return, Tuple{})
+    f = ()->return
+    dummy_source = FunctionSpec(typeof(f), Tuple{})
     params = CUDACompilerParams()
     JuliaContext() do ctx
         for cap in caps
             # NOTE: this often runs when we don't have a functioning set-up,
             #       so we don't use CUDACompilerTarget(...) which requires NVML
-            target = PTXCompilerTarget(; cap=cap)
-            job = CompilerJob(target, dummy_source, params)
+            target = PTXCompilerTarget(; cap)
+            config = CompilerConfig(target, params)
+            job = CompilerJob(dummy_source, config)
             GPUCompiler.load_runtime(job; ctx)
         end
     end