From 4b58767f9569e8e901fd3f366515c9c093ee5fff Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 8 Jan 2025 16:46:25 -0300 Subject: [PATCH 1/2] Allow disabling the linking of libdevice in CUDACompilerParams --- src/compiler/compilation.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl index c105bc0b77..aafbcefe78 100644 --- a/src/compiler/compilation.jl +++ b/src/compiler/compilation.jl @@ -3,12 +3,14 @@ Base.@kwdef struct CUDACompilerParams <: AbstractCompilerParams cap::VersionNumber ptx::VersionNumber + link_libdevice::Bool # Used by Reactant.jl end +CUDACompilerParams(;cap::VersionNumber, ptx::VersionNumber) = CUDACompilerParams(cap=cap, ptx=ptx, link_libdevice=true) function Base.hash(params::CUDACompilerParams, h::UInt) h = hash(params.cap, h) h = hash(params.ptx, h) - + h = hash(params.link_libdevice, h) return h end @@ -27,6 +29,9 @@ GPUCompiler.isintrinsic(@nospecialize(job::CUDACompilerJob), fn::String) = # link libdevice function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::LLVM.Module, undefined_fns::Vector{String}) + if !job.config.params.link_libdevice + return # Don't link libdevice, used by Reactant.jl to raise NVVM intrinsics into MLIR + end # only link if there's undefined __nv_ functions if !any(fn->startswith(fn, "__nv_"), undefined_fns) return From b3364d3d606b9495a4378e52c9792a2fb82a5215 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 6 Feb 2025 16:14:59 -0300 Subject: [PATCH 2/2] Address some comments --- src/compiler/compilation.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl index aafbcefe78..029bbeb40e 100644 --- a/src/compiler/compilation.jl +++ b/src/compiler/compilation.jl @@ -3,10 +3,9 @@ Base.@kwdef struct CUDACompilerParams <: AbstractCompilerParams cap::VersionNumber ptx::VersionNumber - link_libdevice::Bool # Used by Reactant.jl + link_libdevice::Bool = true # Used by Reactant.jl end -CUDACompilerParams(;cap::VersionNumber, ptx::VersionNumber) = CUDACompilerParams(cap=cap, ptx=ptx, link_libdevice=true) function Base.hash(params::CUDACompilerParams, h::UInt) h = hash(params.cap, h) h = hash(params.ptx, h) @@ -20,18 +19,19 @@ const CUDACompilerJob = CompilerJob{PTXCompilerTarget,CUDACompilerParams} GPUCompiler.runtime_module(@nospecialize(job::CUDACompilerJob)) = CUDA # filter out functions from libdevice and cudadevrt -GPUCompiler.isintrinsic(@nospecialize(job::CUDACompilerJob), fn::String) = - invoke(GPUCompiler.isintrinsic, - Tuple{CompilerJob{PTXCompilerTarget}, typeof(fn)}, - job, fn) || - fn == "__nvvm_reflect" || startswith(fn, "cuda") +function GPUCompiler.isintrinsic(@nospecialize(job::CUDACompilerJob), fn::String) + is_intrinsic = invoke(GPUCompiler.isintrinsic, + Tuple{CompilerJob{PTXCompilerTarget}, typeof(fn)}, job, fn) + is_intrinsic |= fn == "__nvvm_reflect" + is_intrinsic |= startswith(fn, "cuda") + is_intrinsic |= !job.config.params.link_libdevice ? startswith(fn, "__nv_") : false # Reactant.jl wants to handle __nv_ functions + return is_intrinsic +end # link libdevice function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::LLVM.Module, undefined_fns::Vector{String}) - if !job.config.params.link_libdevice - return # Don't link libdevice, used by Reactant.jl to raise NVVM intrinsics into MLIR - end + job.config.params.link_libdevice || return # only link if there's undefined __nv_ functions if !any(fn->startswith(fn, "__nv_"), undefined_fns) return