Skip to content

Commit 9c1a20b

Browse files
committed
Add always_inline parameter to ptx backend
This adds the `always_inline` parameter, which forces the inlining of functions in the PTX backend.
1 parent 830ae86 commit 9c1a20b

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

src/ptx.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Base.@kwdef struct PTXCompilerTarget <: AbstractCompilerTarget
2121
maxthreads::Union{Nothing,Int,NTuple{<:Any,Int}} = nothing
2222
blocks_per_sm::Union{Nothing,Int} = nothing
2323
maxregs::Union{Nothing,Int} = nothing
24+
always_inline::Bool = false
2425
end
2526

2627
function Base.hash(target::PTXCompilerTarget, h::UInt)
@@ -35,6 +36,7 @@ function Base.hash(target::PTXCompilerTarget, h::UInt)
3536
h = hash(target.maxthreads, h)
3637
h = hash(target.blocks_per_sm, h)
3738
h = hash(target.maxregs, h)
39+
h = hash(target.always_inline, h)
3840

3941
h
4042
end
@@ -74,6 +76,7 @@ function Base.show(io::IO, @nospecialize(job::CompilerJob{PTXCompilerTarget}))
7476
job.target.maxthreads !== nothing && print(io, ", maxthreads=$(job.target.maxthreads)")
7577
job.target.blocks_per_sm !== nothing && print(io, ", blocks_per_sm=$(job.target.blocks_per_sm)")
7678
job.target.maxregs !== nothing && print(io, ", maxregs=$(job.target.maxregs)")
79+
# `always_inline` is a `Bool`, never `nothing`, so test the flag itself: the option is
# only mentioned when it is actually enabled, like the other optional settings above.
job.target.always_inline && print(io, ", always_inline=$(job.target.always_inline)")
7780
end
7881

7982
const ptx_intrinsics = ("vprintf", "__assertfail", "malloc", "free")
@@ -86,6 +89,20 @@ runtime_slug(@nospecialize(job::CompilerJob{PTXCompilerTarget})) =
8689
"-debuginfo=$(Int(llvm_debug_info(job)))" *
8790
"-exitable=$(job.target.exitable)"
8891

92+
"""
    optimization_params(job::CompilerJob{PTXCompilerTarget})

Return the `OptimizationParams` to use for a PTX compiler job.

The parameters are built from keyword overrides that are collected conditionally:

- on Julia versions older than `1.8.0-DEV.486`, `unoptimize_throw_blocks=false` is passed;
- when `job.target.always_inline` is set, `inline_cost_threshold=typemax(Int)` is passed.
"""
function optimization_params(@nospecialize(job::CompilerJob{PTXCompilerTarget}))
    # Start without overrides and merge in whatever applies to this job.
    opts = NamedTuple()

    # This keyword is only passed on Julia versions before 1.8.0-DEV.486.
    VERSION < v"1.8.0-DEV.486" &&
        (opts = merge(opts, (unoptimize_throw_blocks=false,)))

    # Raising the cost threshold to the maximum `Int` is how inlining is forced.
    job.target.always_inline &&
        (opts = merge(opts, (inline_cost_threshold=typemax(Int),)))

    return OptimizationParams(; opts...)
end
105+
89106
function process_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), mod::LLVM.Module)
90107
ctx = context(mod)
91108

test/definitions/ptx.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,12 @@ GPUCompiler.runtime_module(::PTXCompilerJob) = PTXTestRuntime
3939

4040
function ptx_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false,
4141
minthreads=nothing, maxthreads=nothing, blocks_per_sm=nothing,
42-
maxregs=nothing, kwargs...)
42+
maxregs=nothing, always_inline=false, kwargs...)
4343
source = FunctionSpec(func, Base.to_tuple_type(types), kernel)
4444
target = PTXCompilerTarget(cap=v"7.0",
4545
minthreads=minthreads, maxthreads=maxthreads,
46-
blocks_per_sm=blocks_per_sm, maxregs=maxregs)
46+
blocks_per_sm=blocks_per_sm, maxregs=maxregs,
47+
always_inline=always_inline)
4748
params = TestCompilerParams()
4849
CompilerJob(target, source, params), kwargs
4950
end

test/ptx.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,32 @@ end
174174
end
175175
end
176176

177+
@testset "always_inline" begin
    # Build a function body consisting of 100 nested `sink` calls; the test below
    # expects it to be emitted as a separate `.func` unless inlining is forced.
    @eval f_expensive(x) = $(foldl((acc, _) -> :(sink($acc) + sink(x)), 1:100; init=:x))

    # Two identical kernels, so that both orderings (default first / forced first)
    # can be exercised on distinct entry points.
    function g(x)
        f_expensive(x)
        return
    end
    function h(x)
        f_expensive(x)
        return
    end

    # Matches a standalone PTX function emitted for `f_expensive`.
    callee = r"\.func .*julia_f_expensive"

    # Compile `f` as a kernel taking an `Int64` and return the generated PTX as a string.
    native(f; opts...) =
        sprint(io -> ptx_code_native(io, f, Tuple{Int64}; kernel=true, opts...))

    # g: default compilation first, then with forced inlining.
    @test occursin(callee, native(g))
    @test !occursin(callee, native(g; always_inline=true))

    # h: forced inlining first, then default compilation.
    @test !occursin(callee, native(h; always_inline=true))
    @test occursin(callee, native(h))
end
202+
177203
@testset "child function reuse" begin
178204
# bug: depending on a child function from multiple parents resulted in
179205
# the child only being present once

0 commit comments

Comments
 (0)