@@ -21,6 +21,7 @@ Base.@kwdef struct PTXCompilerTarget <: AbstractCompilerTarget
21
21
maxthreads:: Union{Nothing,Int,NTuple{<:Any,Int}} = nothing
22
22
blocks_per_sm:: Union{Nothing,Int} = nothing
23
23
maxregs:: Union{Nothing,Int} = nothing
24
+ always_inline:: Bool = false
24
25
end
25
26
26
27
function Base. hash (target:: PTXCompilerTarget , h:: UInt )
@@ -35,6 +36,7 @@ function Base.hash(target::PTXCompilerTarget, h::UInt)
35
36
h = hash (target. maxthreads, h)
36
37
h = hash (target. blocks_per_sm, h)
37
38
h = hash (target. maxregs, h)
39
+ h = hash (target. always_inline, h)
38
40
39
41
h
40
42
end
@@ -74,6 +76,7 @@ function Base.show(io::IO, @nospecialize(job::CompilerJob{PTXCompilerTarget}))
74
76
job. target. maxthreads != = nothing && print (io, " , maxthreads=$(job. target. maxthreads) " )
75
77
job. target. blocks_per_sm != = nothing && print (io, " , blocks_per_sm=$(job. target. blocks_per_sm) " )
76
78
job. target. maxregs != = nothing && print (io, " , maxregs=$(job. target. maxregs) " )
79
# `always_inline` is a Bool (never `nothing`), so test the flag itself; the
# suffix is only printed when always-inline has actually been requested.
job.target.always_inline && print(io, ", always_inline=$(job.target.always_inline)")
77
80
end
78
81
79
82
const ptx_intrinsics = (" vprintf" , " __assertfail" , " malloc" , " free" )
@@ -86,6 +89,20 @@ runtime_slug(@nospecialize(job::CompilerJob{PTXCompilerTarget})) =
86
89
" -debuginfo=$(Int (llvm_debug_info (job))) " *
87
90
" -exitable=$(job. target. exitable) "
88
91
92
# Assemble the `OptimizationParams` used for PTX compiler jobs.
#
# Keyword overrides are collected conditionally and forwarded in a single call:
# - `unoptimize_throw_blocks=false` on Julia builds older than 1.8.0-DEV.486;
# - `inline_cost_threshold=typemax(Int)` when the target sets `always_inline`
#   (presumably so that no call is rejected by the inliner on cost grounds --
#   confirm against the Core.Compiler version in use).
function optimization_params(@nospecialize(job::CompilerJob{PTXCompilerTarget}))
    overrides = NamedTuple()

    if VERSION < v"1.8.0-DEV.486"
        overrides = merge(overrides, (unoptimize_throw_blocks=false,))
    end

    if job.target.always_inline
        overrides = merge(overrides, (inline_cost_threshold=typemax(Int),))
    end

    return OptimizationParams(; overrides...)
end
105
+
89
106
function process_module! (@nospecialize (job:: CompilerJob{PTXCompilerTarget} ), mod:: LLVM.Module )
90
107
ctx = context (mod)
91
108
0 commit comments