Skip to content

Commit e18cdd2

Browse files
authored
Adapt to LLVM 17. (#583)
1 parent 104629e commit e18cdd2

File tree

6 files changed

+115
-61
lines changed

6 files changed

+115
-61
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ InteractiveUtils = "1"
1919
Libdl = "1"
2020
Logging = "1"
2121
UUIDs = "1"
22-
LLVM = "6.6"
22+
LLVM = "7.1"
2323
Scratch = "1"
2424
TimerOutputs = "0.5"
2525
julia = "1.8"

src/driver.jl

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -325,16 +325,22 @@ const __llvm_initialized = Ref(false)
325325
# global variables. this makes sure that the optimizer can, e.g.,
326326
# rewrite function signatures.
327327
if toplevel
328-
# TODO: there's no good API to use internalize with the new pass manager yet
329-
@dispose pm=ModulePassManager() begin
330-
exports = collect(values(jobs))
331-
for gvar in globals(ir)
332-
if linkage(gvar) == LLVM.API.LLVMExternalLinkage
333-
push!(exports, LLVM.name(gvar))
334-
end
328+
preserved_gvs = collect(values(jobs))
329+
for gvar in globals(ir)
330+
if linkage(gvar) == LLVM.API.LLVMExternalLinkage
331+
push!(preserved_gvs, LLVM.name(gvar))
332+
end
333+
end
334+
if use_newpm && LLVM.version() >= v"17"
335+
@dispose pb=PassBuilder() mpm=NewPMModulePassManager(pb) begin
336+
add!(mpm, InternalizePass(InternalizePassOptions(; preserved_gvs)))
337+
run!(mpm, ir)
338+
end
339+
else
340+
@dispose pm=ModulePassManager() begin
341+
internalize!(pm, preserved_gvs)
342+
run!(pm, ir)
335343
end
336-
internalize!(pm, exports)
337-
run!(pm, ir)
338344
end
339345
end
340346

src/irgen.jl

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -95,27 +95,29 @@ function irgen(@nospecialize(job::CompilerJob))
9595
end
9696
end
9797

98-
# TODO: there's no good API to use internalize with the new pass manager yet
99-
@dispose pm=ModulePassManager() begin
100-
global current_job
101-
current_job = job
102-
103-
linkage!(entry, LLVM.API.LLVMExternalLinkage)
104-
105-
# internalize all functions, but keep exported global variables
106-
exports = String[LLVM.name(entry)]
107-
for gvar in globals(mod)
108-
push!(exports, LLVM.name(gvar))
98+
# internalize all functions, but keep exported global variables.
99+
linkage!(entry, LLVM.API.LLVMExternalLinkage)
100+
preserved_gvs = String[LLVM.name(entry)]
101+
for gvar in globals(mod)
102+
push!(preserved_gvs, LLVM.name(gvar))
103+
end
104+
if use_newpm && LLVM.version() >= v"17"
105+
@dispose pb=PassBuilder() mpm=NewPMModulePassManager(pb) begin
106+
add!(mpm, InternalizePass(InternalizePassOptions(; preserved_gvs)))
107+
add!(mpm, AlwaysInlinerPass())
108+
run!(mpm, mod)
109+
end
110+
else
111+
@dispose pm=ModulePassManager() begin
112+
internalize!(pm, preserved_gvs)
113+
always_inliner!(pm)
114+
run!(pm, mod)
109115
end
110-
internalize!(pm, exports)
111-
112-
# inline llvmcall bodies
113-
always_inliner!(pm)
114-
115-
can_throw(job) || add!(pm, ModulePass("LowerThrow", lower_throw!))
116-
117-
run!(pm, mod)
118116
end
117+
118+
global current_job
119+
current_job = job
120+
can_throw(job) || lower_throw!(mod)
119121
end
120122

121123
return mod, compiled

src/optim.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ end
1313
## new pm
1414

1515
function optimize_newpm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
16-
triple = llvm_triple(job.config.target)
1716
tm = llvm_machine(job.config.target)
1817

1918
global current_job
@@ -34,7 +33,9 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level)
3433
buildEarlySimplificationPipeline(mpm, job, opt_level)
3534
add!(mpm, AlwaysInlinerPass())
3635
buildEarlyOptimizerPipeline(mpm, job, opt_level)
37-
add!(mpm, LowerSIMDLoopPass())
36+
if VERSION < v"1.10"
37+
add!(mpm, LowerSIMDLoopPass())
38+
end
3839
add!(mpm, NewPMFunctionPassManager) do fpm
3940
buildLoopOptimizerPipeline(fpm, job, opt_level)
4041
buildScalarOptimizerPipeline(fpm, job, opt_level)
@@ -113,8 +114,9 @@ end
113114

114115
function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
115116
add!(fpm, NewPMLoopPassManager) do lpm
116-
# TODO LowerSIMDLoopPass
117-
# LoopPass since JuliaLang/julia#51883
117+
if VERSION >= v"1.10"
118+
add!(lpm, LowerSIMDLoopPass())
119+
end
118120
if opt_level >= 2
119121
add!(lpm, LoopRotatePass())
120122
end

src/ptx.jl

Lines changed: 70 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -153,42 +153,86 @@ end
153153
function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
154154
mod::LLVM.Module)
155155
tm = llvm_machine(job.config.target)
156-
# TODO can't convert to newpm because speculative-execution doesn't have a parameter in the default PassBuilder parser
157-
@dispose pm=ModulePassManager() begin
158-
add_library_info!(pm, triple(mod))
159-
add_transform_info!(pm, tm)
160-
161-
# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
162-
add!(pm, FunctionPass("NVVMReflect", nvvm_reflect!))
163-
164-
# needed by GemmKernels.jl-like code
165-
speculative_execution_if_has_branch_divergence!(pm)
156+
# TODO: Use the registered target passes (JuliaGPU/GPUCompiler.jl#450)
157+
if use_newpm
158+
@dispose pb=PassBuilder(tm) mpm=NewPMModulePassManager(pb) begin
159+
add!(mpm, NewPMFunctionPassManager) do fpm
160+
# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
161+
add!(legacy2newpm(nvvm_reflect!), fpm)
162+
163+
# needed by GemmKernels.jl-like code
164+
add!(fpm, SpeculativeExecutionPass())
165+
166+
# NVPTX's target machine info enables runtime unrolling,
167+
# but Julia's pass sequence only invokes the simple unroller.
168+
add!(fpm, LoopUnrollPass(LoopUnrollOptions(; job.config.opt_level)))
169+
add!(fpm, InstCombinePass()) # clean-up redundancy
170+
add!(fpm, NewPMLoopPassManager) do lpm
171+
add!(lpm, LICMPass()) # the inner runtime check might be
172+
# outer loop invariant
173+
end
174+
175+
# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
176+
# that code still needs to be optimized (arguably, multiple unroll passes should be
177+
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
178+
if job.config.opt_level == 2
179+
add!(fpm, GVNPass())
180+
elseif job.config.opt_level == 1
181+
add!(fpm, EarlyCSEPass())
182+
end
183+
add!(fpm, DSEPass())
184+
185+
add!(fpm, SimplifyCFGPass())
186+
end
166187

167-
# NVPTX's target machine info enables runtime unrolling,
168-
# but Julia's pass sequence only invokes the simple unroller.
169-
loop_unroll!(pm)
170-
instruction_combining!(pm) # clean-up redundancy
171-
licm!(pm) # the inner runtime check might be outer loop invariant
188+
# get rid of the internalized functions; now possibly unused
189+
add!(mpm, GlobalDCEPass())
172190

173-
# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
174-
# that code still needs to be optimized (arguably, multiple unroll passes should be
175-
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
176-
early_csemem_ssa!(pm) # TODO: gvn instead? see NVPTXTargetMachine.cpp::addEarlyCSEOrGVNPass
177-
dead_store_elimination!(pm)
191+
run!(mpm, mod, tm)
192+
end
193+
else
194+
@dispose pm=ModulePassManager() begin
195+
add_library_info!(pm, triple(mod))
196+
add_transform_info!(pm, tm)
197+
198+
# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
199+
add!(pm, FunctionPass("NVVMReflect", nvvm_reflect!))
200+
201+
# needed by GemmKernels.jl-like code
202+
speculative_execution_if_has_branch_divergence!(pm)
203+
204+
# NVPTX's target machine info enables runtime unrolling,
205+
# but Julia's pass sequence only invokes the simple unroller.
206+
loop_unroll!(pm)
207+
instruction_combining!(pm) # clean-up redundancy
208+
licm!(pm) # the inner runtime check might be outer loop invariant
209+
210+
# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
211+
# that code still needs to be optimized (arguably, multiple unroll passes should be
212+
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
213+
if job.config.opt_level == 2
214+
gvn!(pm)
215+
elseif job.config.opt_level == 1
216+
early_cse!(pm)
217+
end
218+
dead_store_elimination!(pm)
178219

179-
cfgsimplification!(pm)
220+
cfgsimplification!(pm)
180221

181-
# get rid of the internalized functions; now possible unused
182-
global_dce!(pm)
222+
# get rid of the internalized functions; now possibly unused
223+
global_dce!(pm)
183224

184-
run!(pm, mod)
225+
run!(pm, mod)
226+
end
185227
end
186228
end
187229

188230
function finish_ir!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
189231
mod::LLVM.Module, entry::LLVM.Function)
190-
for f in functions(mod)
191-
lower_unreachable!(f)
232+
if LLVM.version() < v"17"
233+
for f in functions(mod)
234+
lower_unreachable!(f)
235+
end
192236
end
193237

194238
if job.config.kernel

test/ptx_tests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ end
114114
@test occursin(r"@\w*kernel\w*\(\[1 x i64\] %state", ir)
115115

116116
# child1 doesn't use the state
117-
@test occursin(r"@\w*child1\w*\((i64|i8\*)", ir)
117+
@test occursin(r"@\w*child1\w*\((i64|i8\*|ptr)", ir)
118118

119119
# child2 does
120120
@test occursin(r"@\w*child2\w*\(\[1 x i64\] %state", ir)
@@ -341,7 +341,7 @@ precompile_test_harness("Inference caching") do load_path
341341
job, _ = PTXCompiler.create_job(kernel, ())
342342
GPUCompiler.code_typed(job)
343343
end
344-
344+
345345
# identity is foreign
346346
@setup_workload begin
347347
job, _ = PTXCompiler.create_job(identity, (Int,))

0 commit comments

Comments
 (0)