Skip to content

Commit cb736ee

Browse files
authored
Update New PassManager Pipeline (#559)
1 parent a807841 commit cb736ee

File tree

3 files changed

+41
-29
lines changed

3 files changed

+41
-29
lines changed

src/optim.jl

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level=2)
3838
add!(mpm, NewPMFunctionPassManager) do fpm
3939
buildLoopOptimizerPipeline(fpm, job, opt_level)
4040
buildScalarOptimizerPipeline(fpm, job, opt_level)
41-
if false && opt_level >= 2
41+
if uses_julia_runtime(job) && opt_level >= 2
4242
# XXX: we only enable vectorization when using the Julia runtime, as this generally isn't useful for GPU targets
4343
# and actually causes issues with some back-end compilers (like Metal).
44+
# TODO: Make this not dependent on `uses_julia_runtime` (likely CPU), but on its own control
4445
buildVectorPipeline(fpm, job, opt_level)
4546
end
4647
if isdebug(:optim)
@@ -112,6 +113,8 @@ end
112113

113114
function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
114115
add!(fpm, NewPMLoopPassManager) do lpm
116+
# TODO LowerSIMDLoopPass
117+
# LoopPass since JuliaLang/julia#51883
115118
if opt_level >= 2
116119
add!(lpm, LoopRotatePass())
117120
end
@@ -121,7 +124,7 @@ function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_le
121124
add!(fpm, NewPMLoopPassManager, #=UseMemorySSA=#true) do lpm
122125
add!(lpm, LICMPass())
123126
add!(lpm, JuliaLICMPass())
124-
add!(lpm, SimpleLoopUnswitchPass())
127+
add!(lpm, SimpleLoopUnswitchPass(SimpleLoopUnswitchPassOptions(nontrivial=true, trivial=true)))
125128
add!(lpm, LICMPass())
126129
add!(lpm, JuliaLICMPass())
127130
end
@@ -186,23 +189,41 @@ function buildVectorPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
186189
end
187190

188191
function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), opt_level)
189-
# lower exception handling
190-
if uses_julia_runtime(job)
192+
add!(mpm, RemoveNIPass())
193+
194+
# lower GC intrinsics
195+
add!(mpm, NewPMFunctionPassManager) do fpm
196+
if !uses_julia_runtime(job)
197+
add!(legacy2newpm(lower_gc_frame!), fpm)
198+
end
199+
end
200+
201+
# lower kernel state intrinsics
202+
# NOTE: we can only do so here, as GC lowering can introduce calls to the runtime,
203+
# and thus additional uses of the kernel state intrinsics.
204+
if job.config.kernel
205+
# TODO: now that all kernel state-related passes are being run here, merge some?
206+
add!(legacy2newpm(add_kernel_state!), mpm)
191207
add!(mpm, NewPMFunctionPassManager) do fpm
192-
add!(fpm, LowerExcHandlersPass())
208+
add!(legacy2newpm(lower_kernel_state!), fpm)
193209
end
210+
add!(legacy2newpm(cleanup_kernel_state!), mpm)
194211
end
195212

196-
add!(mpm, NewPMFunctionPassManager) do fpm
197-
add!(fpm, GCInvariantVerifierPass())
213+
if !uses_julia_runtime(job)
214+
# remove dead uses of ptls
215+
add!(mpm, NewPMFunctionPassManager) do fpm
216+
add!(fpm, ADCEPass())
217+
end
218+
add!(legacy2newpm(lower_ptls!), mpm)
198219
end
199-
add!(mpm, RemoveNIPass())
200220

201-
# lower GC intrinsics
202221
add!(mpm, NewPMFunctionPassManager) do fpm
203-
if !uses_julia_runtime(job)
204-
add!(legacy2newpm(lower_gc_frame!), fpm)
222+
# lower exception handling
223+
if uses_julia_runtime(job)
224+
add!(fpm, LowerExcHandlersPass())
205225
end
226+
add!(fpm, GCInvariantVerifierPass())
206227
add!(fpm, LateLowerGCPass())
207228
if uses_julia_runtime(job) && VERSION >= v"1.11.0-DEV.208"
208229
add!(fpm, FinalLowerGCPass())
@@ -220,27 +241,9 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op
220241
end
221242
end
222243

223-
# lower kernel state intrinsics
224-
# NOTE: we can only do so here, as GC lowering can introduce calls to the runtime,
225-
# and thus additional uses of the kernel state intrinsics.
226-
if job.config.kernel
227-
# TODO: now that all kernel state-related passes are being run here, merge some?
228-
add!(legacy2newpm(add_kernel_state!), mpm)
229-
add!(mpm, NewPMFunctionPassManager) do fpm
230-
add!(legacy2newpm(lower_kernel_state!), fpm)
231-
end
232-
add!(legacy2newpm(cleanup_kernel_state!), mpm)
233-
end
234-
235244
# lower PTLS intrinsics
236245
if uses_julia_runtime(job)
237246
add!(mpm, LowerPTLSPass())
238-
else
239-
# remove dead uses of ptls
240-
add!(mpm, NewPMFunctionPassManager) do fpm
241-
add!(fpm, ADCEPass())
242-
end
243-
add!(legacy2newpm(lower_ptls!), mpm)
244247
end
245248

246249
if opt_level >= 1

test/gcn_tests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,10 @@ false && @testset "GC and TLS lowering" begin
191191

192192
asm = sprint(io->GCN.code_native(io, mod.kernel, Tuple{Int}))
193193
@test occursin("gpu_gc_pool_alloc", asm)
194+
@test !occursin("julia.push_gc_frame", asm)
195+
@test !occursin("julia.pop_gc_frame", asm)
196+
@test !occursin("julia.get_gc_frame_slot", asm)
197+
@test !occursin("julia.new_gc_frame", asm)
194198

195199
# make sure that we can still elide allocations
196200
function ref_kernel(ptr, i)

test/ptx_tests.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,11 @@ end
279279

280280
asm = sprint(io->PTX.code_native(io, mod.kernel, Tuple{Int}))
281281
@test occursin("gpu_gc_pool_alloc", asm)
282+
@test !occursin("julia.push_gc_frame", asm)
283+
@test !occursin("julia.pop_gc_frame", asm)
284+
@test !occursin("julia.get_gc_frame_slot", asm)
285+
@test !occursin("julia.new_gc_frame", asm)
286+
282287

283288
# make sure that we can still elide allocations
284289
function ref_kernel(ptr, i)

0 commit comments

Comments
 (0)