Skip to content

Commit c9391bf

Browse files
authored
Remove legacy pass manager pipelines. (#605)
1 parent 4f63a1f commit c9391bf

File tree

1 file changed

+1
-283
lines changed

1 file changed

+1
-283
lines changed

src/optim.jl

Lines changed: 1 addition & 283 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,6 @@
11
# LLVM IR optimization
22

3-
function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=2)
4-
optimize_newpm!(job, mod; opt_level)
5-
# TODO: clean up
6-
return
7-
end
8-
9-
10-
## new pm
11-
12-
function optimize_newpm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
3+
function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=1)
134
tm = llvm_machine(job.config.target)
145

156
global current_job
@@ -292,279 +283,6 @@ function buildCleanupPipeline(mpm, @nospecialize(job::CompilerJob), opt_level)
292283
end
293284

294285

295-
## legacy pm
296-
297-
function optimize_legacypm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
298-
triple = llvm_triple(job.config.target)
299-
tm = llvm_machine(job.config.target)
300-
301-
global current_job
302-
current_job = job
303-
304-
@dispose pm=ModulePassManager() begin
305-
addTargetPasses!(pm, tm, triple)
306-
addOptimizationPasses!(pm, opt_level)
307-
run!(pm, mod)
308-
end
309-
310-
# NOTE: we need to use multiple distinct pass managers to force pass ordering;
311-
# intrinsics should never get lowered before Julia has optimized them.
312-
# XXX: why doesn't the barrier noop pass work here?
313-
314-
# lower intrinsics
315-
@dispose pm=ModulePassManager() begin
316-
addTargetPasses!(pm, tm, triple)
317-
318-
if !uses_julia_runtime(job)
319-
lower_gc_frame!(pm)
320-
end
321-
322-
if job.config.kernel
323-
# GC lowering is the last pass that may introduce calls to the runtime library,
324-
# and thus additional uses of the kernel state intrinsic.
325-
# TODO: now that all kernel state-related passes are being run here, merge some?
326-
add_kernel_state!(pm)
327-
lower_kernel_state!(pm)
328-
cleanup_kernel_state!(pm)
329-
end
330-
331-
if !uses_julia_runtime(job)
332-
# remove dead uses of ptls
333-
aggressive_dce!(pm)
334-
lower_ptls!(pm)
335-
end
336-
337-
if uses_julia_runtime(job)
338-
lower_exc_handlers!(pm)
339-
end
340-
# the Julia GC lowering pass also has some clean-up that is required
341-
late_lower_gc_frame!(pm)
342-
if uses_julia_runtime(job)
343-
final_lower_gc!(pm)
344-
end
345-
346-
remove_ni!(pm)
347-
remove_julia_addrspaces!(pm)
348-
349-
if uses_julia_runtime(job)
350-
# We need these two passes and the instcombine below
351-
# after GC lowering to let LLVM do some constant propagation on the tags,
352-
# and remove some unnecessary write barrier checks.
353-
gvn!(pm)
354-
sccp!(pm)
355-
# Remove dead use of ptls
356-
dce!(pm)
357-
LLVM.Interop.lower_ptls!(pm, dump_native(job))
358-
instruction_combining!(pm)
359-
# Clean up write barrier and ptls lowering
360-
cfgsimplification!(pm)
361-
end
362-
363-
# Julia's operand bundles confuse the inliner, so repeat here now they are gone.
364-
# FIXME: we should fix the inliner so that inlined code gets optimized early-on
365-
always_inliner!(pm)
366-
367-
# some of Julia's optimization passes happen _after_ lowering intrinsics
368-
combine_mul_add!(pm)
369-
div_rem_pairs!(pm)
370-
371-
run!(pm, mod)
372-
end
373-
374-
# target-specific optimizations
375-
optimize_module!(job, mod)
376-
377-
# we compile a module containing the entire call graph,
378-
# so perform some interprocedural optimizations.
379-
#
380-
# for some reason, these passes need to be distinct from the regular optimization chain,
381-
# or certain values (such as the constant arrays used to populate llvm.compiler.used as
382-
# part of the LateLowerGCFrame pass) aren't collected properly.
383-
#
384-
# these might not always be safe, as Julia's IR metadata isn't designed for IPO.
385-
@dispose pm=ModulePassManager() begin
386-
addTargetPasses!(pm, tm, triple)
387-
388-
# simplify function calls that don't use the returned value
389-
dead_arg_elimination!(pm)
390-
391-
run!(pm, mod)
392-
end
393-
394-
return
395-
end
396-
397-
function addTargetPasses!(pm, tm, triple)
398-
add_library_info!(pm, triple)
399-
add_transform_info!(pm, tm)
400-
end
401-
402-
# Based on Julia's optimization pipeline, minus the SLP and loop vectorizers.
403-
function addOptimizationPasses!(pm, opt_level)
404-
# compare with using Julia's optimization pipeline directly:
405-
#ccall(:jl_add_optimization_passes, Cvoid,
406-
# (LLVM.API.LLVMPassManagerRef, Cint, Cint),
407-
# pm, opt_level, #=lower_intrinsics=# 0)
408-
#return
409-
410-
# NOTE: LLVM 12 disabled the hoisting of common instructions
411-
# before loop vectorization (https://reviews.llvm.org/D84108).
412-
#
413-
# This is re-enabled with calls to cfg_simplify here,
414-
# to merge allocations and sometimes eliminate them,
415-
# since AllocOpt does not handle PhiNodes.
416-
# Enable this instruction hoisting because of this and Union benchmarks.
417-
418-
constant_merge!(pm)
419-
420-
if opt_level < 2
421-
cpu_features!(pm)
422-
if opt_level == 1
423-
instruction_simplify!(pm)
424-
end
425-
if LLVM.version() >= v"12"
426-
cfgsimplification!(pm; hoist_common_insts=true)
427-
else
428-
cfgsimplification!(pm)
429-
end
430-
if opt_level == 1
431-
scalar_repl_aggregates!(pm)
432-
instruction_combining!(pm)
433-
early_cse!(pm)
434-
# maybe add GVN?
435-
# also try GVNHoist and GVNSink
436-
end
437-
mem_cpy_opt!(pm)
438-
always_inliner!(pm) # Respect always_inline
439-
lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
440-
return
441-
end
442-
443-
propagate_julia_addrsp!(pm)
444-
scoped_no_alias_aa!(pm)
445-
type_based_alias_analysis!(pm)
446-
if opt_level >= 3
447-
basic_alias_analysis!(pm)
448-
end
449-
if LLVM.version() >= v"12"
450-
cfgsimplification!(pm; hoist_common_insts=true)
451-
else
452-
cfgsimplification!(pm)
453-
end
454-
dce!(pm)
455-
scalar_repl_aggregates!(pm)
456-
457-
#mem_cpy_opt!(pm)
458-
459-
always_inliner!(pm) # Respect always_inline
460-
461-
# Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard
462-
# time merging the `alloca` for the unboxed data and the `alloca` created by
463-
# the `alloc_opt` pass.
464-
465-
alloc_opt!(pm)
466-
# consider AggressiveInstCombinePass at optlevel > 2
467-
instruction_combining!(pm)
468-
if LLVM.version() >= v"12"
469-
cfgsimplification!(pm; hoist_common_insts=true)
470-
else
471-
cfgsimplification!(pm)
472-
end
473-
cpu_features!(pm)
474-
scalar_repl_aggregates!(pm)
475-
# SROA can duplicate PHI nodes which can block LowerSIMD
476-
instruction_combining!(pm)
477-
jump_threading!(pm)
478-
correlated_value_propagation!(pm)
479-
480-
reassociate!(pm)
481-
482-
early_cse!(pm)
483-
484-
# Load forwarding above can expose allocations that aren't actually used
485-
# remove those before optimizing loops.
486-
alloc_opt!(pm)
487-
loop_rotate!(pm)
488-
# moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
489-
490-
# LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
491-
lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
492-
licm!(pm)
493-
julia_licm!(pm)
494-
if LLVM.version() >= v"15"
495-
simple_loop_unswitch_legacy!(pm)
496-
else
497-
# XXX: simple loop unswitch is available on older versions of LLVM too,
498-
# but using this pass instead of the old one breaks Metal.jl.
499-
loop_unswitch!(pm)
500-
end
501-
licm!(pm)
502-
julia_licm!(pm)
503-
inductive_range_check_elimination!(pm)
504-
# Subsequent passes not stripping metadata from terminator
505-
instruction_simplify!(pm)
506-
loop_idiom!(pm)
507-
ind_var_simplify!(pm)
508-
loop_deletion!(pm)
509-
loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll
510-
511-
# Run our own SROA on heap objects before LLVM's
512-
alloc_opt!(pm)
513-
# Re-run SROA after loop-unrolling (useful for small loops that operate
514-
# over the structure of an aggregate)
515-
scalar_repl_aggregates!(pm)
516-
# might not be necessary:
517-
instruction_simplify!(pm)
518-
519-
gvn!(pm)
520-
mem_cpy_opt!(pm)
521-
sccp!(pm)
522-
523-
# These next two passes must come before IRCE to eliminate the bounds check in #43308
524-
correlated_value_propagation!(pm)
525-
dce!(pm)
526-
527-
inductive_range_check_elimination!(pm) # Must come between the two GVN passes
528-
529-
# Run instcombine after redundancy elimination to exploit opportunities
530-
# opened up by them.
531-
# This needs to be InstCombine instead of InstSimplify to allow
532-
# loops over Union-typed arrays to vectorize.
533-
instruction_combining!(pm)
534-
jump_threading!(pm)
535-
if opt_level >= 3
536-
gvn!(pm) # Must come after JumpThreading and before LoopVectorize
537-
end
538-
dead_store_elimination!(pm)
539-
540-
# More dead allocation (store) deletion before loop optimization
541-
# consider removing this:
542-
alloc_opt!(pm)
543-
# see if all of the constant folding has exposed more loops
544-
# to simplification and deletion
545-
# this helps significantly with cleaning up iteration
546-
cfgsimplification!(pm) # See note above, don't hoist instructions before LV
547-
loop_deletion!(pm)
548-
instruction_combining!(pm)
549-
loop_vectorize!(pm)
550-
loop_load_elimination!(pm)
551-
# Cleanup after LV pass
552-
instruction_combining!(pm)
553-
if LLVM.version() >= v"12"
554-
cfgsimplification!(pm; # Aggressive CFG simplification
555-
forward_switch_cond_to_phi=true,
556-
convert_switch_to_lookup_table=true,
557-
need_canonical_loop=true,
558-
hoist_common_insts=true,
559-
#sink_common_insts=true # FIXME: Causes assertion in llvm-late-lowering
560-
)
561-
else
562-
cfgsimplification!(pm)
563-
end
564-
565-
aggressive_dce!(pm)
566-
end
567-
568286

569287
## custom passes
570288

0 commit comments

Comments
 (0)