24
24
#include < llvm/Passes/PassPlugin.h>
25
25
26
26
// NewPM needs to manually include all the pass headers
27
+ #include < llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
27
28
#include < llvm/Transforms/IPO/AlwaysInliner.h>
28
29
#include < llvm/Transforms/IPO/Annotation2Metadata.h>
29
30
#include < llvm/Transforms/IPO/ConstantMerge.h>
30
31
#include < llvm/Transforms/IPO/ForceFunctionAttrs.h>
31
32
#include < llvm/Transforms/IPO/GlobalDCE.h>
33
+ #include < llvm/Transforms/IPO/GlobalOpt.h>
32
34
#include < llvm/Transforms/IPO/StripDeadPrototypes.h>
33
35
#include < llvm/Transforms/InstCombine/InstCombine.h>
34
36
#include < llvm/Transforms/Instrumentation/AddressSanitizer.h>
35
37
#include < llvm/Transforms/Instrumentation/MemorySanitizer.h>
36
38
#include < llvm/Transforms/Instrumentation/ThreadSanitizer.h>
37
39
#include < llvm/Transforms/Scalar/ADCE.h>
38
40
#include < llvm/Transforms/Scalar/AnnotationRemarks.h>
41
+ #include < llvm/Transforms/Scalar/BDCE.h>
42
+ #include " llvm/Transforms/Scalar/ConstraintElimination.h"
39
43
#include < llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
40
44
#include < llvm/Transforms/Scalar/DCE.h>
41
45
#include < llvm/Transforms/Scalar/DeadStoreElimination.h>
59
63
#include < llvm/Transforms/Scalar/LowerConstantIntrinsics.h>
60
64
#include < llvm/Transforms/Scalar/LowerExpectIntrinsic.h>
61
65
#include < llvm/Transforms/Scalar/MemCpyOptimizer.h>
66
+ #include < llvm/Transforms/Scalar/MergedLoadStoreMotion.h>
62
67
#include < llvm/Transforms/Scalar/Reassociate.h>
63
68
#include < llvm/Transforms/Scalar/SCCP.h>
64
69
#include < llvm/Transforms/Scalar/SROA.h>
65
70
#include < llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
66
71
#include < llvm/Transforms/Scalar/SimplifyCFG.h>
67
72
#include < llvm/Transforms/Scalar/WarnMissedTransforms.h>
73
+ #include < llvm/Transforms/Utils/LibCallsShrinkWrap.h>
68
74
#include < llvm/Transforms/Utils/InjectTLIMappings.h>
75
+ #include < llvm/Transforms/Utils/Mem2Reg.h>
76
+ #include < llvm/Transforms/Utils/RelLookupTableConverter.h>
69
77
#include < llvm/Transforms/Utils/ModuleUtils.h>
70
78
#include < llvm/Transforms/Utils/SimplifyCFGOptions.h>
71
79
#include < llvm/Transforms/Vectorize/LoopVectorize.h>
@@ -196,10 +204,9 @@ namespace {
196
204
.convertSwitchRangeToICmp (true )
197
205
.convertSwitchToLookupTable (true )
198
206
.forwardSwitchCondToPhi (true )
199
- // These mess with loop rotation, so only do them after that
207
+ . needCanonicalLoops ( false )
200
208
.hoistCommonInsts (true )
201
- // Causes an SRET assertion error in late-gc-lowering
202
- // .sinkCommonInsts(true)
209
+ .sinkCommonInsts (true )
203
210
;
204
211
}
205
212
@@ -341,10 +348,16 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
341
348
FPM.addPass (DCEPass ());
342
349
FPM.addPass (SimplifyCFGPass (basicSimplifyCFGOptions ()));
343
350
if (O.getSpeedupLevel () >= 1 ) {
344
- // TODO check the LLVM 15 default.
345
- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
351
+ FPM. addPass ( SROAPass (SROAOptions::ModifyCFG));
352
+ FPM.addPass (EarlyCSEPass ( ));
346
353
}
347
354
MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
355
+ if (O.getSpeedupLevel () >= 1 ) {
356
+ FunctionPassManager GlobalFPM;
357
+ MPM.addPass (GlobalOptPass ());
358
+ GlobalFPM.addPass (PromotePass ());
359
+ GlobalFPM.addPass (InstCombinePass ());
360
+ }
348
361
}
349
362
invokeEarlySimplificationCallbacks (MPM, PB, O);
350
363
}
@@ -379,22 +392,24 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
379
392
if (O.getSpeedupLevel () >= 1 ) {
380
393
FunctionPassManager FPM;
381
394
if (O.getSpeedupLevel () >= 2 ) {
382
- // TODO check the LLVM 15 default.
383
- FPM.addPass (SROAPass (SROAOptions::PreserveCFG));
384
- // SROA can duplicate PHI nodes which can block LowerSIMD
385
- FPM.addPass (InstCombinePass ());
386
- FPM.addPass (JumpThreadingPass ());
387
- FPM.addPass (CorrelatedValuePropagationPass ());
388
- FPM.addPass (ReassociatePass ());
389
- FPM.addPass (EarlyCSEPass ());
390
- JULIA_PASS (FPM.addPass (AllocOptPass ()));
391
- } else { // if (O.getSpeedupLevel() >= 1) (exactly)
392
- FPM.addPass (InstCombinePass ());
393
- FPM.addPass (EarlyCSEPass ());
394
- }
395
- invokePeepholeEPCallbacks (FPM, PB, O);
396
- MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
395
+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG));
396
+ FPM.addPass (EarlyCSEPass (true ));
397
+ FPM.addPass (InstCombinePass ());
398
+ FPM.addPass (AggressiveInstCombinePass ());
399
+ FPM.addPass (JumpThreadingPass ());
400
+ FPM.addPass (CorrelatedValuePropagationPass ());
401
+ FPM.addPass (LibCallsShrinkWrapPass ());
402
+ FPM.addPass (ReassociatePass ());
403
+ FPM.addPass (ConstraintEliminationPass ());
404
+ JULIA_PASS (FPM.addPass (AllocOptPass ()));
405
+ } else { // if (O.getSpeedupLevel() >= 1) (exactly)
406
+ FPM.addPass (EarlyCSEPass ());
407
+ FPM.addPass (InstCombinePass ());
408
+ }
409
+ invokePeepholeEPCallbacks (FPM, PB, O);
410
+ MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM), /* EagerlyInvalidate = */ true ));
397
411
}
412
+ MPM.addPass (GlobalOptPass ());
398
413
MPM.addPass (GlobalDCEPass ());
399
414
}
400
415
MPM.addPass (AfterEarlyOptimizationMarkerPass ());
@@ -407,41 +422,41 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB
407
422
LoopPassManager LPM;
408
423
LPM.addPass (LowerSIMDLoopPass ());
409
424
if (O.getSpeedupLevel () >= 2 ) {
410
- LPM.addPass (LoopRotatePass ());
425
+ LPM.addPass (LoopInstSimplifyPass ());
426
+ LPM.addPass (LoopSimplifyCFGPass ());
427
+ LPM.addPass (BeforeLICMMarkerPass ());
428
+ auto opts = LICMOptions ();
429
+ opts.AllowSpeculation = false ;
430
+ LPM.addPass (LICMPass (opts));
431
+ LPM.addPass (JuliaLICMPass ());
432
+ LPM.addPass (LoopRotatePass (true , false ));
433
+ LPM.addPass (LICMPass (LICMOptions ()));
434
+ LPM.addPass (JuliaLICMPass ());
435
+ LPM.addPass (AfterLICMMarkerPass ());
436
+ LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
411
437
}
412
438
invokeLateLoopOptimizationCallbacks (LPM, PB, O);
413
439
// We don't know if the loop callbacks support MSSA
414
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ false ));
415
- }
416
- if (O.getSpeedupLevel () >= 2 ) {
417
- LoopPassManager LPM;
418
- LPM.addPass (BeforeLICMMarkerPass ());
419
- LPM.addPass (LICMPass (LICMOptions ()));
420
- LPM.addPass (JuliaLICMPass ());
421
- LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
422
- LPM.addPass (LICMPass (LICMOptions ()));
423
- LPM.addPass (JuliaLICMPass ());
424
- LPM.addPass (AfterLICMMarkerPass ());
425
- // LICM needs MemorySSA now, so we must use it
426
440
FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ true ));
427
441
}
428
- if (O.getSpeedupLevel () >= 2 ) {
442
+ if (O.getSpeedupLevel () >= 2 )
429
443
FPM.addPass (IRCEPass ());
430
- }
431
444
{
432
445
LoopPassManager LPM;
433
446
LPM.addPass (BeforeLoopSimplificationMarkerPass ());
434
447
if (O.getSpeedupLevel () >= 2 ) {
435
- LPM.addPass (LoopInstSimplifyPass ());
436
448
LPM.addPass (LoopIdiomRecognizePass ());
437
449
LPM.addPass (IndVarSimplifyPass ());
450
+ LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
438
451
LPM.addPass (LoopDeletionPass ());
439
452
// This unroll will only unroll loops when the trip count is known and small,
440
453
// so that no loop remains
441
454
LPM.addPass (LoopFullUnrollPass ());
442
455
}
443
456
invokeLoopOptimizerEndCallbacks (LPM, PB, O);
444
457
LPM.addPass (AfterLoopSimplificationMarkerPass ());
458
+ FPM.addPass (SimplifyCFGPass (basicSimplifyCFGOptions ()));
459
+ FPM.addPass (InstCombinePass ());
445
460
// We don't know if the loop end callbacks support MSSA
446
461
FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ false ));
447
462
}
@@ -454,17 +469,28 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
454
469
if (options.enable_scalar_optimizations ) {
455
470
if (O.getSpeedupLevel () >= 2 ) {
456
471
JULIA_PASS (FPM.addPass (AllocOptPass ()));
457
- // TODO check the LLVM 15 default.
458
- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
459
- FPM.addPass (InstSimplifyPass ());
472
+ FPM. addPass ( SROAPass (SROAOptions::ModifyCFG));
473
+ FPM.addPass (VectorCombinePass ( /* TryEarlyFoldsOnly= */ true ));
474
+ FPM.addPass (MergedLoadStoreMotionPass ());
460
475
FPM.addPass (GVNPass ());
461
- FPM.addPass (MemCpyOptPass ());
462
476
FPM.addPass (SCCPPass ());
477
+ FPM.addPass (BDCEPass ());
478
+ FPM.addPass (InstCombinePass ());
463
479
FPM.addPass (CorrelatedValuePropagationPass ());
464
- FPM.addPass (DCEPass ());
480
+ FPM.addPass (ADCEPass ());
481
+ FPM.addPass (MemCpyOptPass ());
482
+ FPM.addPass (DSEPass ());
465
483
FPM.addPass (IRCEPass ());
466
- FPM.addPass (InstCombinePass ());
467
484
FPM.addPass (JumpThreadingPass ());
485
+ FPM.addPass (ConstraintEliminationPass ());
486
+ } else if (O.getSpeedupLevel () >= 1 ) {
487
+ JULIA_PASS (FPM.addPass (AllocOptPass ()));
488
+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG));
489
+ FPM.addPass (MemCpyOptPass ());
490
+ FPM.addPass (SCCPPass ());
491
+ FPM.addPass (BDCEPass ());
492
+ FPM.addPass (InstCombinePass ());
493
+ FPM.addPass (ADCEPass ());
468
494
}
469
495
if (O.getSpeedupLevel () >= 3 ) {
470
496
FPM.addPass (GVNPass ());
@@ -476,12 +502,15 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
476
502
JULIA_PASS (FPM.addPass (AllocOptPass ()));
477
503
{
478
504
LoopPassManager LPM;
479
- LPM.addPass (LoopDeletionPass ( ));
480
- LPM.addPass (LoopInstSimplifyPass ());
481
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM)));
505
+ LPM.addPass (LICMPass ( LICMOptions () ));
506
+ LPM.addPass (JuliaLICMPass ());
507
+ FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ true ));
482
508
}
483
- FPM.addPass (LoopDistributePass ());
484
- }
509
+ FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
510
+ FPM.addPass (InstCombinePass ());
511
+ } else if (O.getSpeedupLevel () >= 1 )
512
+ FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
513
+
485
514
invokeScalarOptimizerCallbacks (FPM, PB, O);
486
515
}
487
516
FPM.addPass (AfterScalarOptimizationMarkerPass ());
@@ -491,19 +520,27 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim
491
520
FPM.addPass (BeforeVectorizationMarkerPass ());
492
521
if (options.enable_vector_pipeline ) {
493
522
// TODO look into loop vectorize options
523
+ // Rerotate loops that might have been unrotated in the simplification
524
+ LoopPassManager LPM;
525
+ LPM.addPass (LoopRotatePass ());
526
+ LPM.addPass (LoopDeletionPass ());
527
+ FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
528
+ FPM.addPass (LoopDistributePass ());
494
529
FPM.addPass (InjectTLIMappings ());
495
530
FPM.addPass (LoopVectorizePass ());
496
531
FPM.addPass (LoopLoadEliminationPass ());
497
- FPM.addPass (InstCombinePass ());
498
532
FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
533
+ FPM.addPass (createFunctionToLoopPassAdaptor (LICMPass (LICMOptions ()), /* UseMemorySSA=*/ true , /* UseBlockFrequencyInfo=*/ false ));
534
+ FPM.addPass (EarlyCSEPass ());
535
+ FPM.addPass (CorrelatedValuePropagationPass ());
536
+ FPM.addPass (InstCombinePass ());
499
537
FPM.addPass (SLPVectorizerPass ());
500
- invokeVectorizerCallbacks (FPM, PB, O);
501
538
FPM.addPass (VectorCombinePass ());
502
- FPM.addPass (ADCEPass ());
503
- // TODO add BDCEPass here?
504
- // This unroll will unroll vectorized loops
505
- // as well as loops that we tried but failed to vectorize
539
+ invokeVectorizerCallbacks (FPM, PB, O);
506
540
FPM.addPass (LoopUnrollPass (LoopUnrollOptions (O.getSpeedupLevel (), /* OnlyWhenForced = */ false , /* ForgetSCEV = */ false )));
541
+ FPM.addPass (SROAPass (SROAOptions::PreserveCFG));
542
+ FPM.addPass (InstSimplifyPass ());
543
+ FPM.addPass (AfterVectorizationMarkerPass ());
507
544
}
508
545
FPM.addPass (AfterVectorizationMarkerPass ());
509
546
}
@@ -525,18 +562,18 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
525
562
FunctionPassManager FPM;
526
563
JULIA_PASS (FPM.addPass (LateLowerGCPass ()));
527
564
JULIA_PASS (FPM.addPass (FinalLowerGCPass ()));
528
- if (O.getSpeedupLevel () >= 2 ) {
529
- FPM.addPass (DSEPass ());
530
- FPM.addPass (GVNPass ());
531
- FPM.addPass (SCCPPass ());
532
- FPM.addPass (DCEPass ());
533
- }
534
565
MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
535
566
}
536
567
JULIA_PASS (MPM.addPass (LowerPTLSPass (options.dump_native )));
537
568
MPM.addPass (RemoveJuliaAddrspacesPass ()); // TODO: Make this conditional on arches (GlobalISel doesn't like our addrspaces)
538
569
if (O.getSpeedupLevel () >= 1 ) {
539
570
FunctionPassManager FPM;
571
+ if (O.getSpeedupLevel () >= 2 ) {
572
+ FPM.addPass (DSEPass ());
573
+ FPM.addPass (GVNPass ());
574
+ FPM.addPass (SCCPPass ());
575
+ FPM.addPass (DCEPass ());
576
+ }
540
577
FPM.addPass (InstCombinePass ());
541
578
FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
542
579
MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
0 commit comments