19
19
#include " llvm/Analysis/RegionInfo.h"
20
20
#include " llvm/Analysis/RegionIterator.h"
21
21
#include " llvm/Analysis/RegionPass.h"
22
+ #include " llvm/Analysis/TargetTransformInfo.h"
22
23
#include " llvm/Analysis/UniformityAnalysis.h"
23
24
#include " llvm/IR/BasicBlock.h"
24
25
#include " llvm/IR/CFG.h"
@@ -128,6 +129,7 @@ struct PredInfo {
128
129
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
129
130
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
130
131
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
132
+ using Val2BBMap = DenseMap<Value *, BasicBlock *>;
131
133
132
134
// A traits type that is intended to be used in graph algorithms. The graph
133
135
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -279,7 +281,7 @@ class StructurizeCFG {
279
281
ConstantInt *BoolTrue;
280
282
ConstantInt *BoolFalse;
281
283
Value *BoolPoison;
282
-
284
+ const TargetTransformInfo *TTI;
283
285
Function *Func;
284
286
Region *ParentRegion;
285
287
@@ -301,8 +303,12 @@ class StructurizeCFG {
301
303
PredMap LoopPreds;
302
304
BranchVector LoopConds;
303
305
306
+ Val2BBMap HoistedValues;
307
+
304
308
RegionNode *PrevNode;
305
309
310
+ void hoistZeroCostElseBlockPhiValues (BasicBlock *ElseBB, BasicBlock *ThenBB);
311
+
306
312
void orderNodes ();
307
313
308
314
void analyzeLoops (RegionNode *N);
@@ -332,6 +338,8 @@ class StructurizeCFG {
332
338
333
339
void simplifyAffectedPhis ();
334
340
341
+ void simplifyHoistedPhis ();
342
+
335
343
DebugLoc killTerminator (BasicBlock *BB);
336
344
337
345
void changeExit (RegionNode *Node, BasicBlock *NewExit,
@@ -359,7 +367,7 @@ class StructurizeCFG {
359
367
360
368
public:
361
369
void init (Region *R);
362
- bool run (Region *R, DominatorTree *DT);
370
+ bool run (Region *R, DominatorTree *DT, const TargetTransformInfo *TTI );
363
371
bool makeUniformRegion (Region *R, UniformityInfo &UA);
364
372
};
365
373
@@ -385,16 +393,21 @@ class StructurizeCFGLegacyPass : public RegionPass {
385
393
if (SCFG.makeUniformRegion (R, UA))
386
394
return false ;
387
395
}
396
+ Function *F = R->getEntry ()->getParent ();
397
+ const TargetTransformInfo *TTI =
398
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (*F);
388
399
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
389
- return SCFG.run (R, DT);
400
+ return SCFG.run (R, DT, TTI );
390
401
}
391
402
392
403
StringRef getPassName () const override { return " Structurize control flow" ; }
393
404
394
405
void getAnalysisUsage (AnalysisUsage &AU) const override {
395
406
if (SkipUniformRegions)
396
407
AU.addRequired <UniformityInfoWrapperPass>();
408
+ AU.addRequired <TargetTransformInfoWrapperPass>();
397
409
AU.addRequired <DominatorTreeWrapperPass>();
410
+ AU.addRequired <TargetTransformInfoWrapperPass>();
398
411
399
412
AU.addPreserved <DominatorTreeWrapperPass>();
400
413
RegionPass::getAnalysisUsage (AU);
@@ -403,6 +416,34 @@ class StructurizeCFGLegacyPass : public RegionPass {
403
416
404
417
} // end anonymous namespace
405
418
419
+ // / Checks whether an instruction is zero cost instruction and checks if the
420
+ // / operands are from different BB. If so, this instruction can be coalesced
421
+ // / if its hoisted to predecessor block. So, this returns true.
422
+ static bool isHoistableInstruction (Instruction *I, BasicBlock *BB,
423
+ const TargetTransformInfo *TTI) {
424
+ if (I->getParent () != BB)
425
+ return false ;
426
+
427
+ // If the instruction is not a zero cost instruction, return false.
428
+ auto Cost = TTI->getInstructionCost (I, TargetTransformInfo::TCK_Latency);
429
+ InstructionCost::CostType CostVal =
430
+ Cost.isValid ()
431
+ ? Cost.getValue ()
432
+ : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
433
+ if (CostVal != 0 )
434
+ return false ;
435
+
436
+ // Check if any operands are instructions defined in the same block.
437
+ for (auto &Op : I->operands ()) {
438
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
439
+ if (OpI->getParent () == BB)
440
+ return false ;
441
+ }
442
+ }
443
+
444
+ return true ;
445
+ }
446
+
406
447
char StructurizeCFGLegacyPass::ID = 0 ;
407
448
408
449
INITIALIZE_PASS_BEGIN (StructurizeCFGLegacyPass, " structurizecfg" ,
@@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
413
454
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, " structurizecfg" ,
414
455
" Structurize the CFG" , false , false )
415
456
457
+ // / Structurization can introduce unnecessary VGPR copies due to register
458
+ // / coalescing interference. For example, if the Else block has a zero-cost
459
+ // / instruction and the Then block modifies the VGPR value, only one value is
460
+ // / live at a time in merge block before structurization. After structurization,
461
+ // / the coalescer may incorrectly treat the Then value as live in the Else block
462
+ // / (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
463
+ // /
464
+ // / This function examines phi nodes whose incoming values are zero-cost
465
+ // / instructions in the Else block. It identifies such values that can be safely
466
+ // / hoisted and moves them to the nearest common dominator of Then and Else
467
+ // / blocks. A follow-up function after setting PhiNodes assigns the hoisted
468
+ // / value to poison phi nodes along the if→flow edge, aiding register coalescing
469
+ // / and minimizing unnecessary live ranges.
470
+ void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
471
+ BasicBlock *ThenBB) {
472
+
473
+ BasicBlock *ElseSucc = ElseBB->getSingleSuccessor ();
474
+ BasicBlock *CommonDominator = DT->findNearestCommonDominator (ElseBB, ThenBB);
475
+
476
+ if (!ElseSucc || !CommonDominator)
477
+ return ;
478
+ Instruction *Term = CommonDominator->getTerminator ();
479
+ for (PHINode &Phi : ElseSucc->phis ()) {
480
+ Value *ElseVal = Phi.getIncomingValueForBlock (ElseBB);
481
+ auto *Inst = dyn_cast<Instruction>(ElseVal);
482
+ if (!Inst || !isHoistableInstruction (Inst, ElseBB, TTI))
483
+ continue ;
484
+ Inst->removeFromParent ();
485
+ Inst->insertInto (CommonDominator, Term->getIterator ());
486
+ HoistedValues[Inst] = CommonDominator;
487
+ }
488
+ }
489
+
416
490
// / Build up the general order of nodes, by performing a topological sort of the
417
491
// / parent region's nodes, while ensuring that there is no outer cycle node
418
492
// / between any two inner cycle nodes.
@@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
535
609
BasicBlock *Other = Term->getSuccessor (!i);
536
610
if (Visited.count (Other) && !Loops.count (Other) &&
537
611
!Pred.count (Other) && !Pred.count (P)) {
538
-
612
+ hoistZeroCostElseBlockPhiValues (Succ, Other);
539
613
Pred[Other] = {BoolFalse, std::nullopt};
540
614
Pred[P] = {BoolTrue, std::nullopt};
541
615
continue ;
@@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() {
891
965
AffectedPhis.append (InsertedPhis.begin (), InsertedPhis.end ());
892
966
}
893
967
968
+ // / Updates PHI nodes after hoisted zero cost instructions by replacing poison
969
+ // / entries on Flow nodes with the appropriate hoisted values
970
+ void StructurizeCFG::simplifyHoistedPhis () {
971
+ for (WeakVH VH : AffectedPhis) {
972
+ PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
973
+ if (!Phi || Phi->getNumIncomingValues () != 2 )
974
+ continue ;
975
+
976
+ for (int i = 0 ; i < 2 ; i++) {
977
+ Value *V = Phi->getIncomingValue (i);
978
+ auto BBIt = HoistedValues.find (V);
979
+
980
+ if (BBIt == HoistedValues.end ())
981
+ continue ;
982
+
983
+ Value *OtherV = Phi->getIncomingValue (!i);
984
+ PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
985
+ if (!OtherPhi)
986
+ continue ;
987
+
988
+ int PoisonValBBIdx = -1 ;
989
+ for (size_t i = 0 ; i < OtherPhi->getNumIncomingValues (); i++) {
990
+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue (i)))
991
+ continue ;
992
+ PoisonValBBIdx = i;
993
+ break ;
994
+ }
995
+ if (PoisonValBBIdx == -1 ||
996
+ !DT->dominates (BBIt->second ,
997
+ OtherPhi->getIncomingBlock (PoisonValBBIdx)))
998
+ continue ;
999
+
1000
+ OtherPhi->setIncomingValue (PoisonValBBIdx, V);
1001
+ Phi->setIncomingValue (i, OtherV);
1002
+ }
1003
+ }
1004
+ }
1005
+
894
1006
void StructurizeCFG::simplifyAffectedPhis () {
895
1007
bool Changed;
896
1008
do {
@@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1283
1395
}
1284
1396
1285
1397
// / Run the transformation for each region found
1286
- bool StructurizeCFG::run (Region *R, DominatorTree *DT) {
1398
+ bool StructurizeCFG::run (Region *R, DominatorTree *DT,
1399
+ const TargetTransformInfo *TTI) {
1287
1400
if (R->isTopLevelRegion ())
1288
1401
return false ;
1289
1402
1290
1403
this ->DT = DT;
1291
-
1404
+ this -> TTI = TTI;
1292
1405
Func = R->getEntry ()->getParent ();
1293
1406
assert (hasOnlySimpleTerminator (*Func) && " Unsupported block terminator." );
1294
1407
@@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1300
1413
insertConditions (false );
1301
1414
insertConditions (true );
1302
1415
setPhiValues ();
1416
+ simplifyHoistedPhis ();
1303
1417
simplifyConditions ();
1304
1418
simplifyAffectedPhis ();
1305
1419
rebuildSSA ();
@@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
1349
1463
bool Changed = false ;
1350
1464
DominatorTree *DT = &AM.getResult <DominatorTreeAnalysis>(F);
1351
1465
auto &RI = AM.getResult <RegionInfoAnalysis>(F);
1352
-
1466
+ TargetTransformInfo *TTI = &AM. getResult <TargetIRAnalysis>(F);
1353
1467
UniformityInfo *UI = nullptr ;
1354
1468
if (SkipUniformRegions)
1355
1469
UI = &AM.getResult <UniformityInfoAnalysis>(F);
@@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
1368
1482
continue ;
1369
1483
}
1370
1484
1371
- Changed |= SCFG.run (R, DT);
1485
+ Changed |= SCFG.run (R, DT, TTI );
1372
1486
}
1373
1487
if (!Changed)
1374
1488
return PreservedAnalyses::all ();
0 commit comments