Skip to content

Commit 25c3f64

Browse files
authored
Revert "[StructurizeCFG] Hoist and simplify zero-cost incoming else phi values" (#148016)
reverting to fix Buildbot failures.
1 parent ce571c9 commit 25c3f64

File tree

4 files changed

+11
-466
lines changed

4 files changed

+11
-466
lines changed

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 8 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "llvm/Analysis/RegionInfo.h"
2020
#include "llvm/Analysis/RegionIterator.h"
2121
#include "llvm/Analysis/RegionPass.h"
22-
#include "llvm/Analysis/TargetTransformInfo.h"
2322
#include "llvm/Analysis/UniformityAnalysis.h"
2423
#include "llvm/IR/BasicBlock.h"
2524
#include "llvm/IR/CFG.h"
@@ -129,7 +128,6 @@ struct PredInfo {
129128
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
130129
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
131130
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
132-
using Val2BBMap = DenseMap<Value *, BasicBlock *>;
133131

134132
// A traits type that is intended to be used in graph algorithms. The graph
135133
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -281,7 +279,7 @@ class StructurizeCFG {
281279
ConstantInt *BoolTrue;
282280
ConstantInt *BoolFalse;
283281
Value *BoolPoison;
284-
const TargetTransformInfo *TTI;
282+
285283
Function *Func;
286284
Region *ParentRegion;
287285

@@ -303,12 +301,8 @@ class StructurizeCFG {
303301
PredMap LoopPreds;
304302
BranchVector LoopConds;
305303

306-
Val2BBMap HoistedValues;
307-
308304
RegionNode *PrevNode;
309305

310-
void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
311-
312306
void orderNodes();
313307

314308
void analyzeLoops(RegionNode *N);
@@ -338,8 +332,6 @@ class StructurizeCFG {
338332

339333
void simplifyAffectedPhis();
340334

341-
void simplifyHoistedPhis();
342-
343335
DebugLoc killTerminator(BasicBlock *BB);
344336

345337
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -367,7 +359,7 @@ class StructurizeCFG {
367359

368360
public:
369361
void init(Region *R);
370-
bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
362+
bool run(Region *R, DominatorTree *DT);
371363
bool makeUniformRegion(Region *R, UniformityInfo &UA);
372364
};
373365

@@ -393,21 +385,16 @@ class StructurizeCFGLegacyPass : public RegionPass {
393385
if (SCFG.makeUniformRegion(R, UA))
394386
return false;
395387
}
396-
Function *F = R->getEntry()->getParent();
397-
const TargetTransformInfo *TTI =
398-
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
399388
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
400-
return SCFG.run(R, DT, TTI);
389+
return SCFG.run(R, DT);
401390
}
402391

403392
StringRef getPassName() const override { return "Structurize control flow"; }
404393

405394
void getAnalysisUsage(AnalysisUsage &AU) const override {
406395
if (SkipUniformRegions)
407396
AU.addRequired<UniformityInfoWrapperPass>();
408-
AU.addRequired<TargetTransformInfoWrapperPass>();
409397
AU.addRequired<DominatorTreeWrapperPass>();
410-
AU.addRequired<TargetTransformInfoWrapperPass>();
411398

412399
AU.addPreserved<DominatorTreeWrapperPass>();
413400
RegionPass::getAnalysisUsage(AU);
@@ -416,34 +403,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
416403

417404
} // end anonymous namespace
418405

419-
/// Checks whether an instruction is zero cost instruction and checks if the
420-
/// operands are from different BB. If so, this instruction can be coalesced
421-
/// if its hoisted to predecessor block. So, this returns true.
422-
static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
423-
const TargetTransformInfo *TTI) {
424-
if (I->getParent() != BB)
425-
return false;
426-
427-
// If the instruction is not a zero cost instruction, return false.
428-
auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
429-
InstructionCost::CostType CostVal =
430-
Cost.isValid()
431-
? Cost.getValue()
432-
: (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
433-
if (CostVal != 0)
434-
return false;
435-
436-
// Check if any operands are instructions defined in the same block.
437-
for (auto &Op : I->operands()) {
438-
if (auto *OpI = dyn_cast<Instruction>(Op)) {
439-
if (OpI->getParent() == BB)
440-
return false;
441-
}
442-
}
443-
444-
return true;
445-
}
446-
447406
char StructurizeCFGLegacyPass::ID = 0;
448407

449408
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
454413
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
455414
"Structurize the CFG", false, false)
456415

457-
/// Structurization can introduce unnecessary VGPR copies due to register
458-
/// coalescing interference. For example, if the Else block has a zero-cost
459-
/// instruction and the Then block modifies the VGPR value, only one value is
460-
/// live at a time in merge block before structurization. After structurization,
461-
/// the coalescer may incorrectly treat the Then value as live in the Else block
462-
/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
463-
///
464-
/// This function examines phi nodes whose incoming values are zero-cost
465-
/// instructions in the Else block. It identifies such values that can be safely
466-
/// hoisted and moves them to the nearest common dominator of Then and Else
467-
/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
468-
/// value to poison phi nodes along the if→flow edge, aiding register coalescing
469-
/// and minimizing unnecessary live ranges.
470-
void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
471-
BasicBlock *ThenBB) {
472-
473-
BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
474-
BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
475-
476-
if (!ElseSucc || !CommonDominator)
477-
return;
478-
Instruction *Term = CommonDominator->getTerminator();
479-
for (PHINode &Phi : ElseSucc->phis()) {
480-
Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
481-
auto *Inst = dyn_cast<Instruction>(ElseVal);
482-
if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
483-
continue;
484-
Inst->removeFromParent();
485-
Inst->insertInto(CommonDominator, Term->getIterator());
486-
HoistedValues[Inst] = CommonDominator;
487-
}
488-
}
489-
490416
/// Build up the general order of nodes, by performing a topological sort of the
491417
/// parent region's nodes, while ensuring that there is no outer cycle node
492418
/// between any two inner cycle nodes.
@@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
609535
BasicBlock *Other = Term->getSuccessor(!i);
610536
if (Visited.count(Other) && !Loops.count(Other) &&
611537
!Pred.count(Other) && !Pred.count(P)) {
612-
hoistZeroCostElseBlockPhiValues(Succ, Other);
538+
613539
Pred[Other] = {BoolFalse, std::nullopt};
614540
Pred[P] = {BoolTrue, std::nullopt};
615541
continue;
@@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() {
965891
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
966892
}
967893

968-
/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
969-
/// entries on Flow nodes with the appropriate hoisted values
970-
void StructurizeCFG::simplifyHoistedPhis() {
971-
for (WeakVH VH : AffectedPhis) {
972-
PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
973-
if (!Phi || Phi->getNumIncomingValues() != 2)
974-
continue;
975-
976-
for (int i = 0; i < 2; i++) {
977-
Value *V = Phi->getIncomingValue(i);
978-
auto BBIt = HoistedValues.find(V);
979-
980-
if (BBIt == HoistedValues.end())
981-
continue;
982-
983-
Value *OtherV = Phi->getIncomingValue(!i);
984-
PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
985-
if (!OtherPhi)
986-
continue;
987-
988-
int PoisonValBBIdx = -1;
989-
for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
990-
if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
991-
continue;
992-
PoisonValBBIdx = i;
993-
break;
994-
}
995-
if (PoisonValBBIdx == -1 ||
996-
!DT->dominates(BBIt->second,
997-
OtherPhi->getIncomingBlock(PoisonValBBIdx)))
998-
continue;
999-
1000-
OtherPhi->setIncomingValue(PoisonValBBIdx, V);
1001-
Phi->setIncomingValue(i, OtherV);
1002-
}
1003-
}
1004-
}
1005-
1006894
void StructurizeCFG::simplifyAffectedPhis() {
1007895
bool Changed;
1008896
do {
@@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
13951283
}
13961284

13971285
/// Run the transformation for each region found
1398-
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1399-
const TargetTransformInfo *TTI) {
1286+
bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
14001287
if (R->isTopLevelRegion())
14011288
return false;
14021289

14031290
this->DT = DT;
1404-
this->TTI = TTI;
1291+
14051292
Func = R->getEntry()->getParent();
14061293
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
14071294

@@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
14131300
insertConditions(false);
14141301
insertConditions(true);
14151302
setPhiValues();
1416-
simplifyHoistedPhis();
14171303
simplifyConditions();
14181304
simplifyAffectedPhis();
14191305
rebuildSSA();
@@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
14631349
bool Changed = false;
14641350
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
14651351
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1466-
TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
1352+
14671353
UniformityInfo *UI = nullptr;
14681354
if (SkipUniformRegions)
14691355
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
14821368
continue;
14831369
}
14841370

1485-
Changed |= SCFG.run(R, DT, TTI);
1371+
Changed |= SCFG.run(R, DT);
14861372
}
14871373
if (!Changed)
14881374
return PreservedAnalyses::all();

llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
98519851
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
98529852
; CHECK-NEXT: s_cbranch_execz .LBB8_6
98539853
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
9854-
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
98559854
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
9855+
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
98569856
; CHECK-NEXT: s_movk_i32 s4, 0xf800
98579857
; CHECK-NEXT: s_mov_b32 s5, -1
98589858
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
1116711167
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
1116811168
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
1116911169
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
11170-
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
1117111170
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
11171+
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
1117211172
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
1117311173
; ALIGNED-NEXT: s_mov_b32 s5, -1
1117411174
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
1238112381
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
1238212382
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
1238312383
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
12384-
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
1238512384
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
12385+
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
1238612386
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
1238712387
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
1238812388
; UNROLL3-NEXT: s_waitcnt vmcnt(2)

0 commit comments

Comments
 (0)