Commit 258a425

[ScheduleDAGRRList] Recompute topological ordering on demand.
Currently there is a single point in ScheduleDAGRRList where we actually query the topological order (besides init code). At the moment we recompute the order after adding a node (which has no predecessors) and then add the predecessor edges one by one. We can avoid updating the ordering edge-by-edge after adding a new node: instead, we can rebuild the order from scratch once all edges have been added to the DAG, skipping the intermediate updates to the ordering.

Also, if we keep a list of added edges, we can delay updating the ordering until the order is actually queried. Depending on the number of outstanding updates, we either apply them on demand or recompute the order from scratch.

This brings the geomean compile time of CTMark with -O1 down by 0.3% on X86, with no regressions.

Reviewers: MatzeB, atrick, efriedma, niravd, paquette

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D60125

llvm-svn: 358583
1 parent 491ff04 commit 258a425
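
The strategy can be distilled into a small self-contained sketch (illustrative only: the type and helper names below are invented for the example; the cut-off of 10 queued updates matches the value the patch picks in ScheduleDAG.cpp further down):

  #include <utility>
  #include <vector>

  // Illustrative sketch of the deferred-update scheme (not the LLVM classes).
  struct DeferredTopoOrder {
    bool Dirty = false;                        // a structural change requires a full rebuild
    std::vector<std::pair<int, int>> Updates;  // queued edge insertions (From, To)

    void markDirty() { Dirty = true; }         // called after adding a new node

    void addEdgeQueued(int From, int To) {
      // Past a small cut-off, one full rebuild is assumed cheaper than many
      // incremental fix-ups; queued entries are then simply dropped, because
      // the rebuild reads all edges from the graph itself.
      Dirty = Dirty || Updates.size() > 10;
      if (!Dirty)
        Updates.emplace_back(From, To);
    }

    void fixOrder() {                          // run lazily, right before a query
      if (Dirty) {
        rebuildFromScratch();                  // one pass over the whole graph
        Dirty = false;
        Updates.clear();
        return;
      }
      for (auto &U : Updates)                  // otherwise replay the few updates
        applyEdge(U.first, U.second);
      Updates.clear();
    }

    void rebuildFromScratch() { /* recompute the whole order from the graph */ }
    void applyEdge(int, int)   { /* shift only the affected interval */ }
  };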

File tree

3 files changed: 87 additions, 24 deletions

  llvm/include/llvm/CodeGen/ScheduleDAG.h
  llvm/lib/CodeGen/ScheduleDAG.cpp
  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

llvm/include/llvm/CodeGen/ScheduleDAG.h

Lines changed: 19 additions & 0 deletions
@@ -691,6 +691,12 @@ class TargetRegisterInfo;
     std::vector<SUnit> &SUnits;
     SUnit *ExitSU;
 
+    // Have any new nodes been added?
+    bool Dirty = false;
+
+    // Outstanding added edges, that have not been applied to the ordering.
+    SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates;
+
     /// Maps topological index to the node number.
     std::vector<int> Index2Node;
     /// Maps the node number to its topological index.
@@ -710,6 +716,11 @@ class TargetRegisterInfo;
     /// Assigns the topological index to the node n.
     void Allocate(int n, int index);
 
+    /// Fix the ordering, by either recomputing from scratch or by applying
+    /// any outstanding updates. Uses a heuristic to estimate what will be
+    /// cheaper.
+    void FixOrder();
+
   public:
     ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
 
@@ -734,11 +745,19 @@
     /// added from SUnit \p X to SUnit \p Y.
     void AddPred(SUnit *Y, SUnit *X);
 
+    /// Queues an update to the topological ordering to accommodate an edge to
+    /// be added from SUnit \p X to SUnit \p Y.
+    void AddPredQueued(SUnit *Y, SUnit *X);
+
     /// Updates the topological ordering to accommodate an an edge to be
     /// removed from the specified node \p N from the predecessors of the
     /// current node \p M.
     void RemovePred(SUnit *M, SUnit *N);
 
+    /// Mark the ordering as temporarily broken, after a new node has been
+    /// added.
+    void MarkDirty() { Dirty = true; }
+
     typedef std::vector<int>::iterator iterator;
     typedef std::vector<int>::const_iterator const_iterator;
     iterator begin() { return Index2Node.begin(); }
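
In short, callers now mark structural changes and queue edge insertions, and the next topological query repairs the ordering. A rough sketch of the intended call pattern (NewSU, A and B are placeholder SUnit pointers; Topo is the ScheduleDAGTopologicalSort member, as used by ScheduleDAGRRList below):

  Topo.MarkDirty();               // a brand-new SUnit invalidates the ordering
  Topo.AddPredQueued(NewSU, A);   // queue the edge A -> NewSU for the ordering
  Topo.AddPredQueued(NewSU, B);   // queue the edge B -> NewSU
  // The next query calls FixOrder() internally, either replaying the queued
  // updates or recomputing the ordering from scratch:
  bool Cycle = Topo.WillCreateCycle(NewSU, A);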

llvm/lib/CodeGen/ScheduleDAG.cpp

Lines changed: 32 additions & 0 deletions
@@ -462,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
   // On insertion of the edge X->Y, the algorithm first marks by calling DFS
   // the nodes reachable from Y, and then shifts them using Shift to lie
   // immediately after X in Index2Node.
+
+  // Cancel pending updates, mark as valid.
+  Dirty = false;
+  Updates.clear();
+
   unsigned DAGSize = SUnits.size();
   std::vector<SUnit*> WorkList;
   WorkList.reserve(DAGSize);
@@ -515,6 +520,31 @@
 #endif
 }
 
+void ScheduleDAGTopologicalSort::FixOrder() {
+  // Recompute from scratch after new nodes have been added.
+  if (Dirty) {
+    InitDAGTopologicalSorting();
+    return;
+  }
+
+  // Otherwise apply updates one-by-one.
+  for (auto &U : Updates)
+    AddPred(U.first, U.second);
+  Updates.clear();
+}
+
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+  // Recomputing the order from scratch is likely more efficient than applying
+  // updates one-by-one for too many updates. The current cut-off is arbitrarily
+  // chosen.
+  Dirty = Dirty || Updates.size() > 10;
+
+  if (Dirty)
+    return;
+
+  Updates.emplace_back(Y, X);
+}
+
 void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
   int UpperBound, LowerBound;
   LowerBound = Node2Index[Y->NodeNum];
@@ -672,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
 }
 
 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+  FixOrder();
   // Is SU reachable from TargetSU via successor edges?
   if (IsReachable(SU, TargetSU))
     return true;
@@ -684,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
 
 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                              const SUnit *TargetSU) {
+  FixOrder();
   // If insertion of the edge SU->TargetSU would create a cycle
   // then there is a path from TargetSU to SU.
   int UpperBound, LowerBound;
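
One detail worth noting: AddPredQueued only records the ordering update (or drops it once Dirty is set); the edge itself still has to be added to the SUnit graph by the caller, since InitDAGTopologicalSorting() rebuilds the ordering from the SUnits' dependence lists rather than from the Updates queue (which it clears). The ScheduleDAGRRList wrapper in the next file keeps the two in sync, roughly:

  void AddPredQueued(SUnit *SU, const SDep &D) {
    Topo.AddPredQueued(SU, D.getSUnit()); // ordering update: queued, or dropped if a
                                          // full rebuild is pending anyway
    SU->addPred(D);                       // DAG edge: always added immediately
  }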

llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

Lines changed: 36 additions & 24 deletions
@@ -219,6 +219,14 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
     return Topo.WillCreateCycle(SU, TargetSU);
   }
 
+  /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
+  /// This returns true if this is a new predecessor.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+  void AddPredQueued(SUnit *SU, const SDep &D) {
+    Topo.AddPredQueued(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
   /// AddPred - adds a predecessor edge to SUnit SU.
   /// This returns true if this is a new predecessor.
   /// Updates the topological ordering if required.
@@ -266,24 +274,22 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
   void ListScheduleBottomUp();
 
   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
-  /// Updates the topological ordering if required.
   SUnit *CreateNewSUnit(SDNode *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = newSUnit(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
   /// CreateClone - Creates a new SUnit from an existing one.
-  /// Updates the topological ordering if required.
   SUnit *CreateClone(SUnit *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = Clone(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
@@ -365,7 +371,7 @@ void ScheduleDAGRRList::Schedule() {
   BuildSchedGraph(nullptr);
 
   LLVM_DEBUG(dump());
-  Topo.InitDAGTopologicalSorting();
+  Topo.MarkDirty();
 
   AvailableQueue->initNodes(SUnits);
 
@@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     NewSU = &SUnits[N->getNodeId()];
     // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit to unfolding so just return SU.
-    if (NewSU->isScheduled)
+    if (NewSU->isScheduled) {
      return SU;
+    }
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
@@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
-    AddPred(NewSU, Pred);
+    AddPredQueued(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
-    AddPred(SuccDep, D);
+    AddPredQueued(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
@@ -1099,15 +1106,15 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
-      AddPred(SuccDep, D);
+      AddPredQueued(SuccDep, D);
    }
  }
 
  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
-  AddPred(NewSU, D);
+  AddPredQueued(NewSU, D);
 
  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
@@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
-      AddPred(NewSU, Pred);
+      AddPredQueued(NewSU, Pred);
 
  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
@@ -1230,25 +1237,25 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
-      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);
 
  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
-  AddPred(CopyFromSU, FromDep);
+  AddPredQueued(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
-  AddPred(CopyToSU, ToDep);
+  AddPredQueued(CopyToSU, ToDep);
 
  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
@@ -1478,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  if (CurSU)
    return CurSU;
 
+  // We query the topological order in the loop body, so make sure outstanding
+  // updates are applied before entering it (we only enter the loop if there
+  // are some interferences). If we make changes to the ordering, we exit
+  // the loop.
+
  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
@@ -1507,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
    }
    LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                      << ") to SU(" << TrySU->NodeNum << ")\n");
-    AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+    AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
 
    // If one or more successors has been unscheduled, then the current
    // node is no longer available.
@@ -1561,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
-      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
      NewDef = Copies.back();
    }
 
    LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
-    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }
@@ -3017,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
        if (SuccSU != &SU) {
          Edge.setSUnit(PredSU);
          scheduleDAG->RemovePred(SuccSU, Edge);
-          scheduleDAG->AddPred(&SU, Edge);
+          scheduleDAG->AddPredQueued(&SU, Edge);
          Edge.setSUnit(&SU);
-          scheduleDAG->AddPred(SuccSU, Edge);
+          scheduleDAG->AddPredQueued(SuccSU, Edge);
          --i;
        }
      }
@@ -3101,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
        LLVM_DEBUG(dbgs()
                   << " Adding a pseudo-two-addr edge from SU #"
                   << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
-        scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+        scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
      }
    }
  }
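
Most of the changes in this file are mechanical AddPred -> AddPredQueued substitutions inside transformations such as TryUnfoldSU, CopyAndMoveSuccessors and InsertCopiesAndMoveSuccs. The payoff for a typical loop looks like this (a sketch reusing the TryUnfoldSU variables from the hunks above; removing an edge can never invalidate an existing topological order, so only the added edges need ordering work):

  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);            // edge removal cannot break the ordering
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);   // ordering work is queued, not done per edge
  }
  // The queued work is paid at most once, at the next WillCreateCycle /
  // IsReachable query, instead of once for every edge added above.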
