@@ -407,6 +407,32 @@ class LoopInterchangeLegality {
407
407
SmallVector<PHINode *, 8 > InnerLoopInductions;
408
408
};
409
409
410
+ /// Manages information utilized by the profitability check for cache. The main
411
+ /// purpose of this class is to delay the computation of CacheCost until it is
412
+ /// actually needed.
413
+ class CacheCostManager {
414
+ Loop *OutermostLoop; // Loop handed to CacheCost::getCacheCost.
415
+ LoopStandardAnalysisResults *AR; // Forwarded to CacheCost::getCacheCost.
416
+ DependenceInfo *DI; // Forwarded to CacheCost::getCacheCost.
417
+
418
+ /// CacheCost for \ref OutermostLoop. An empty optional means it has not been
419
+ /// computed yet; once computed it is cached, and the result can be nullptr.
420
+ std::optional<std::unique_ptr<CacheCost>> CC;
421
+
422
+ /// Maps each loop to an index representing the optimal position within the
423
+ /// loop-nest, as determined by the cache cost analysis.
424
+ DenseMap<const Loop *, unsigned > CostMap;
425
+
426
+ void computeIfUnitinialized (); // Fills CC and CostMap on first call only. NOTE(review): name looks like a typo of "Uninitialized".
427
+
428
+ public:
429
+ CacheCostManager (Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
430
+ DependenceInfo *DI)
431
+ : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {} // Only stores the pointers; nothing is computed here.
432
+ CacheCost *getCacheCost (); // Computes on first use; may return nullptr.
433
+ const DenseMap<const Loop *, unsigned > &getCostMap (); // Computes on first use; returns a reference to the member map.
434
+ };
435
+
410
436
/// LoopInterchangeProfitability checks if it is profitable to interchange the
411
437
/// loop.
412
438
class LoopInterchangeProfitability {
@@ -418,15 +444,12 @@ class LoopInterchangeProfitability {
418
444
/// Check if the loop interchange is profitable.
419
445
bool isProfitable (const Loop *InnerLoop, const Loop *OuterLoop,
420
446
unsigned InnerLoopId, unsigned OuterLoopId,
421
- CharMatrix &DepMatrix,
422
- const DenseMap<const Loop *, unsigned > &CostMap,
423
- std::unique_ptr<CacheCost> &CC);
447
+ CharMatrix &DepMatrix, CacheCostManager &CCM);
424
448
425
449
private:
426
450
int getInstrOrderCost ();
427
451
std::optional<bool > isProfitablePerLoopCacheAnalysis (
428
- const DenseMap<const Loop *, unsigned > &CostMap,
429
- std::unique_ptr<CacheCost> &CC);
452
+ const DenseMap<const Loop *, unsigned > &CostMap, CacheCost *CC);
430
453
std::optional<bool > isProfitablePerInstrOrderCost ();
431
454
std::optional<bool > isProfitableForVectorization (unsigned InnerLoopId,
432
455
unsigned OuterLoopId,
@@ -477,15 +500,15 @@ struct LoopInterchange {
477
500
LoopInfo *LI = nullptr ;
478
501
DependenceInfo *DI = nullptr ;
479
502
DominatorTree *DT = nullptr ;
480
- std::unique_ptr<CacheCost> CC = nullptr ;
503
+ LoopStandardAnalysisResults *AR = nullptr ;
481
504
482
505
/// Interface to emit optimization remarks.
483
506
OptimizationRemarkEmitter *ORE;
484
507
485
508
LoopInterchange (ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
486
- DominatorTree *DT, std::unique_ptr<CacheCost> &CC ,
509
+ DominatorTree *DT, LoopStandardAnalysisResults *AR ,
487
510
OptimizationRemarkEmitter *ORE)
488
- : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC) ), ORE(ORE) {}
511
+ : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR ), ORE(ORE) {}
489
512
490
513
bool run (Loop *L) {
491
514
if (L->getParentLoop ())
@@ -540,19 +563,7 @@ struct LoopInterchange {
540
563
}
541
564
542
565
unsigned SelecLoopId = selectLoopForInterchange (LoopList);
543
- // Obtain the loop vector returned from loop cache analysis beforehand,
544
- // and put each <Loop, index> pair into a map for constant time query
545
- // later. Indices in loop vector reprsent the optimal order of the
546
- // corresponding loop, e.g., given a loopnest with depth N, index 0
547
- // indicates the loop should be placed as the outermost loop and index N
548
- // indicates the loop should be placed as the innermost loop.
549
- //
550
- // For the old pass manager CacheCost would be null.
551
- DenseMap<const Loop *, unsigned > CostMap;
552
- if (CC != nullptr ) {
553
- for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts ()))
554
- CostMap[Cost.first ] = Idx;
555
- }
566
+ CacheCostManager CCM (LoopList[0 ], AR, DI);
556
567
// We try to achieve the globally optimal memory access for the loopnest,
557
568
// and do interchange based on a bubble-sort fashion. We start from
558
569
// the innermost loop, move it outwards to the best possible position
@@ -561,7 +572,7 @@ struct LoopInterchange {
561
572
bool ChangedPerIter = false ;
562
573
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
563
574
bool Interchanged =
564
- processLoop (LoopList, i, i - 1 , DependencyMatrix, CostMap );
575
+ processLoop (LoopList, i, i - 1 , DependencyMatrix, CCM );
565
576
ChangedPerIter |= Interchanged;
566
577
Changed |= Interchanged;
567
578
}
@@ -576,7 +587,7 @@ struct LoopInterchange {
576
587
bool processLoop (SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
577
588
unsigned OuterLoopId,
578
589
std::vector<std::vector<char >> &DependencyMatrix,
579
- const DenseMap< const Loop *, unsigned > &CostMap ) {
590
+ CacheCostManager &CCM ) {
580
591
Loop *OuterLoop = LoopList[OuterLoopId];
581
592
Loop *InnerLoop = LoopList[InnerLoopId];
582
593
LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
@@ -589,7 +600,7 @@ struct LoopInterchange {
589
600
LLVM_DEBUG (dbgs () << " Loops are legal to interchange\n " );
590
601
LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE);
591
602
if (!LIP.isProfitable (InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
592
- DependencyMatrix, CostMap, CC )) {
603
+ DependencyMatrix, CCM )) {
593
604
LLVM_DEBUG (dbgs () << " Interchanging loops not profitable.\n " );
594
605
return false ;
595
606
}
@@ -1122,6 +1133,35 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1122
1133
return true ;
1123
1134
}
1124
1135
1136
+ void CacheCostManager::computeIfUnitinialized () { // Computes CC and CostMap the first time it is called.
1137
+ if (CC.has_value ()) // Already computed, even if the cached pointer is nullptr.
1138
+ return ;
1139
+
1140
+ LLVM_DEBUG (dbgs () << " Compute CacheCost.\n " );
1141
+ CC = CacheCost::getCacheCost (*OutermostLoop, *AR, *DI);
1142
+ // Take the loop vector returned from loop cache analysis and put each
1143
+ // <Loop, index> pair into CostMap for constant time query later.
1144
+ // Indices in the loop vector represent the optimal order of the
1145
+ // corresponding loop, e.g., given a loopnest with depth N, index 0
1146
+ // indicates the loop should be placed as the outermost loop and index
1147
+ // N - 1 indicates the loop should be placed as the innermost loop.
1148
+ //
1149
+ // CacheCost can be null (e.g. for the old pass manager); CostMap is then not populated.
1150
+ if (*CC != nullptr )
1151
+ for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts ()))
1152
+ CostMap[Cost.first ] = Idx;
1153
+ }
1154
+
1155
+ CacheCost *CacheCostManager::getCacheCost () { // Returns the cached CacheCost, computing it on first use.
1156
+ computeIfUnitinialized (); // Guarantees CC holds a value before it is dereferenced below.
1157
+ return CC->get (); // Non-owning pointer; nullptr when the stored unique_ptr is null.
1158
+ }
1159
+
1160
+ const DenseMap<const Loop *, unsigned > &CacheCostManager::getCostMap () { // Returns the loop-to-index map, computing it on first use.
1161
+ computeIfUnitinialized (); // Populates CostMap on the first call when a CacheCost is available.
1162
+ return CostMap; // Left unpopulated when the computed CacheCost is nullptr.
1163
+ }
1164
+
1125
1165
int LoopInterchangeProfitability::getInstrOrderCost () {
1126
1166
unsigned GoodOrder, BadOrder;
1127
1167
BadOrder = GoodOrder = 0 ;
@@ -1177,8 +1217,7 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
1177
1217
1178
1218
std::optional<bool >
1179
1219
LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis (
1180
- const DenseMap<const Loop *, unsigned > &CostMap,
1181
- std::unique_ptr<CacheCost> &CC) {
1220
+ const DenseMap<const Loop *, unsigned > &CostMap, CacheCost *CC) {
1182
1221
// This is the new cost model returned from loop cache analysis.
1183
1222
// A smaller index means the loop should be placed an outer loop, and vice
1184
1223
// versa.
@@ -1246,9 +1285,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1246
1285
1247
1286
bool LoopInterchangeProfitability::isProfitable (
1248
1287
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1249
- unsigned OuterLoopId, CharMatrix &DepMatrix,
1250
- const DenseMap<const Loop *, unsigned > &CostMap,
1251
- std::unique_ptr<CacheCost> &CC) {
1288
+ unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1252
1289
// isProfitable() is structured to avoid endless loop interchange. If the
1253
1290
// highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
1254
1291
// decide the profitability then, profitability check will stop and return the
@@ -1261,9 +1298,12 @@ bool LoopInterchangeProfitability::isProfitable(
1261
1298
std::optional<bool > shouldInterchange;
1262
1299
for (RuleTy RT : Profitabilities) {
1263
1300
switch (RT) {
1264
- case RuleTy::PerLoopCacheAnalysis:
1301
+ case RuleTy::PerLoopCacheAnalysis: {
1302
+ CacheCost *CC = CCM.getCacheCost ();
1303
+ const DenseMap<const Loop *, unsigned > &CostMap = CCM.getCostMap ();
1265
1304
shouldInterchange = isProfitablePerLoopCacheAnalysis (CostMap, CC);
1266
1305
break ;
1306
+ }
1267
1307
case RuleTy::PerInstrOrderCost:
1268
1308
shouldInterchange = isProfitablePerInstrOrderCost ();
1269
1309
break ;
@@ -1841,10 +1881,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1841
1881
});
1842
1882
1843
1883
DependenceInfo DI (&F, &AR.AA , &AR.SE , &AR.LI );
1844
- std::unique_ptr<CacheCost> CC =
1845
- CacheCost::getCacheCost (LN.getOutermostLoop (), AR, DI);
1846
-
1847
- if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LN))
1884
+ if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , &AR, &ORE).run (LN))
1848
1885
return PreservedAnalyses::all ();
1849
1886
U.markLoopNestChanged (true );
1850
1887
return getLoopPassPreservedAnalyses ();
0 commit comments