@@ -46,7 +46,7 @@ using namespace llvm;
 using namespace bolt;
 
 namespace opts {
-extern cl::OptionCategory BoltCategory;
+
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> Verbosity;
@@ -92,17 +92,6 @@ int32_t ITLBPageSize;
 // while smaller values result in better i-cache performance
 int32_t ITLBEntries;
 
-const char *cacheKindString(bool UseGainCache, bool UseShortCallCache) {
-  if (UseGainCache && UseShortCallCache)
-    return "gain + short call cache";
-  else if (UseGainCache)
-    return "gain cache";
-  else if (UseShortCallCache)
-    return "short call cache";
-  else
-    return "no cache";
-}
-
 // This class maintains adjacency information for all Clusters being
 // processed. It is used to invalidate cache entries when merging
 // Clusters and for visiting all neighbors of any given Cluster.
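
The AdjacencyMatrix class named in this comment is defined elsewhere in the file and is not part of the diff. For orientation, here is a minimal self-contained sketch of the visiting pattern it provides; the class name, members, and the dense bit-matrix representation are assumptions for illustration, not the real implementation:

#include <cstddef>
#include <functional>
#include <vector>

// Illustrative stand-in for the AdjacencyMatrix used by hfsort+.
class AdjacencyMatrixSketch {
public:
  explicit AdjacencyMatrixSketch(size_t NumClusters)
      : Size(NumClusters), Bits(NumClusters * NumClusters, false) {}

  // Record that clusters A and B are adjacent (a symmetric relation).
  void set(size_t A, size_t B) {
    Bits[A * Size + B] = true;
    Bits[B * Size + A] = true;
  }

  // Visit every cluster adjacent to C; this is the shape of the
  // forallAdjacent() calls appearing in the diff.
  void forallAdjacent(size_t C, const std::function<void(size_t)> &Func) const {
    for (size_t Id = 0; Id < Size; Id++)
      if (Bits[C * Size + Id])
        Func(Id);
  }

private:
  size_t Size;
  std::vector<bool> Bits;
};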
@@ -215,17 +204,16 @@ class PrecomputedResults {
     Valid[Index] = true;
   }
 
-  void invalidate(const AdjacencyMatrix &Adjacent, const Cluster *C) {
-    invalidate(C);
-    Adjacent.forallAdjacent(C, [&](const Cluster *A) { invalidate(A); });
-  }
-
- private:
   void invalidate(const Cluster *C) {
     Valid.reset(C->id() * Size, (C->id() + 1) * Size);
+    for (size_t Id = 0; Id < Size; Id++) {
+      Valid.reset(Id * Size + C->id());
+    }
   }
 
+ private:
   size_t index(const Cluster *First, const Cluster *Second) const {
-    return (First->id() * Size) + Second->id();
+    return First->id() * Size + Second->id();
   }
 
   size_t Size;
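
To see what the new invalidation does, here is a stand-alone model of the patched PrecomputedResults, using std::vector<bool> in place of the real class's bit vector; PairCacheSketch and its members are hypothetical names:

#include <cstddef>
#include <vector>

// Models the Valid bit matrix with the row-major layout of index():
// index(First, Second) == First * Size + Second.
class PairCacheSketch {
public:
  explicit PairCacheSketch(size_t NumClusters)
      : Size(NumClusters), Valid(NumClusters * NumClusters, false) {}

  // After this patch, invalidate() clears both the row (pairs where C is
  // the first cluster) and the column (pairs where C is the second),
  // replacing the neighbor-walking overload removed above.
  void invalidate(size_t C) {
    for (size_t Id = 0; Id < Size; Id++) {
      Valid[C * Size + Id] = false; // row entry (C, Id)
      Valid[Id * Size + C] = false; // column entry (Id, C)
    }
  }

private:
  size_t Size;
  std::vector<bool> Valid;
};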
@@ -347,12 +335,6 @@ class HFSortPlus {
    * the same cache page
    */
   double shortCalls(const Cluster *Cluster) const {
-    if (UseShortCallCache) {
-      auto Itr = ShortCallCache.find(Cluster);
-      if (Itr != ShortCallCache.end())
-        return Itr->second;
-    }
-
     double Calls = 0;
     for (auto TargetId : Cluster->targets()) {
       for (auto Succ : Cg.successors(TargetId)) {
@@ -367,10 +349,6 @@ class HFSortPlus {
       }
     }
 
-    if (UseShortCallCache) {
-      ShortCallCache[Cluster] = Calls;
-    }
-
     return Calls;
   }
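
For intuition about what shortCalls() measures, a self-contained sketch under simplified types: a call counts as short when its source and target are laid out within ITLBPageSize bytes of each other, i.e. plausibly on the same i-TLB page. ArcSketch and the exact comparison are assumptions for illustration; the real code walks Cg's arcs and the Addr assignments instead:

#include <cstdint>
#include <vector>

struct ArcSketch {
  uint64_t SrcAddr; // address assigned to the caller
  uint64_t DstAddr; // address assigned to the callee
  double Weight;    // profiled number of calls on this arc
};

// Sum the weights of arcs whose endpoints fall within one page of each other.
double countShortCalls(const std::vector<ArcSketch> &Arcs,
                       uint64_t ITLBPageSize) {
  double Calls = 0;
  for (const ArcSketch &A : Arcs) {
    const uint64_t Dist = A.SrcAddr > A.DstAddr ? A.SrcAddr - A.DstAddr
                                                : A.DstAddr - A.SrcAddr;
    if (Dist < ITLBPageSize)
      Calls += A.Weight;
  }
  return Calls;
}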
@@ -380,11 +358,6 @@ class HFSortPlus {
    */
   double shortCalls(const Cluster *ClusterPred,
                     const Cluster *ClusterSucc) const {
-    if (UseShortCallCache &&
-        ShortCallPairCache.contains(ClusterPred, ClusterSucc)) {
-      return ShortCallPairCache.get(ClusterPred, ClusterSucc);
-    }
-
     double Calls = 0;
     for (auto TargetId : ClusterPred->targets()) {
       for (auto Succ : Cg.successors(TargetId)) {
@@ -413,10 +386,6 @@ class HFSortPlus {
       }
     }
 
-    if (UseShortCallCache) {
-      ShortCallPairCache.set(ClusterPred, ClusterSucc, Calls);
-    }
-
     return Calls;
   }
@@ -434,8 +403,8 @@ class HFSortPlus {
    */
   double mergeGain(const Cluster *ClusterPred,
                    const Cluster *ClusterSucc) const {
-    if (UseGainCache && Cache.contains(ClusterPred, ClusterSucc)) {
-      return Cache.get(ClusterPred, ClusterSucc);
+    if (UseGainCache && GainCache.contains(ClusterPred, ClusterSucc)) {
+      return GainCache.get(ClusterPred, ClusterSucc);
     }
 
     // cache misses on the first cluster
@@ -460,7 +429,7 @@ class HFSortPlus {
     Gain /= std::min(ClusterPred->size(), ClusterSucc->size());
 
     if (UseGainCache) {
-      Cache.set(ClusterPred, ClusterSucc, Gain);
+      GainCache.set(ClusterPred, ClusterSucc, Gain);
     }
 
     return Gain;
@@ -513,7 +482,7 @@ class HFSortPlus {
     const double ProbOut =
         CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0;
     assert(0.0 <= ProbOut && ProbOut <= 1.0 && "incorrect probability");
-
+
     // probability that the second cluster is called from the first one
     const double ProbIn =
         CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0;
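
A quick numeric check of the two ratios above, with made-up arc weights (illustrative only, not from any real profile):

#include <cassert>

int main() {
  const double CallsPredSucc = 30;  // calls from pred cluster into succ cluster
  const double CallsFromPred = 100; // all outgoing calls of pred cluster
  const double CallsToSucc = 60;    // all incoming calls of succ cluster

  // Probability that the first cluster calls the second: 30 / 100 = 0.3.
  const double ProbOut = CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0;
  // Probability that a call into succ comes from pred: 30 / 60 = 0.5.
  const double ProbIn = CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0;

  assert(ProbOut == 0.3 && ProbIn == 0.5); // exact for these values
  return 0;
}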
@@ -601,13 +570,12 @@ class HFSortPlus {
    */
   std::vector<Cluster> run() {
     DEBUG(dbgs() << "Starting hfsort+ w/"
-                 << cacheKindString(UseGainCache, UseShortCallCache)
+                 << (UseGainCache ? "gain cache" : "no cache")
                  << " for " << Clusters.size() << " clusters "
                  << "with ITLBPageSize = " << ITLBPageSize << ", "
                  << "ITLBEntries = " << ITLBEntries << ", "
                  << "and MergeProbability = " << opts::MergeProbability << "\n");
 
-
     // Pass 1
     runPassOne();
@@ -628,19 +596,15 @@ class HFSortPlus {
     return Result;
   }
 
-  HFSortPlus(const CallGraph &Cg,
-             bool UseGainCache,
-             bool UseShortCallCache)
+  HFSortPlus(const CallGraph &Cg, bool UseGainCache)
     : Cg(Cg),
       FuncCluster(Cg.numNodes(), nullptr),
       Addr(Cg.numNodes(), InvalidAddr),
       TotalSamples(0.0),
       Clusters(initializeClusters()),
       Adjacent(Cg, Clusters, FuncCluster),
       UseGainCache(UseGainCache),
-      UseShortCallCache(UseShortCallCache),
-      Cache(Clusters.size()),
-      ShortCallPairCache(Clusters.size()) {
+      GainCache(Clusters.size()) {
   }
 
  private:
@@ -696,31 +660,16 @@ class HFSortPlus {
       CurAddr = ((CurAddr + Align - 1) / Align) * Align;
     }
 
-    // Update caches
-    invalidateCaches(Into);
+    // Invalidate all cache entries associated with cluster Into
+    if (UseGainCache) {
+      GainCache.invalidate(Into);
+    }
 
     // Remove cluster From from the list of active clusters
     auto Iter = std::remove(Clusters.begin(), Clusters.end(), From);
     Clusters.erase(Iter, Clusters.end());
   }
 
-  /*
-   * Invalidate all cache entries associated with cluster C and its neighbors.
-   */
-  void invalidateCaches(const Cluster *C) {
-    if (UseShortCallCache) {
-      maybeErase(ShortCallCache, C);
-      Adjacent.forallAdjacent(C,
-        [this](const Cluster *A) {
-          maybeErase(ShortCallCache, A);
-        });
-      ShortCallPairCache.invalidate(Adjacent, C);
-    }
-    if (UseGainCache) {
-      Cache.invalidate(Adjacent, C);
-    }
-  }
-
   // The call graph
   const CallGraph &Cg;
@@ -746,32 +695,21 @@ class HFSortPlus {
   // Use cache for mergeGain results
   bool UseGainCache;
 
-  // Use caches for shortCalls results
-  bool UseShortCallCache;
-
   // A cache that keeps precomputed values of mergeGain for pairs of clusters;
   // when a pair of clusters (x,y) gets merged, we need to invalidate the pairs
   // containing both x and y and all clusters adjacent to x and y (and recompute
   // them on the next iteration).
-  mutable PrecomputedResults Cache;
-
-  // Cache for shortCalls for a single cluster.
-  mutable std::unordered_map<const Cluster *, double> ShortCallCache;
-
-  // Cache for shortCalls for a pair of Clusters
-  mutable PrecomputedResults ShortCallPairCache;
+  mutable PrecomputedResults GainCache;
 };
 
 }
 
-std::vector<Cluster> hfsortPlus(CallGraph &Cg,
-                                bool UseGainCache,
-                                bool UseShortCallCache) {
+std::vector<Cluster> hfsortPlus(CallGraph &Cg, bool UseGainCache) {
   // It is required that the sum of incoming arc weights is not greater
   // than the number of samples for every function.
   // Ensuring the call graph obeys the property before running the algorithm.
   Cg.adjustArcWeights();
-  return HFSortPlus(Cg, UseGainCache, UseShortCallCache).run();
+  return HFSortPlus(Cg, UseGainCache).run();
 }
 
 }
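
With the short-call caches gone, the entry point takes a single caching flag. A hypothetical call site, for illustration; constructing a CallGraph is outside this diff, so buildCallGraph() is an assumed helper, not a real API:

// Sketch of calling the simplified entry point.
extern CallGraph buildCallGraph(); // assumption: provided by the caller

std::vector<Cluster> layoutClusters() {
  CallGraph Cg = buildCallGraph();
  // The only remaining knob: memoize mergeGain() results or not.
  return hfsortPlus(Cg, /*UseGainCache=*/true);
}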