@@ -407,9 +407,6 @@ static constexpr uint32_t MemCheckBypassWeights[] = {1, 127};
407
407
// Likelihood of bypassing the vectorized loop because there are zero trips left
408
408
// after prolog. See `emitIterationCountCheck`.
409
409
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
410
- // Likelyhood of bypassing the vectorized loop because of zero trips necessary.
411
- // See `emitMinimumVectorEpilogueIterCountCheck`.
412
- static constexpr uint32_t EpilogueMinItersBypassWeights[] = {1 , 127 };
413
410
414
411
/// A helper function that returns true if the given type is irregular. The
415
412
/// type is irregular if its allocated size doesn't equal the store size of an
@@ -3163,9 +3160,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
3163
3160
// Assume that `Count % VectorTripCount` is equally distributed.
3164
3161
unsigned TripCount = UF * VF.getKnownMinValue ();
3165
3162
assert (TripCount > 0 && " trip count should not be zero" );
3166
- MDBuilder MDB (ScalarLatchTerm->getContext ());
3167
- MDNode *BranchWeights = MDB.createBranchWeights (1 , TripCount - 1 );
3168
- BI.setMetadata (LLVMContext::MD_prof, BranchWeights);
3163
+ const uint32_t Weights[] = {1 , TripCount - 1 };
3164
+ setBranchWeights (BI, Weights);
3169
3165
}
3170
3166
}
3171
3167
@@ -8093,8 +8089,19 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
8093
8089
8094
8090
BranchInst &BI =
8095
8091
*BranchInst::Create (Bypass, LoopVectorPreHeader, CheckMinIters);
8096
- if (hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ()))
8097
- setBranchWeights (BI, EpilogueMinItersBypassWeights);
8092
+ if (hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
8093
+ unsigned MainLoopStep = UF * VF.getKnownMinValue ();
8094
+ unsigned EpilogueLoopStep =
8095
+ EPI.EpilogueUF * EPI.EpilogueVF .getKnownMinValue ();
8096
+ // We assume the remaining `Count` is equally distributed in
8097
+ // [0, MainLoopStep)
8098
+ // So the probability for `Count < EpilogueLoopStep` should be
8099
+ // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
8100
+ unsigned EstimatedSkipCount = std::min (MainLoopStep, EpilogueLoopStep);
8101
+ const uint32_t Weights[] = {EstimatedSkipCount,
8102
+ MainLoopStep - EstimatedSkipCount};
8103
+ setBranchWeights (BI, Weights);
8104
+ }
8098
8105
ReplaceInstWithInst (Insert->getTerminator (), &BI);
8099
8106
8100
8107
LoopBypassBlocks.push_back (Insert);
0 commit comments