Skip to content

Commit 5966349

Browse files
author
Jan Hubicka
committed
Fix epilogue loop profile
Fix two bugs in scale_loop_profile which crept in during my cleanups and, curiously enough, did not show on the testcases we have so far. The patch also adds the missing call to cap the iteration count of the vectorized loop epilogues.

Vectorizer profile needs more work, but I am trying to chase out obvious bugs first so the profile quality statistics become meaningful and we can try to improve on them.

Now we get:

Pass dump id and name | static mismatch | dynamic mismatch
                      | in count        | in count
107t cunrolli | 3 +3| 17251 +17251
116t vrp | 5 +2| 30908 +16532
118t dce | 3 -2| 17251 -13657
127t ch | 13 +10| 17251
131t dom | 39 +26| 17251
133t isolate-paths | 47 +8| 17251
134t reassoc | 49 +2| 17251
136t forwprop | 53 +4| 202501 +185250
159t cddce | 61 +8| 216211 +13710
161t ldist | 62 +1| 216211
172t ifcvt | 66 +4| 373711 +157500
173t vect | 143 +77| 9801947 +9428236
176t cunroll | 149 +6| 12006408 +2204461
183t loopdone | 146 -3| 11944469 -61939
195t fre | 142 -4| 11944469
197t dom | 141 -1| 13038435 +1093966
199t threadfull | 143 +2| 13246410 +207975
200t vrp | 145 +2| 13444579 +198169
204t dce | 143 -2| 13371315 -73264
206t sink | 141 -2| 13371315
211t cddce | 147 +6| 1337275 +1440
255t optimized | 145 -2| 1337275
256r expand | 141 -4| 13371197 -1558
258r into_cfglayout | 139 -2| 13371197
275r loop2_unroll | 143 +4| 16792056 +3420859
291r ce2 | 141 -2| 16811462
312r pro_and_epilogue | 161 +20| 16873400 +61938
315r jump2 | 167 +6| 20910158 +4036758
323r bbro | 160 -7| 16559844 -4350314

Vect still introduces 77 profile mismatches (same as without this patch); however, the subsequent cunroll works much better, with 6 new mismatches compared to 78. Overall it reduces 229 mismatches to 160.

Also, the overall runtime estimate is now reduced by 6.9%. Previously the overall runtime estimate grew by 11%, which was a result of the fact that the epilogue profile was pretty much the same as the profile of the original loop.

Bootstrapped/regtested x86_64-linux, committed.
gcc/ChangeLog:

	* cfgloopmanip.cc (scale_loop_profile): Fix computation of count_in and scaling blocks after exit.
	* tree-vect-loop-manip.cc (vect_do_peeling): Scale loop profile of the epilogue if bound is known.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/vect-profile-upate.c: New test.
1 parent 6f428f1 commit 5966349

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

gcc/cfgloopmanip.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -548,18 +548,23 @@ scale_loop_profile (class loop *loop, profile_probability p,
548548
profile_count count_in = profile_count::zero ();
549549
edge e;
550550
edge_iterator ei;
551+
bool found_latch = false;
551552
FOR_EACH_EDGE (e, ei, loop->header->preds)
552-
count_in += e->count ();
553+
if (e->src != loop->latch)
554+
count_in += e->count ();
555+
else
556+
found_latch = true;
557+
gcc_checking_assert (found_latch);
553558

554559
/* Now scale the loop body so header count is
555560
count_in * (iteration_bound + 1) */
556561
profile_probability scale_prob
557-
= (count_in *= iteration_bound).probability_in (loop->header->count);
562+
= (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
558563
if (dump_file && (dump_flags & TDF_DETAILS))
559564
{
560565
fprintf (dump_file, ";; Scaling loop %i with scale ",
561566
loop->num);
562-
p.dump (dump_file);
567+
scale_prob.dump (dump_file);
563568
fprintf (dump_file, " to reach upper bound %i\n",
564569
(int)iteration_bound);
565570
}
@@ -593,7 +598,6 @@ scale_loop_profile (class loop *loop, profile_probability p,
593598
bool found = false;
594599
FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
595600
if (!(e->flags & EDGE_FAKE)
596-
&& !(e->probability == profile_probability::never ())
597601
&& !loop_exit_edge_p (loop, e))
598602
{
599603
if (found)
@@ -617,7 +621,8 @@ scale_loop_profile (class loop *loop, profile_probability p,
617621
for (unsigned int i = 0; i < loop->num_nodes; i++)
618622
if (body[i] != exit_edge->src
619623
&& dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
620-
body[i]->count.apply_scale (new_count, old_count);
624+
body[i]->count = body[i]->count.apply_scale (new_count,
625+
old_count);
621626

622627
free (body);
623628
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
3+
int a[99];
4+
void test()
5+
{
6+
for (int i = 0; i < 99; i++)
7+
a[i]++;
8+
}
9+
/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */

gcc/tree-vect-loop-manip.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3389,6 +3389,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
33893389
gcc_assert (bound != 0);
33903390
/* -1 to convert loop iterations to latch iterations. */
33913391
record_niter_bound (epilog, bound - 1, false, true);
3392+
scale_loop_profile (epilog, profile_probability::always (),
3393+
bound - 1);
33923394
}
33933395

33943396
delete_update_ssa ();

0 commit comments

Comments
 (0)