Skip to content

Commit c4a60c9

Browse files
committed
[CodeGen][ShrinkWrap] Enable PostShrinkWrap by default
This is an attempt to reland D42600 and enable this optimisation by default. It also resolves the issue pointed out in the context of the PGO build. Differential Revision: https://reviews.llvm.org/D42600
1 parent 20d6dee commit c4a60c9

39 files changed

+1228
-392
lines changed

llvm/lib/CodeGen/ShrinkWrap.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static cl::opt<cl::boolOrDefault>
9999
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
100100
cl::desc("enable the shrink-wrapping pass"));
101101
static cl::opt<bool> EnablePostShrinkWrapOpt(
102-
"enable-shrink-wrap-region-split", cl::init(false), cl::Hidden,
102+
"enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
103103
cl::desc("enable splitting of the restore block if possible"));
104104

105105
namespace {
@@ -635,7 +635,10 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
635635
FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
636636

637637
while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
638-
EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
638+
EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
639+
/* The entry frequency has been observed to be higher than that of a
640+
loop block in some cases. */
641+
MLI->getLoopFor(NewSave)))
639642
NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
640643
false);
641644

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(i8* noalias nocapture noundef readonly %A,
424424
; CHECK-NEXT: mov w8, wzr
425425
; CHECK-NEXT: b .LBB5_7
426426
; CHECK-NEXT: .LBB5_3:
427-
; CHECK-NEXT: mov w0, wzr
428-
; CHECK-NEXT: ret
427+
; CHECK-NEXT: mov w8, wzr
428+
; CHECK-NEXT: b .LBB5_9
429429
; CHECK-NEXT: .LBB5_4: // %vector.ph
430430
; CHECK-NEXT: and x11, x10, #0xfffffff0
431431
; CHECK-NEXT: add x8, x0, #8

llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66
; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
77
; CHECK: name: compiler_pop_stack
88
; CHECK: frameInfo:
9-
; CHECK-NOT: savePoint:
10-
; CHECK-NOT: restorePoint:
9+
; CHECK: savePoint: '%bb.1'
10+
; CHECK: restorePoint: '%bb.7'
1111
; CHECK: name: compiler_pop_stack_no_memoperands
1212
; CHECK: frameInfo:
13-
; CHECK-NOT: savePoint:
14-
; CHECK-NOT: restorePoint:
15-
; CHECK: stack:
13+
; CHECK: savePoint: '%bb.1'
14+
; CHECK: restorePoint: '%bb.7'
1615
; CHECK: name: f
1716
; CHECK: frameInfo:
1817
; CHECK: savePoint: '%bb.2'

llvm/test/CodeGen/AArch64/ragreedy-csr.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ declare i32 @__maskrune(i32, i64) #7
2121
define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 {
2222
; CHECK-LABEL: prune_match:
2323
; CHECK: ; %bb.0: ; %entry
24+
; CHECK-NEXT: ldrh w8, [x0]
25+
; CHECK-NEXT: ldrh w9, [x1]
26+
; CHECK-NEXT: cmp w8, w9
27+
; CHECK-NEXT: b.ne LBB0_47
28+
; CHECK-NEXT: ; %bb.1: ; %if.end
2429
; CHECK-NEXT: sub sp, sp, #64
2530
; CHECK-NEXT: .cfi_def_cfa_offset 64
2631
; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
2732
; CHECK-NEXT: .cfi_offset w30, -8
2833
; CHECK-NEXT: .cfi_offset w29, -16
29-
; CHECK-NEXT: ldrh w8, [x0]
30-
; CHECK-NEXT: ldrh w9, [x1]
31-
; CHECK-NEXT: cmp w8, w9
32-
; CHECK-NEXT: b.ne LBB0_42
33-
; CHECK-NEXT: ; %bb.1: ; %if.end
3434
; CHECK-NEXT: Lloh0:
3535
; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE
3636
; CHECK-NEXT: mov x9, xzr
@@ -243,7 +243,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
243243
; CHECK-NEXT: b.eq LBB0_37
244244
; CHECK-NEXT: LBB0_42:
245245
; CHECK-NEXT: mov w0, wzr
246-
; CHECK-NEXT: LBB0_43: ; %return
246+
; CHECK-NEXT: LBB0_43:
247247
; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
248248
; CHECK-NEXT: add sp, sp, #64
249249
; CHECK-NEXT: ret
@@ -259,6 +259,12 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
259259
; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52
260260
; CHECK-NEXT: cbz w8, LBB0_43
261261
; CHECK-NEXT: b LBB0_12
262+
; CHECK-NEXT: LBB0_47:
263+
; CHECK-NEXT: .cfi_def_cfa wsp, 0
264+
; CHECK-NEXT: .cfi_same_value w30
265+
; CHECK-NEXT: .cfi_same_value w29
266+
; CHECK-NEXT: mov w0, wzr
267+
; CHECK-NEXT: ret
262268
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
263269
; CHECK-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
264270
; CHECK-NEXT: .loh AdrpLdrGot Lloh4, Lloh5

0 commit comments

Comments
 (0)