Skip to content

Commit 0629fff

Browse files
committed
[PHIElimination] Reuse existing COPY in predecessor basic block
The insertion point of COPY isn't always optimal and could eventually lead to a worse block layout; see the regression test. This change affects many architectures, but the total number of instructions in the test cases seems to be slightly lower.
1 parent bb2c23b commit 0629fff

File tree

3 files changed

+223
-1
lines changed

3 files changed

+223
-1
lines changed

llvm/lib/CodeGen/PHIElimination.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
541541
// Now loop over all of the incoming arguments, changing them to copy into the
542542
// IncomingReg register in the corresponding predecessor basic block.
543543
SmallPtrSet<MachineBasicBlock *, 8> MBBsInsertedInto;
544+
SmallVector<MachineInstr *, 8> InsertedCopies;
544545
for (int i = NumSrcs - 1; i >= 0; --i) {
545546
Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
546547
unsigned SrcSubReg = MPhi->getOperand(i * 2 + 1).getSubReg();
@@ -607,6 +608,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
607608
NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr,
608609
SrcReg, SrcSubReg, IncomingReg);
609610
}
611+
InsertedCopies.emplace_back(NewSrcInstr);
610612
}
611613

612614
// We only need to update the LiveVariables kill of SrcReg if this was the
@@ -730,6 +732,32 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
730732
}
731733
}
732734

735+
// Remove redundant COPY instruction chains, which were potentially added by
736+
// the code above. This can simplify the CFG, which later on helps prevent a
737+
// suboptimal block layout.
738+
for (MachineInstr *NewCopy : InsertedCopies) {
739+
if (NewCopy->isImplicitDef())
740+
continue;
741+
Register IncomingReg = NewCopy->getOperand(0).getReg();
742+
if (!IncomingReg.isVirtual())
743+
continue;
744+
Register SrcReg = NewCopy->getOperand(1).getReg();
745+
if (!MRI->hasOneNonDBGUse(SrcReg))
746+
continue;
747+
MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg);
748+
if (!DefMI || !DefMI->isCopy() ||
749+
DefMI->getParent() != NewCopy->getParent())
750+
continue;
751+
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
752+
const TargetRegisterClass *IncomingRC = MRI->getRegClass(IncomingReg);
753+
if (!IncomingRC->hasSuperClassEq(SrcRC))
754+
continue;
755+
MRI->replaceRegWith(SrcReg, IncomingReg);
756+
NewCopy->removeFromParent();
757+
if (LV)
758+
LV->getVarInfo(SrcReg).AliveBlocks.clear();
759+
}
760+
733761
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
734762
if (EliminateNow) {
735763
if (LIS)
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -run-pass=livevars,phi-node-elimination -verify-machineinstrs -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
# Verify that the original COPY in bb.1 is reappropriated as the PHI source in bb.2,
5+
# instead of creating a new COPY with the same source register.
6+
7+
---
8+
name: copy_virtual_reg
9+
tracksRegLiveness: true
10+
body: |
11+
; CHECK-LABEL: name: copy_virtual_reg
12+
; CHECK: bb.0:
13+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
14+
; CHECK-NEXT: liveins: $nzcv, $w0
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: %a:gpr32 = COPY killed $w0
17+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
18+
; CHECK-NEXT: Bcc 8, %bb.2, implicit killed $nzcv
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: bb.1:
21+
; CHECK-NEXT: successors: %bb.2(0x80000000)
22+
; CHECK-NEXT: {{ $}}
23+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY killed %a
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.2:
26+
; CHECK-NEXT: %c:gpr32 = COPY killed [[DEF]]
27+
; CHECK-NEXT: dead %d:gpr32 = COPY killed %c
28+
bb.0:
29+
liveins: $nzcv, $w0
30+
%a:gpr32 = COPY $w0
31+
Bcc 8, %bb.2, implicit $nzcv
32+
bb.1:
33+
%b:gpr32 = COPY %a:gpr32
34+
bb.2:
35+
%c:gpr32 = PHI %b:gpr32, %bb.1, undef %undef:gpr32, %bb.0
36+
%d:gpr32 = COPY %c:gpr32
37+
...
38+
39+
---
40+
name: copy_physical_reg
41+
tracksRegLiveness: true
42+
body: |
43+
; CHECK-LABEL: name: copy_physical_reg
44+
; CHECK: bb.0:
45+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
46+
; CHECK-NEXT: liveins: $nzcv, $w0
47+
; CHECK-NEXT: {{ $}}
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
49+
; CHECK-NEXT: Bcc 8, %bb.2, implicit killed $nzcv
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: bb.1:
52+
; CHECK-NEXT: successors: %bb.2(0x80000000)
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: dead $x0 = IMPLICIT_DEF implicit-def $w0
55+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY killed $w0
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.2:
58+
; CHECK-NEXT: dead %b:gpr32 = COPY killed [[DEF]]
59+
bb.0:
60+
liveins: $nzcv, $w0
61+
Bcc 8, %bb.2, implicit $nzcv
62+
bb.1:
63+
$x0 = IMPLICIT_DEF
64+
%a:gpr32 = COPY $w0
65+
bb.2:
66+
%b:gpr32 = PHI %a:gpr32, %bb.1, undef %undef:gpr32, %bb.0
67+
...
68+
69+
---
70+
name: copy_to_dead
71+
tracksRegLiveness: true
72+
body: |
73+
; CHECK-LABEL: name: copy_to_dead
74+
; CHECK: bb.0:
75+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
76+
; CHECK-NEXT: liveins: $wzr, $xzr
77+
; CHECK-NEXT: {{ $}}
78+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
79+
; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:gpr64 = COPY $xzr
80+
; CHECK-NEXT: TBZW killed [[COPY]], 0, %bb.2
81+
; CHECK-NEXT: B %bb.1
82+
; CHECK-NEXT: {{ $}}
83+
; CHECK-NEXT: bb.1:
84+
; CHECK-NEXT: successors: %bb.2(0x80000000)
85+
; CHECK-NEXT: {{ $}}
86+
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
87+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:gpr64 = IMPLICIT_DEF
88+
; CHECK-NEXT: B %bb.2
89+
; CHECK-NEXT: {{ $}}
90+
; CHECK-NEXT: bb.2:
91+
; CHECK-NEXT: successors: %bb.1(0x80000000)
92+
; CHECK-NEXT: {{ $}}
93+
; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:gpr64 = IMPLICIT_DEF
94+
; CHECK-NEXT: dead [[DEF3:%[0-9]+]]:gpr64 = IMPLICIT_DEF
95+
; CHECK-NEXT: B %bb.1
96+
bb.0:
97+
liveins: $wzr, $xzr
98+
99+
%9:gpr32 = COPY $wzr
100+
dead %5:gpr64 = COPY $xzr
101+
TBZW killed %9:gpr32, 0, %bb.2
102+
B %bb.1
103+
104+
bb.1:
105+
successors: %bb.2(0x80000000); %bb.2(100.00%)
106+
107+
dead %1:gpr64 = PHI undef %3:gpr64, %bb.2, undef %5:gpr64, %bb.0
108+
dead %2:gpr64 = PHI undef %4:gpr64, %bb.2, undef %5:gpr64, %bb.0
109+
B %bb.2
110+
111+
bb.2:
112+
successors: %bb.1(0x80000000); %bb.1(100.00%)
113+
114+
dead %3:gpr64 = PHI undef %1:gpr64, %bb.1, undef %5:gpr64, %bb.0
115+
dead %4:gpr64 = PHI undef %2:gpr64, %bb.1, undef %5:gpr64, %bb.0
116+
B %bb.1
117+
118+
...
119+
120+
---
121+
name: update_livevars
122+
tracksRegLiveness: true
123+
body: |
124+
; CHECK-LABEL: name: update_livevars
125+
; CHECK: bb.0:
126+
; CHECK-NEXT: successors: %bb.1(0x80000000)
127+
; CHECK-NEXT: liveins: $w0, $w1, $nzcv
128+
; CHECK-NEXT: {{ $}}
129+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY killed $w0
130+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY killed $w1
131+
; CHECK-NEXT: B %bb.1
132+
; CHECK-NEXT: {{ $}}
133+
; CHECK-NEXT: bb.1:
134+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
135+
; CHECK-NEXT: liveins: $nzcv
136+
; CHECK-NEXT: {{ $}}
137+
; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:gpr32 = COPY killed [[COPY1]]
138+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY]]
139+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY3]]
140+
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
141+
; CHECK-NEXT: {{ $}}
142+
; CHECK-NEXT: bb.2:
143+
; CHECK-NEXT: successors: %bb.1(0x80000000)
144+
; CHECK-NEXT: liveins: $nzcv
145+
; CHECK-NEXT: {{ $}}
146+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY killed [[COPY3]]
147+
; CHECK-NEXT: B %bb.1
148+
bb.0:
149+
successors: %bb.1
150+
liveins: $w0, $w1, $nzcv
151+
152+
%0:gpr32 = COPY killed $w0
153+
%1:gpr32 = COPY killed $w1
154+
B %bb.1
155+
156+
bb.1:
157+
successors: %bb.2, %bb.1
158+
liveins: $nzcv
159+
160+
%2:gpr32 = PHI %3, %bb.2, %1, %bb.0, %3, %bb.1
161+
%3:gpr32 = COPY %0
162+
Bcc 1, %bb.1, implicit $nzcv
163+
164+
bb.2:
165+
successors: %bb.1
166+
liveins: $nzcv
167+
168+
B %bb.1
169+
...
170+
171+
---
172+
name: copy_subreg
173+
tracksRegLiveness: true
174+
body: |
175+
; CHECK-LABEL: name: copy_subreg
176+
; CHECK: bb.0:
177+
; CHECK-NEXT: successors: %bb.1(0x80000000)
178+
; CHECK-NEXT: liveins: $x0
179+
; CHECK-NEXT: {{ $}}
180+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY killed $x0
181+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY killed [[COPY]]
182+
; CHECK-NEXT: {{ $}}
183+
; CHECK-NEXT: bb.1:
184+
; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:gpr32 = COPY killed [[COPY1]].sub_32
185+
bb.0:
186+
successors: %bb.1
187+
liveins: $x0
188+
189+
%0:gpr64 = COPY killed $x0
190+
%1:gpr64 = COPY killed %0
191+
192+
bb.1:
193+
%2:gpr32 = PHI %1.sub_32, %bb.0
194+
...

llvm/test/CodeGen/PowerPC/vsx.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2487,7 +2487,7 @@ define double @test82(double %a, double %b, double %c, double %d) {
24872487
; CHECK-FISL-LABEL: test82:
24882488
; CHECK-FISL: # %bb.0: # %entry
24892489
; CHECK-FISL-NEXT: stfd f2, -16(r1) # 8-byte Folded Spill
2490-
; CHECK-FISL-NEXT: fmr f2, f1
2490+
; CHECK-FISL-NEXT: stfd f1, -8(r1) # 8-byte Folded Spill
24912491
; CHECK-FISL-NEXT: xscmpudp cr0, f3, f4
24922492
; CHECK-FISL-NEXT: stfd f2, -8(r1) # 8-byte Folded Spill
24932493
; CHECK-FISL-NEXT: beq cr0, .LBB67_2

0 commit comments

Comments
 (0)