Skip to content

Commit c61ae62

Browse files
committed
[PHIElimination] Reuse existing COPY in predecessor basic block
The insertion point of the COPY isn't always optimal and could eventually lead to a worse block layout; see the regression test. This change affects many architectures, but the total number of instructions in the test cases seems to be slightly lower.
1 parent b7059eb commit c61ae62

File tree

141 files changed

+44573
-44147
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

141 files changed

+44573
-44147
lines changed

llvm/lib/CodeGen/PHIElimination.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/CodeGen/PHIElimination.h"
1616
#include "PHIEliminationUtils.h"
1717
#include "llvm/ADT/DenseMap.h"
18+
#include "llvm/ADT/STLExtras.h"
1819
#include "llvm/ADT/SmallPtrSet.h"
1920
#include "llvm/ADT/Statistic.h"
2021
#include "llvm/Analysis/LoopInfo.h"
@@ -541,6 +542,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
541542
// Now loop over all of the incoming arguments, changing them to copy into the
542543
// IncomingReg register in the corresponding predecessor basic block.
543544
SmallPtrSet<MachineBasicBlock *, 8> MBBsInsertedInto;
545+
SmallVector<MachineInstr *, 8> InsertedCopies;
544546
for (int i = NumSrcs - 1; i >= 0; --i) {
545547
Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
546548
unsigned SrcSubReg = MPhi->getOperand(i * 2 + 1).getSubReg();
@@ -607,6 +609,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
607609
NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr,
608610
SrcReg, SrcSubReg, IncomingReg);
609611
}
612+
InsertedCopies.emplace_back(NewSrcInstr);
610613
}
611614

612615
// We only need to update the LiveVariables kill of SrcReg if this was the
@@ -730,6 +733,38 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
730733
}
731734
}
732735

736+
// Remove redundant COPY instruction chains, which were potentially added by
737+
// the code above. This can prevent future passes from complicating the CFG
738+
// and causing a suboptimal block layout.
739+
for (MachineInstr *NewCopy : InsertedCopies) {
740+
if (NewCopy->isImplicitDef())
741+
continue;
742+
Register IncomingReg = NewCopy->getOperand(0).getReg();
743+
if (!IncomingReg.isVirtual())
744+
continue;
745+
Register SrcReg = NewCopy->getOperand(1).getReg();
746+
if (!MRI->hasOneNonDBGUse(SrcReg))
747+
continue;
748+
MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg);
749+
if (!DefMI || !DefMI->isCopy() ||
750+
DefMI->getParent() != NewCopy->getParent())
751+
continue;
752+
auto InstrRange =
753+
make_range(std::next(DefMI->getIterator()), NewCopy->getIterator());
754+
if (any_of(InstrRange, [&](const MachineInstr &MI) {
755+
return MI.readsVirtualRegister(IncomingReg);
756+
}))
757+
continue;
758+
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
759+
const TargetRegisterClass *IncomingRC = MRI->getRegClass(IncomingReg);
760+
if (!IncomingRC->hasSuperClassEq(SrcRC))
761+
continue;
762+
MRI->replaceRegWith(SrcReg, IncomingReg);
763+
NewCopy->removeFromParent();
764+
if (LV)
765+
LV->getVarInfo(SrcReg).AliveBlocks.clear();
766+
}
767+
733768
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
734769
if (EliminateNow) {
735770
if (LIS)

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120120
; -O0: bl __aarch64_cas16_relax
121-
; -O0: subs x10, x10, x11
122-
; -O0: ccmp x8, x9, #0, eq
121+
; -O0: subs x9, x0, x9
122+
; -O0: ccmp x1, x8, #0, eq
123123
;
124124
; -O1-LABEL: store_atomic_i128_aligned_unordered:
125125
; -O1: ldxp xzr, x8, [x2]
@@ -131,8 +131,8 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
131131
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
132132
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
133133
; -O0: bl __aarch64_cas16_relax
134-
; -O0: subs x10, x10, x11
135-
; -O0: ccmp x8, x9, #0, eq
134+
; -O0: subs x9, x0, x9
135+
; -O0: ccmp x1, x8, #0, eq
136136
;
137137
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
138138
; -O1: ldxp xzr, x8, [x2]
@@ -144,8 +144,8 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
144144
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
145145
; -O0-LABEL: store_atomic_i128_aligned_release:
146146
; -O0: bl __aarch64_cas16_rel
147-
; -O0: subs x10, x10, x11
148-
; -O0: ccmp x8, x9, #0, eq
147+
; -O0: subs x9, x0, x9
148+
; -O0: ccmp x1, x8, #0, eq
149149
;
150150
; -O1-LABEL: store_atomic_i128_aligned_release:
151151
; -O1: ldxp xzr, x8, [x2]
@@ -157,8 +157,8 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
157157
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
158158
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
159159
; -O0: bl __aarch64_cas16_acq_rel
160-
; -O0: subs x10, x10, x11
161-
; -O0: ccmp x8, x9, #0, eq
160+
; -O0: subs x9, x0, x9
161+
; -O0: ccmp x1, x8, #0, eq
162162
;
163163
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
164164
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120-
; -O0: ldxp x10, x12, [x9]
120+
; -O0: ldxp x8, x10, [x13]
121+
; -O0: cmp x8, x9
121122
; -O0: cmp x10, x11
122-
; -O0: cmp x12, x13
123-
; -O0: stxp w8, x14, x15, [x9]
124-
; -O0: stxp w8, x10, x12, [x9]
125-
; -O0: subs x12, x12, x13
126-
; -O0: ccmp x10, x11, #0, eq
123+
; -O0: stxp w12, x14, x15, [x13]
124+
; -O0: stxp w12, x8, x10, [x13]
125+
; -O0: subs x10, x10, x11
126+
; -O0: ccmp x8, x9, #0, eq
127127
;
128128
; -O1-LABEL: store_atomic_i128_aligned_unordered:
129129
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
134134

135135
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
136136
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
137-
; -O0: ldxp x10, x12, [x9]
137+
; -O0: ldxp x8, x10, [x13]
138+
; -O0: cmp x8, x9
138139
; -O0: cmp x10, x11
139-
; -O0: cmp x12, x13
140-
; -O0: stxp w8, x14, x15, [x9]
141-
; -O0: stxp w8, x10, x12, [x9]
142-
; -O0: subs x12, x12, x13
143-
; -O0: ccmp x10, x11, #0, eq
140+
; -O0: stxp w12, x14, x15, [x13]
141+
; -O0: stxp w12, x8, x10, [x13]
142+
; -O0: subs x10, x10, x11
143+
; -O0: ccmp x8, x9, #0, eq
144144
;
145145
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
146146
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
151151

152152
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
153153
; -O0-LABEL: store_atomic_i128_aligned_release:
154-
; -O0: ldxp x10, x12, [x9]
154+
; -O0: ldxp x8, x10, [x13]
155+
; -O0: cmp x8, x9
155156
; -O0: cmp x10, x11
156-
; -O0: cmp x12, x13
157-
; -O0: stlxp w8, x14, x15, [x9]
158-
; -O0: stlxp w8, x10, x12, [x9]
159-
; -O0: subs x12, x12, x13
160-
; -O0: ccmp x10, x11, #0, eq
157+
; -O0: stlxp w12, x14, x15, [x13]
158+
; -O0: stlxp w12, x8, x10, [x13]
159+
; -O0: subs x10, x10, x11
160+
; -O0: ccmp x8, x9, #0, eq
161161
;
162162
; -O1-LABEL: store_atomic_i128_aligned_release:
163163
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
168168

169169
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
170170
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
171-
; -O0: ldaxp x10, x12, [x9]
171+
; -O0: ldaxp x8, x10, [x13]
172+
; -O0: cmp x8, x9
172173
; -O0: cmp x10, x11
173-
; -O0: cmp x12, x13
174-
; -O0: stlxp w8, x14, x15, [x9]
175-
; -O0: stlxp w8, x10, x12, [x9]
176-
; -O0: subs x12, x12, x13
177-
; -O0: ccmp x10, x11, #0, eq
174+
; -O0: stlxp w12, x14, x15, [x13]
175+
; -O0: stlxp w12, x8, x10, [x13]
176+
; -O0: subs x10, x10, x11
177+
; -O0: ccmp x8, x9, #0, eq
178178
;
179179
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
180180
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120-
; -O0: ldxp x10, x12, [x9]
120+
; -O0: ldxp x8, x10, [x13]
121+
; -O0: cmp x8, x9
121122
; -O0: cmp x10, x11
122-
; -O0: cmp x12, x13
123-
; -O0: stxp w8, x14, x15, [x9]
124-
; -O0: stxp w8, x10, x12, [x9]
125-
; -O0: subs x12, x12, x13
126-
; -O0: ccmp x10, x11, #0, eq
123+
; -O0: stxp w12, x14, x15, [x13]
124+
; -O0: stxp w12, x8, x10, [x13]
125+
; -O0: subs x10, x10, x11
126+
; -O0: ccmp x8, x9, #0, eq
127127
;
128128
; -O1-LABEL: store_atomic_i128_aligned_unordered:
129129
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
134134

135135
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
136136
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
137-
; -O0: ldxp x10, x12, [x9]
137+
; -O0: ldxp x8, x10, [x13]
138+
; -O0: cmp x8, x9
138139
; -O0: cmp x10, x11
139-
; -O0: cmp x12, x13
140-
; -O0: stxp w8, x14, x15, [x9]
141-
; -O0: stxp w8, x10, x12, [x9]
142-
; -O0: subs x12, x12, x13
143-
; -O0: ccmp x10, x11, #0, eq
140+
; -O0: stxp w12, x14, x15, [x13]
141+
; -O0: stxp w12, x8, x10, [x13]
142+
; -O0: subs x10, x10, x11
143+
; -O0: ccmp x8, x9, #0, eq
144144
;
145145
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
146146
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
151151

152152
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
153153
; -O0-LABEL: store_atomic_i128_aligned_release:
154-
; -O0: ldxp x10, x12, [x9]
154+
; -O0: ldxp x8, x10, [x13]
155+
; -O0: cmp x8, x9
155156
; -O0: cmp x10, x11
156-
; -O0: cmp x12, x13
157-
; -O0: stlxp w8, x14, x15, [x9]
158-
; -O0: stlxp w8, x10, x12, [x9]
159-
; -O0: subs x12, x12, x13
160-
; -O0: ccmp x10, x11, #0, eq
157+
; -O0: stlxp w12, x14, x15, [x13]
158+
; -O0: stlxp w12, x8, x10, [x13]
159+
; -O0: subs x10, x10, x11
160+
; -O0: ccmp x8, x9, #0, eq
161161
;
162162
; -O1-LABEL: store_atomic_i128_aligned_release:
163163
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
168168

169169
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
170170
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
171-
; -O0: ldaxp x10, x12, [x9]
171+
; -O0: ldaxp x8, x10, [x13]
172+
; -O0: cmp x8, x9
172173
; -O0: cmp x10, x11
173-
; -O0: cmp x12, x13
174-
; -O0: stlxp w8, x14, x15, [x9]
175-
; -O0: stlxp w8, x10, x12, [x9]
176-
; -O0: subs x12, x12, x13
177-
; -O0: ccmp x10, x11, #0, eq
174+
; -O0: stlxp w12, x14, x15, [x13]
175+
; -O0: stlxp w12, x8, x10, [x13]
176+
; -O0: subs x10, x10, x11
177+
; -O0: ccmp x8, x9, #0, eq
178178
;
179179
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
180180
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ body: |
3737
bb.1:
3838
%x:gpr32 = COPY $wzr
3939
; Test that the debug location is not copied into bb1!
40-
; CHECK: %3:gpr32 = COPY killed %x{{$}}
40+
; CHECK: %3:gpr32 = COPY $wzr
4141
; CHECK-LABEL: bb.2:
4242
bb.2:
4343
%y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0, debug-location !14

0 commit comments

Comments
 (0)