Skip to content

Commit 7714e03

Browse files
committed
RegAllocGreedy: Allow last chance recolor to retry overlapping tuples
Last chance recoloring didn't try recoloring a done register with the same class, since it assumed there was no point. That assumption doesn't necessarily hold if the members of that class overlap. Allow the recoloring to proceed if the assigned interfering physical register overlaps with the candidate register. This avoids an allocation failure with overlapping tuples. This testcase could be handled better, and I don't believe it should reach last chance recoloring at all. The failure only manifests with mutually unsatisfiable register hints to overlapping tuples. The earlier assignment decisions probably should have figured out that using these hints was a bad idea.
1 parent 0386213 commit 7714e03

File tree

2 files changed

+108
-4
lines changed

2 files changed

+108
-4
lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,6 +1833,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
18331833
return false;
18341834
}
18351835

1836+
/// Return true if the existing assignment of \p Intf overlaps, but is not the
1837+
/// same, as \p PhysReg.
1838+
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1839+
const VirtRegMap &VRM,
1840+
MCRegister PhysReg,
1841+
const LiveInterval &Intf) {
1842+
MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1843+
if (PhysReg == AssignedReg)
1844+
return false;
1845+
return TRI.regsOverlap(PhysReg, AssignedReg);
1846+
}
1847+
18361848
/// mayRecolorAllInterferences - Check if the virtual registers that
18371849
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
18381850
/// recolored to free \p PhysReg.
@@ -1858,12 +1870,20 @@ bool RAGreedy::mayRecolorAllInterferences(
18581870
return false;
18591871
}
18601872
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1861-
// If Intf is done and sit on the same register class as VirtReg,
1862-
// it would not be recolorable as it is in the same state as VirtReg.
1863-
// However, if VirtReg has tied defs and Intf doesn't, then
1873+
// If Intf is done and sits on the same register class as VirtReg, it
1874+
// would not be recolorable as it is in the same state as
1875+
// VirtReg. However there are at least two exceptions.
1876+
//
1877+
// If VirtReg has tied defs and Intf doesn't, then
18641878
// there is still a point in examining if it can be recolorable.
1879+
//
1880+
// Additionally, if the register class has overlapping tuple members, it
1881+
// may still be recolorable using a different tuple. This is more likely
1882+
// if the existing assignment aliases with the candidate.
1883+
//
18651884
if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1866-
MRI->getRegClass(Intf->reg()) == CurRC) &&
1885+
MRI->getRegClass(Intf->reg()) == CurRC &&
1886+
!assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
18671887
!(hasTiedDef(MRI, VirtReg.reg()) &&
18681888
!hasTiedDef(MRI, Intf->reg()))) ||
18691889
FixedRegisters.count(Intf->reg())) {
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s
3+
4+
# This testcase is restricted to use a maximum of 24 VGPRs. It is
5+
# therefore possible to allocate a maximum of 3 vreg_256s at a
6+
# time. The apparent number of registers in the class is larger, but
7+
# each one overlaps with the next. Allocating a vreg_64 will prevent a
8+
# full vreg_256 from being live at a given point.
9+
10+
# The hints are trying to force allocation of overlapping vreg_256s
11+
# which cannot be satisfied. The last S_NOP in %bb.0 with 2 vreg_256s
12+
# and a vreg_64 use can be satisfied as long as the hints are ignored.
13+
14+
# With the resulting allocation order, this ends up using last chance
15+
# recoloring for a vreg_256. We should try to recolor for completed
16+
# virtual registers with the same class, since the existing assignment
17+
# can only be corrected by adjusting to a non-overlapping register.
18+
19+
--- |
20+
define void @recolor_impossible_hint() #0 {
21+
ret void
22+
}
23+
24+
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
25+
---
26+
27+
---
28+
name: recolor_impossible_hint
29+
alignment: 1
30+
tracksRegLiveness: true
31+
registers:
32+
- { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' }
33+
- { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' }
34+
- { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' }
35+
- { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' }
36+
machineFunctionInfo:
37+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
38+
stackPtrOffsetReg: '$sgpr32'
39+
occupancy: 10
40+
body: |
41+
; CHECK-LABEL: name: recolor_impossible_hint
42+
; CHECK: bb.0:
43+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5
46+
; CHECK-NEXT: SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
47+
; CHECK-NEXT: SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5)
48+
; CHECK-NEXT: SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
49+
; CHECK-NEXT: S_NOP 0, implicit-def %17
50+
; CHECK-NEXT: SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
51+
; CHECK-NEXT: S_NOP 0, implicit-def %4
52+
; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
53+
; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
54+
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
55+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
56+
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
57+
; CHECK-NEXT: {{ $}}
58+
; CHECK-NEXT: bb.1:
59+
; CHECK-NEXT: successors: %bb.2(0x80000000)
60+
; CHECK-NEXT: {{ $}}
61+
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
62+
; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
63+
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
64+
; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
65+
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: bb.2:
68+
; CHECK-NEXT: S_ENDPGM 0
69+
bb.0:
70+
S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256
71+
S_NOP 0, implicit-def %3:vreg_256
72+
S_NOP 0, implicit-def %4:vreg_64
73+
S_NOP 0, implicit %0, implicit %1, implicit %4
74+
S_CBRANCH_EXECNZ %bb.3, implicit $exec
75+
76+
bb.2:
77+
S_NOP 0, implicit %1
78+
S_NOP 0, implicit %2
79+
S_NOP 0, implicit %3
80+
81+
bb.3:
82+
S_ENDPGM 0
83+
84+
...

0 commit comments

Comments
 (0)