Skip to content

[RegAllocBase] Produce IMPLICIT_DEF instead of COPY undef during cleanupFailedVReg #147392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion llvm/lib/CodeGen/RegAllocBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DiagnosticInfo.h"
Expand Down Expand Up @@ -60,6 +61,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
LiveRegMatrix &mat) {
TRI = &vrm.getTargetRegInfo();
MRI = &vrm.getRegInfo();
TII = vrm.getMachineFunction().getSubtarget().getInstrInfo();
VRM = &vrm;
LIS = &lis;
Matrix = &mat;
Expand Down Expand Up @@ -167,9 +169,15 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg,
// We still should produce valid IR. Kill all the uses and reduce the live
// ranges so that we don't think it's possible to introduce kill flags later
// which will fail the verifier.

SmallVector<MachineInstr *, 4> UndefCopies;

for (MachineOperand &MO : MRI->reg_operands(FailedReg)) {
if (MO.readsReg())
if (MO.readsReg()) {
MO.setIsUndef(true);
if (MO.getParent()->isCopy() && MO.isUse())
UndefCopies.push_back(MO.getParent());
}
}

if (!MRI->isReserved(PhysReg)) {
Expand All @@ -180,12 +188,22 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg,
for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
if (MO.readsReg()) {
MO.setIsUndef(true);
if (MO.getParent()->isCopy() && MO.isUse())
UndefCopies.push_back(MO.getParent());
LIS->removeAllRegUnitsForPhysReg(MO.getReg());
}
}
}
}

// If we have produced an undef copy, convert to IMPLICIT_DEF.
for (MachineInstr *UndefCopy : UndefCopies) {
assert(UndefCopy->isCopy() && UndefCopy->getNumOperands() == 2);
const MCInstrDesc &Desc = TII->get(TargetOpcode::IMPLICIT_DEF);
UndefCopy->removeOperand(1);
UndefCopy->setDesc(Desc);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found this to be the cleanest implementation for dealing with COPY bundles

}

// Directly perform the rewrite, and do not leave it to VirtRegRewriter as
// usual. This avoids trying to manage illegal overlapping assignments in
// LiveRegMatrix.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/RegAllocBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class RegAllocBase {

protected:
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
VirtRegMap *VRM = nullptr;
LiveIntervals *LIS = nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef $vgpr2_vgpr3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef $vgpr0
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = IMPLICIT_DEF {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = IMPLICIT_DEF
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
Expand Down Expand Up @@ -120,8 +120,8 @@ body: |
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef $vgpr2_vgpr3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef $vgpr0
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = IMPLICIT_DEF {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = IMPLICIT_DEF
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
Expand Down
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/AMDGPU/regalloc-undef-copy-fold.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 -o - -verify-regalloc %s 2> %t.err | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err

# Make sure there's no machine verifier error after failure.

# ERR: error: inline assembly requires more registers than available
# ERR: error: inline assembly requires more registers than available
# ERR: error: inline assembly requires more registers than available
# ERR: error: inline assembly requires more registers than available

# This testcase cannot be compiled with the enforced register
# budget. Previously, tryLastChanceRecoloring would assert here. It
# was attempting to recolor a superregister with an overlapping
# subregister over the same range.

--- |
define void @dead_copy() #0 {
ret void
}

define void @copy_kill() #0 {
ret void
}

define void @copy_subreg() #0 {
ret void
}

define void @copy_subreg2() #0 {
ret void
}

attributes #0 = { "amdgpu-num-vgpr"="6" }

...

# CHECK-LABEL: name: dead_copy
# CHECK: renamable $agpr0_agpr1_agpr2 = IMPLICIT_DEF
# CHECK: dead renamable $vgpr0_vgpr1_vgpr2 = COPY renamable $agpr0_agpr1_agpr2

---
name: dead_copy
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %0:vreg_512, 10158090 /* regdef:VReg_256 */, def %1:vreg_256, 4784138 /* regdef:VReg_128 */, def %2:vreg_128, 3670026 /* regdef:VReg_96 */, def %3:vreg_96, 3670026 /* regdef:VReg_96 */, def %4:vreg_96
%5:vreg_96 = COPY %3
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %3
SI_RETURN
...


# CHECK-LABEL: name: copy_kill
# CHECK: renamable $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF

---
name: copy_kill
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %0:vreg_512, 10158090 /* regdef:VReg_256 */, def %1:vreg_256, 4784138 /* regdef:VReg_128 */, def %2:vreg_128, 3670026 /* regdef:VReg_96 */, def %3:vreg_96, 3670026 /* regdef:VReg_96 */, def %4:vreg_96
%5:vreg_96 = COPY %3
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %5
SI_RETURN
...

# CHECK-LABEL: name: copy_subreg
# CHECK: renamable $vgpr1_vgpr2 = IMPLICIT_DEF
# CHECK: renamable $vgpr0 = COPY renamable $vgpr1
---
name: copy_subreg
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %0:vreg_512, 10158090 /* regdef:VReg_256 */, def %1:vreg_256, 4784138 /* regdef:VReg_128 */, def %2:vreg_128, 3670026 /* regdef:VReg_96 */, def %3:vreg_96, 3670026 /* regdef:VReg_96 */, def %4:vreg_96
%3.sub0 = COPY %3.sub1
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %3
SI_RETURN
...

# CHECK-LABEL: name: copy_subreg2
# CHECK: renamable $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF

---
name: copy_subreg2
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %0:vreg_512, 10158090 /* regdef:VReg_256 */, def %1:vreg_256, 4784138 /* regdef:VReg_128 */, def %2:vreg_128, 3670026 /* regdef:VReg_96 */, def %3:vreg_96, 3670026 /* regdef:VReg_96 */, def %4:vreg_96
undef %5.sub0:vreg_96 = COPY %3.sub0
%5.sub1_sub2:vreg_96 = COPY %3.sub1_sub2
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %5
SI_RETURN
...
Loading