Skip to content

Commit e933cfc

Browse files
authored
[NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit (#144136)
Currently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or LIFETIME_END instructions are encountered. Usually this isn't a problem since a couple earlier passes will always remove them. However, when using opt-bisect-limit crashes can occur. This can hinder debugging and reveals a potential future problem if these optimization passes change their behavior. https://cuda.godbolt.org/z/E81xxKGdb This change updates NVPTXPrologEpilogPass and NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these instructions by simply removing them. While I'm here I also did some general fixup in NVPTXPrologEpilogPass to make it look more like PrologEpilogInserter (from which it was copied).
1 parent 37b0b0f commit e933cfc

File tree

5 files changed

+117
-35
lines changed

5 files changed

+117
-35
lines changed

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
7676
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
7777
void initializeNVPTXPeepholePass(PassRegistry &);
7878
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
79+
void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
7980

8081
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
8182
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,52 @@ class NVPTXPrologEpilogPass : public MachineFunctionPass {
4141
private:
4242
void calculateFrameObjectOffsets(MachineFunction &Fn);
4343
};
44-
}
44+
} // end anonymous namespace
4545

4646
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
4747
return new NVPTXPrologEpilogPass();
4848
}
4949

5050
char NVPTXPrologEpilogPass::ID = 0;
5151

52+
INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
53+
"NVPTX Prologue/Epilogue Insertion", false, false)
54+
55+
static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
56+
unsigned OpIdx) {
57+
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
58+
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
59+
if (MI.isDebugValue()) {
60+
61+
MachineOperand &Op = MI.getOperand(OpIdx);
62+
assert(MI.isDebugOperand(&Op) &&
63+
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
64+
" machine instruction");
65+
Register Reg;
66+
unsigned FrameIdx = Op.getIndex();
67+
68+
StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
69+
Op.ChangeToRegister(Reg, false /*isDef*/);
70+
71+
const DIExpression *DIExpr = MI.getDebugExpression();
72+
if (MI.isNonListDebugValue()) {
73+
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
74+
DIExpression::ApplyOffset, Offset);
75+
} else {
76+
// The debug operand at DebugOpIndex was a frame index at offset
77+
// `Offset`; now the operand has been replaced with the frame
78+
// register, we must add Offset with `register x, plus Offset`.
79+
unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
80+
SmallVector<uint64_t, 3> Ops;
81+
TRI.getOffsetOpcodes(Offset, Ops);
82+
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
83+
}
84+
MI.getDebugExpressionOp().setMetadata(DIExpr);
85+
return true;
86+
}
87+
return false;
88+
}
89+
5290
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
5391
const TargetSubtargetInfo &STI = MF.getSubtarget();
5492
const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
5795

5896
calculateFrameObjectOffsets(MF);
5997

60-
for (MachineBasicBlock &MBB : MF) {
61-
for (MachineInstr &MI : MBB) {
62-
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
63-
if (!MI.getOperand(i).isFI())
98+
for (MachineBasicBlock &BB : MF) {
99+
for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
100+
MachineInstr &MI = *std::prev(I);
101+
102+
bool RemovedMI = false;
103+
for (const auto &[Idx, Op] : enumerate(MI.operands())) {
104+
if (!Op.isFI())
64105
continue;
65106

66-
// Frame indices in debug values are encoded in a target independent
67-
// way with simply the frame index and offset rather than any
68-
// target-specific addressing mode.
69-
if (MI.isDebugValue()) {
70-
MachineOperand &Op = MI.getOperand(i);
71-
assert(
72-
MI.isDebugOperand(&Op) &&
73-
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
74-
" machine instruction");
75-
Register Reg;
76-
auto Offset =
77-
TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
78-
Op.ChangeToRegister(Reg, /*isDef=*/false);
79-
const DIExpression *DIExpr = MI.getDebugExpression();
80-
if (MI.isNonListDebugValue()) {
81-
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
82-
} else {
83-
SmallVector<uint64_t, 3> Ops;
84-
TRI.getOffsetOpcodes(Offset, Ops);
85-
unsigned OpIdx = MI.getDebugOperandIndex(&Op);
86-
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
87-
}
88-
MI.getDebugExpressionOp().setMetadata(DIExpr);
107+
if (replaceFrameIndexDebugInstr(MF, MI, Idx))
89108
continue;
90-
}
91109

92-
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
110+
// Eliminate this FrameIndex operand.
111+
RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
93112
Modified = true;
113+
if (RemovedMI)
114+
break;
94115
}
116+
117+
if (!RemovedMI)
118+
--I;
95119
}
96120
}
97121

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
103103

104104
bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
105105
int SPAdj, unsigned FIOperandNum,
106-
RegScavenger *RS) const {
106+
RegScavenger *) const {
107107
assert(SPAdj == 0 && "Unexpected");
108108

109109
MachineInstr &MI = *II;
110-
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
110+
if (MI.isLifetimeMarker()) {
111+
MI.eraseFromParent();
112+
return true;
113+
}
114+
115+
const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
111116

112-
MachineFunction &MF = *MI.getParent()->getParent();
113-
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
114-
MI.getOperand(FIOperandNum + 1).getImm();
117+
const MachineFunction &MF = *MI.getParent()->getParent();
118+
const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
119+
MI.getOperand(FIOperandNum + 1).getImm();
115120

116121
// Using I0 as the frame pointer
117122
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
115115
initializeNVPTXExternalAAWrapperPass(PR);
116116
initializeNVPTXPeepholePass(PR);
117117
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
118+
initializeNVPTXPrologEpilogPassPass(PR);
118119
}
119120

120121
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
2+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
3+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
4+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
5+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
6+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
7+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
8+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
9+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
10+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
11+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
12+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
13+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
14+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
15+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
16+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
17+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
18+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
19+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
20+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
21+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
22+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
23+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
24+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
25+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
26+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
27+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
28+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
29+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
30+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
31+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
32+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
33+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
34+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
35+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
36+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
37+
38+
;; This test is intended to verify that we don't crash when -opt-bisect-limit
39+
;; is used in conjunction with lifetime markers. Previously, later passes
40+
;; would not handle these intructions correctly and relied on earlier passes
41+
;; to remove them.
42+
43+
declare void @bar(ptr)
44+
45+
define void @foo() {
46+
%p = alloca i32
47+
call void @llvm.lifetime.start(i64 4, ptr %p)
48+
call void @bar(ptr %p)
49+
call void @llvm.lifetime.end(i64 4, ptr %p)
50+
ret void
51+
}

0 commit comments

Comments
 (0)