Skip to content

Commit 62fe5e4

Browse files
authored
[NFC][AMDGPU] print more info when debugging SIInsertWaitcnts pass (#144629)
1 parent 0e1aab1 commit 62fe5e4

File tree

1 file changed

+80 -32 lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 80 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -104,25 +104,38 @@ struct HardwareLimits {
104104
unsigned KmcntMax; // gfx12+ only.
105105
};
106106

107+
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
108+
DECL(VMEM_ACCESS) /* vmem read & write */ \
109+
DECL(VMEM_READ_ACCESS) /* vmem read */ \
110+
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
111+
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
112+
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
113+
DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
114+
DECL(LDS_ACCESS) /* lds read & write */ \
115+
DECL(GDS_ACCESS) /* gds read & write */ \
116+
DECL(SQ_MESSAGE) /* send message */ \
117+
DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
118+
DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
119+
DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
120+
DECL(EXP_POS_ACCESS) /* write to export position */ \
121+
DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
122+
DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
123+
DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */
124+
125+
// clang-format off
126+
#define AMDGPU_EVENT_ENUM(Name) Name,
107127
enum WaitEventType {
108-
VMEM_ACCESS, // vector-memory read & write
109-
VMEM_READ_ACCESS, // vector-memory read
110-
VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
111-
VMEM_BVH_READ_ACCESS, // vector-memory BVH read (gfx12+ only)
112-
VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
113-
SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
114-
LDS_ACCESS, // lds read & write
115-
GDS_ACCESS, // gds read & write
116-
SQ_MESSAGE, // send message
117-
SMEM_ACCESS, // scalar-memory read & write
118-
EXP_GPR_LOCK, // export holding on its data src
119-
GDS_GPR_LOCK, // GDS holding on its data and addr src
120-
EXP_POS_ACCESS, // write to export position
121-
EXP_PARAM_ACCESS, // write to export parameter
122-
VMW_GPR_LOCK, // vector-memory write holding on its data src
123-
EXP_LDS_ACCESS, // read by ldsdir counting as export
124-
NUM_WAIT_EVENTS,
128+
AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_ENUM)
129+
NUM_WAIT_EVENTS
125130
};
131+
#undef AMDGPU_EVENT_ENUM
132+
133+
#define AMDGPU_EVENT_NAME(Name) #Name,
134+
static constexpr StringLiteral WaitEventTypeName[] = {
135+
AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_NAME)
136+
};
137+
#undef AMDGPU_EVENT_NAME
138+
// clang-format on
126139

127140
// The mapping is:
128141
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
@@ -1100,6 +1113,20 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
11001113
}
11011114
OS << '\n';
11021115
}
1116+
1117+
OS << "Pending Events: ";
1118+
if (hasPendingEvent()) {
1119+
ListSeparator LS;
1120+
for (unsigned I = 0; I != NUM_WAIT_EVENTS; ++I) {
1121+
if (hasPendingEvent((WaitEventType)I)) {
1122+
OS << LS << WaitEventTypeName[I];
1123+
}
1124+
}
1125+
} else {
1126+
OS << "none";
1127+
}
1128+
OS << '\n';
1129+
11031130
OS << '\n';
11041131
}
11051132

@@ -1265,10 +1292,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
12651292
MachineInstr *WaitcntInstr = nullptr;
12661293
MachineInstr *WaitcntVsCntInstr = nullptr;
12671294

1295+
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It);
1296+
12681297
for (auto &II :
12691298
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
1270-
if (II.isMetaInstruction())
1299+
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
1300+
if (II.isMetaInstruction()) {
1301+
LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
12711302
continue;
1303+
}
12721304

12731305
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
12741306
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
@@ -1320,9 +1352,9 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
13201352

13211353
LLVM_DEBUG(It == WaitcntInstr->getParent()->end()
13221354
? dbgs()
1323-
<< "applyPreexistingWaitcnt\n"
1355+
<< "applied pre-existing waitcnt\n"
13241356
<< "New Instr at block end: " << *WaitcntInstr << '\n'
1325-
: dbgs() << "applyPreexistingWaitcnt\n"
1357+
: dbgs() << "applied pre-existing waitcnt\n"
13261358
<< "Old Instr: " << *It
13271359
<< "New Instr: " << *WaitcntInstr << '\n');
13281360
}
@@ -1336,10 +1368,10 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
13361368
Wait.StoreCnt = ~0u;
13371369

13381370
LLVM_DEBUG(It == WaitcntVsCntInstr->getParent()->end()
1339-
? dbgs() << "applyPreexistingWaitcnt\n"
1371+
? dbgs() << "applied pre-existing waitcnt\n"
13401372
<< "New Instr at block end: " << *WaitcntVsCntInstr
13411373
<< '\n'
1342-
: dbgs() << "applyPreexistingWaitcnt\n"
1374+
: dbgs() << "applied pre-existing waitcnt\n"
13431375
<< "Old Instr: " << *It
13441376
<< "New Instr: " << *WaitcntVsCntInstr << '\n');
13451377
}
@@ -1413,10 +1445,15 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
14131445
MachineInstr *CombinedStoreDsCntInstr = nullptr;
14141446
MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
14151447

1448+
LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It);
1449+
14161450
for (auto &II :
14171451
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
1418-
if (II.isMetaInstruction())
1452+
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
1453+
if (II.isMetaInstruction()) {
1454+
LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
14191455
continue;
1456+
}
14201457

14211458
MachineInstr **UpdatableInstr;
14221459

@@ -1486,10 +1523,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
14861523
Wait.DsCnt = ~0u;
14871524

14881525
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
1489-
? dbgs() << "applyPreexistingWaitcnt\n"
1526+
? dbgs() << "applied pre-existing waitcnt\n"
14901527
<< "New Instr at block end: "
14911528
<< *CombinedLoadDsCntInstr << '\n'
1492-
: dbgs() << "applyPreexistingWaitcnt\n"
1529+
: dbgs() << "applied pre-existing waitcnt\n"
14931530
<< "Old Instr: " << *It << "New Instr: "
14941531
<< *CombinedLoadDsCntInstr << '\n');
14951532
} else {
@@ -1511,10 +1548,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
15111548
Wait.DsCnt = ~0u;
15121549

15131550
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
1514-
? dbgs() << "applyPreexistingWaitcnt\n"
1551+
? dbgs() << "applied pre-existing waitcnt\n"
15151552
<< "New Instr at block end: "
15161553
<< *CombinedStoreDsCntInstr << '\n'
1517-
: dbgs() << "applyPreexistingWaitcnt\n"
1554+
: dbgs() << "applied pre-existing waitcnt\n"
15181555
<< "Old Instr: " << *It << "New Instr: "
15191556
<< *CombinedStoreDsCntInstr << '\n');
15201557
} else {
@@ -1570,10 +1607,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
15701607
setNoWait(Wait, CT);
15711608

15721609
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
1573-
? dbgs() << "applyPreexistingWaitcnt\n"
1610+
? dbgs() << "applied pre-existing waitcnt\n"
15741611
<< "New Instr at block end: " << *WaitInstrs[CT]
15751612
<< '\n'
1576-
: dbgs() << "applyPreexistingWaitcnt\n"
1613+
: dbgs() << "applied pre-existing waitcnt\n"
15771614
<< "Old Instr: " << *It
15781615
<< "New Instr: " << *WaitInstrs[CT] << '\n');
15791616
} else {
@@ -2306,7 +2343,8 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
23062343
bool Modified = false;
23072344

23082345
LLVM_DEBUG({
2309-
dbgs() << "*** Block" << Block.getNumber() << " ***";
2346+
dbgs() << "*** Begin Block: ";
2347+
Block.printName(dbgs());
23102348
ScoreBrackets.dump();
23112349
});
23122350

@@ -2437,6 +2475,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
24372475
Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
24382476
OldWaitcntInstr);
24392477

2478+
LLVM_DEBUG({
2479+
dbgs() << "*** End Block: ";
2480+
Block.printName(dbgs());
2481+
ScoreBrackets.dump();
2482+
});
2483+
24402484
return Modified;
24412485
}
24422486

@@ -2699,17 +2743,21 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
26992743
BlockInfo &SuccBI = SuccBII->second;
27002744
if (!SuccBI.Incoming) {
27012745
SuccBI.Dirty = true;
2702-
if (SuccBII <= BII)
2746+
if (SuccBII <= BII) {
2747+
LLVM_DEBUG(dbgs() << "repeat on backedge\n");
27032748
Repeat = true;
2749+
}
27042750
if (!MoveBracketsToSucc) {
27052751
MoveBracketsToSucc = &SuccBI;
27062752
} else {
27072753
SuccBI.Incoming = std::make_unique<WaitcntBrackets>(*Brackets);
27082754
}
27092755
} else if (SuccBI.Incoming->merge(*Brackets)) {
27102756
SuccBI.Dirty = true;
2711-
if (SuccBII <= BII)
2757+
if (SuccBII <= BII) {
2758+
LLVM_DEBUG(dbgs() << "repeat on backedge\n");
27122759
Repeat = true;
2760+
}
27132761
}
27142762
}
27152763
if (MoveBracketsToSucc)

0 commit comments

Comments
 (0)