Skip to content

Commit 6307b49

Browse files
authored
[AMDGPU] Add GCNRPTarget to track register pressure against a target (#145765)
This adds the `GCNRPTarget` class, which models a register pressure target (i.e., a maximum number of SGPRs/VGPRs) that one can track register savings against. The only current use of this class is in the scheduler's rematerialization stage. It replaces the more ad-hoc (and now deleted) `ExcessRP` class, which used to serve the same purpose. This is only NFC~ish because `GCNRPTarget` tracks VGPR usage more accurately than `ExcessRP` used to. To estimate the required combined VGPR savings, we now additionally take into account the number of available VGPRs in both banks (ArchVGPR and AGPR) at the time the RP target is created, whereas we used to only consider explicit savings made from the starting RP. This makes VGPR savings estimates more accurate in cases where we allow savings in one VGPR bank to help reduce pressure in the other VGPR bank (see `GCNRPTarget::CombineVGPRSavings`). This is the reason for the unit test changes.
1 parent 9514901 commit 6307b49

File tree

4 files changed

+266
-277
lines changed

4 files changed

+266
-277
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,69 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
361361
return LastUseMask;
362362
}
363363

364+
////////////////////////////////////////////////////////////////////////////////
365+
// GCNRPTarget
366+
367+
/// Builds a target whose SGPR/VGPR limits are the maximums the subtarget
/// reports for \p MF's function, starting from pressure \p RP.
GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
                         bool CombineVGPRSavings)
    : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
  const Function &Fn = MF.getFunction();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  // setRegLimits further clamps these to the addressable register counts.
  setRegLimits(Subtarget.getMaxNumSGPRs(Fn), Subtarget.getMaxNumVGPRs(Fn),
               MF);
}
374+
375+
/// Builds a target from explicit register counts: at most \p NumSGPRs SGPRs
/// and \p NumVGPRs VGPRs on \p MF, starting from pressure \p RP.
GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
                         const MachineFunction &MF, const GCNRegPressure &RP,
                         bool CombineVGPRSavings)
    : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
  // The requested counts are clamped to what the subtarget can address.
  this->setRegLimits(NumSGPRs, NumVGPRs, MF);
}
381+
382+
/// Builds a target whose limits are the register counts the subtarget reports
/// for an occupancy of \p Occupancy on \p MF, starting from pressure \p RP.
GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
                         const GCNRegPressure &RP, bool CombineVGPRSavings)
    : RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const unsigned BlockSize =
      MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
  const unsigned SGPRLimit =
      Subtarget.getMaxNumSGPRs(Occupancy, /*Addressable=*/false);
  const unsigned VGPRLimit = Subtarget.getMaxNumVGPRs(Occupancy, BlockSize);
  setRegLimits(SGPRLimit, VGPRLimit, MF);
}
391+
392+
void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
393+
const MachineFunction &MF) {
394+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
395+
unsigned DynamicVGPRBlockSize =
396+
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
397+
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
398+
MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
399+
MaxUnifiedVGPRs =
400+
ST.hasGFX90AInsts()
401+
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
402+
: 0;
403+
}
404+
405+
/// Returns true if saving \p Reg would help reach the target: for an SGPR,
/// when SGPR pressure exceeds the SGPR limit; otherwise, when the pressure of
/// the register's VGPR bank (AGPR or ArchVGPR) makes a save beneficial.
bool GCNRPTarget::isSaveBeneficial(Register Reg,
                                   const MachineRegisterInfo &MRI) const {
  const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
  const auto *SRI =
      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

  if (SRI->isSGPRClass(RegClass))
    return RP.getSGPRNum() > MaxSGPRs;

  // Not an SGPR: evaluate against the bank the register class belongs to.
  if (SRI->isAGPRClass(RegClass))
    return isVGPRBankSaveBeneficial(RP.getAGPRNum());
  return isVGPRBankSaveBeneficial(RP.getArchVGPRNum());
}
417+
418+
bool GCNRPTarget::satisfied() const {
419+
if (RP.getSGPRNum() > MaxSGPRs)
420+
return false;
421+
if (RP.getVGPRNum(false) > MaxVGPRs &&
422+
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
423+
return false;
424+
return satisfiesUnifiedTarget();
425+
}
426+
364427
///////////////////////////////////////////////////////////////////////////////
365428
// GCNRPTracker
366429

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,101 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
162162
return Diff;
163163
}
164164

165+
////////////////////////////////////////////////////////////////////////////////
166+
// GCNRPTarget
167+
168+
/// Models a register pressure target, allowing to evaluate and track register
169+
/// savings against that target from a starting \ref GCNRegPressure.
170+
class GCNRPTarget {
171+
public:
172+
/// Sets up the target such that the register pressure starting at \p RP does
173+
/// not show register spilling on function \p MF (w.r.t. the function's
174+
/// mininum target occupancy).
175+
GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
176+
bool CombineVGPRSavings = false);
177+
178+
/// Sets up the target such that the register pressure starting at \p RP does
179+
/// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p
180+
/// MF.
181+
GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF,
182+
const GCNRegPressure &RP, bool CombineVGPRSavings = false);
183+
184+
/// Sets up the target such that the register pressure starting at \p RP does
185+
/// not prevent achieving an occupancy of at least \p Occupancy on function
186+
/// \p MF.
187+
GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
188+
const GCNRegPressure &RP, bool CombineVGPRSavings = false);
189+
190+
const GCNRegPressure &getCurrentRP() const { return RP; }
191+
192+
void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
193+
194+
/// Determines whether saving virtual register \p Reg will be beneficial
195+
/// towards achieving the RP target.
196+
bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
197+
198+
/// Saves virtual register \p Reg with lanemask \p Mask.
199+
void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
200+
RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
201+
}
202+
203+
/// Whether the current RP is at or below the defined pressure target.
204+
bool satisfied() const;
205+
206+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
207+
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
208+
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
209+
<< " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
210+
<< " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
211+
<< " AGPRs";
212+
213+
if (Target.MaxUnifiedVGPRs) {
214+
OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
215+
<< " VGPRs (unified)";
216+
} else if (Target.CombineVGPRSavings) {
217+
OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
218+
<< 2 * Target.MaxVGPRs << " VGPRs (combined target)";
219+
}
220+
return OS;
221+
}
222+
#endif
223+
224+
private:
225+
/// Current register pressure.
226+
GCNRegPressure RP;
227+
228+
/// Target number of SGPRs.
229+
unsigned MaxSGPRs;
230+
/// Target number of ArchVGPRs and AGPRs.
231+
unsigned MaxVGPRs;
232+
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0
233+
/// for subtargets with non-unified RFs.
234+
unsigned MaxUnifiedVGPRs;
235+
/// Whether we consider that the register allocator will be able to swap
236+
/// between ArchVGPRs and AGPRs by copying them to a super register class.
237+
/// Concretely, this allows savings in one of the VGPR banks to help toward
238+
/// savings in the other VGPR bank.
239+
bool CombineVGPRSavings;
240+
241+
inline bool satisifiesVGPRBanksTarget() const {
242+
assert(CombineVGPRSavings && "only makes sense with combined savings");
243+
return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
244+
}
245+
246+
/// Always satisified when the subtarget doesn't have a unified RF.
247+
inline bool satisfiesUnifiedTarget() const {
248+
return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
249+
}
250+
251+
inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
252+
return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
253+
(CombineVGPRSavings && !satisifiesVGPRBanksTarget());
254+
}
255+
256+
void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
257+
const MachineFunction &MF);
258+
};
259+
165260
///////////////////////////////////////////////////////////////////////////////
166261
// GCNRPTracker
167262

@@ -370,7 +465,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
370465
if (!LI.hasSubRanges()) {
371466
for (auto SI : LiveIdxs)
372467
LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
373-
MRI.getMaxLaneMaskForVReg(Reg);
468+
MRI.getMaxLaneMaskForVReg(Reg);
374469
} else
375470
for (const auto &S : LI.subranges()) {
376471
// constrain search for subranges by indexes live at main range

0 commit comments

Comments
 (0)