
[AMDGPU] Examine instructions in pending queues during scheduling #147653

Open. Wants to merge 1 commit into base: main.

170 changes: 153 additions & 17 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -68,6 +68,14 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));

static cl::opt<bool> ExaminePendingQueue(
"amdgpu-examine-pending-queue", cl::Hidden,
cl::desc(
"Examine instructions in the pending the pending queue when "
"scheduling. This makes instructions visible to heuristics that cannot "
"immediately be issued due to hardware resource constraints."),
cl::init(true));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -319,17 +327,45 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
}
}

static bool shouldCheckPending(SchedBoundary &Zone,
const TargetSchedModel *SchedModel) {
const unsigned ReadyListLimit = 256;
Contributor comment: Can you replace the bool flag with a value for this limit? Disable will be implied by 0.

bool HasBufferedModel =
SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
return ExaminePendingQueue &&
Zone.Available.size() + Zone.Pending.size() <= ReadyListLimit &&
HasBufferedModel;
}
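
As a rough illustration of the reviewer's suggestion above (replacing the boolean flag with a numeric limit, where 0 disables the feature), here is a minimal sketch; the option name, description text, and default value are assumptions rather than part of the patch:

static cl::opt<unsigned> PendingQueueLimit(
    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
    cl::desc("Upper bound on Available + Pending size for examining the "
             "pending queue during scheduling; 0 disables the examination."),
    cl::init(256));

static bool shouldCheckPending(SchedBoundary &Zone,
                               const TargetSchedModel *SchedModel) {
  // A limit of 0 turns the feature off entirely.
  if (PendingQueueLimit == 0)
    return false;
  bool HasBufferedModel =
      SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
  return Zone.Available.size() + Zone.Pending.size() <= PendingQueueLimit &&
         HasBufferedModel;
}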

static SUnit *pickOnlyChoice(SchedBoundary &Zone,
const TargetSchedModel *SchedModel) {
if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
return Zone.pickOnlyChoice();
return nullptr;
}

#ifndef NDEBUG
Contributor comment: Just leave it in? The body will be empty in release build anyway.

void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
const SchedCandidate &Preferred) {
LLVM_DEBUG(dbgs() << "Prefer:\t\t"; DAG->dumpNode(*Preferred.SU));
if (Current.SU)
LLVM_DEBUG(dbgs() << "Not:\t"; DAG->dumpNode(*Current.SU));
LLVM_DEBUG(dbgs() << "Reason:\t\t"; traceCandidate(Preferred));
Contributor comment on lines +350 to +353: Use one debug LLVM_DEBUG({}).

}
#endif
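
Taken together, the two review comments above suggest keeping printCandidateDecision defined in all build modes and wrapping its body in a single LLVM_DEBUG({}) block, which compiles away in release builds. A sketch of that shape (not the patch as committed):

void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
                                              const SchedCandidate &Preferred) {
  // The whole body is debug-only output, so one LLVM_DEBUG block suffices and
  // the function becomes a no-op in release builds.
  LLVM_DEBUG({
    dbgs() << "Prefer:\t\t";
    DAG->dumpNode(*Preferred.SU);
    if (Current.SU) {
      dbgs() << "Not:\t";
      DAG->dumpNode(*Current.SU);
    }
    dbgs() << "Reason:\t\t";
    traceCandidate(Preferred);
  });
}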

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand,
SchedCandidate &Cand, bool &IsPending,
bool IsBottomUp) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
IsPending = false;
if (DAG->isTrackingPressure()) {
if (!GCNTrackers) {
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -342,8 +378,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
VGPRPressure = T->getPressure().getArchVGPRNum();
}
}
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
LLVM_DEBUG(dbgs() << "Available Q:\n");
ReadyQueue &AQ = Zone.Available;
for (SUnit *SU : AQ) {

SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
@@ -355,40 +392,74 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(Zone.DAG, SchedModel);
LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
Cand.setBest(TryCand);
LLVM_DEBUG(traceCandidate(Cand));
}
#ifndef NDEBUG
else
printCandidateDecision(TryCand, Cand);
#endif
}

if (!shouldCheckPending(Zone, SchedModel))
return;

LLVM_DEBUG(dbgs() << "Pending Q:\n");
ReadyQueue &PQ = Zone.Pending;
for (SUnit *SU : PQ) {

SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
VGPRPressure, IsBottomUp);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryPendingCandidate(Cand, TryCand, ZoneArg);
if (TryCand.Reason != NoCand) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(Zone.DAG, SchedModel);
LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
IsPending = true;
Cand.setBest(TryCand);
}
#ifndef NDEBUG
else
printCandidateDecision(TryCand, Cand);
#endif
Contributor comment on lines +425 to +428: Don't have a macro conditional else.

}
}
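
One way to address the "macro conditional else" comment, assuming printCandidateDecision stays defined in all build modes as sketched earlier, is to keep both branches unconditional and let LLVM_DEBUG strip the output in release builds. An illustrative reshaping of the loop body, not the committed code:

    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
      IsPending = true;
      Cand.setBest(TryCand);
    } else {
      // No #ifndef NDEBUG needed: the call prints nothing in release builds.
      LLVM_DEBUG(printCandidateDecision(TryCand, Cand));
    }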

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
bool &PickedPending) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {
IsTopNode = false;
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {
IsTopNode = true;
return SU;
}
// Set the bottom-up policy based on the state of the current bottom zone and
// the instructions outside the zone, including the top zone.
// Set the bottom-up policy based on the state of the current bottom zone
// and the instructions outside the zone, including the top zone.
CandPolicy BotPolicy;
setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
// the instructions outside the zone, including the bottom zone.
CandPolicy TopPolicy;
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

bool BotPending = false;
// See if BotCand is still valid (because we previously scheduled from Top).
LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
BotPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -398,19 +469,22 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
BotPending,
/*IsBottomUp=*/true);
assert(TCand.SU == BotCand.SU &&
"Last pick result should correspond to re-picking right now");
}
#endif
}

bool TopPending = false;
// Check if the top Q has a better candidate.
LLVM_DEBUG(dbgs() << "Picking from Top:\n");
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
TopPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
@@ -420,6 +494,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
SchedCandidate TCand;
TCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
TopPending,
/*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
"Last pick result should correspond to re-picking right now");
@@ -430,12 +505,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
dbgs() << "Bot Cand: "; traceCandidate(BotCand););
SchedCandidate Cand = BotCand;
TopCand.Reason = NoCand;
tryCandidate(Cand, TopCand, nullptr);
if (TopCand.Reason != NoCand) {
Cand.setBest(TopCand);
SchedCandidate Cand = BotPending ? TopCand : BotCand;
SchedCandidate TryCand = BotPending ? BotCand : TopCand;
PickedPending = BotPending && TopPending;

TryCand.Reason = NoCand;
if (BotPending || TopPending) {
PickedPending |= tryPendingCandidate(Cand, TopCand, nullptr);
} else {
tryCandidate(Cand, TryCand, nullptr);
}

if (TryCand.Reason != NoCand) {
Cand.setBest(TryCand);
}

LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

IsTopNode = Cand.AtTop;
Expand All @@ -450,35 +534,46 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
return nullptr;
}
bool PickedPending;
SUnit *SU;
do {
PickedPending = false;
if (RegionPolicy.OnlyTopDown) {
SU = Top.pickOnlyChoice();
SU = pickOnlyChoice(Top, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
TopCand.reset(NoPolicy);
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
PickedPending,
/*IsBottomUp=*/false);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
SU = TopCand.SU;
}
IsTopNode = true;
} else if (RegionPolicy.OnlyBottomUp) {
SU = Bot.pickOnlyChoice();
SU = pickOnlyChoice(Bot, SchedModel);
if (!SU) {
CandPolicy NoPolicy;
BotCand.reset(NoPolicy);
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
PickedPending,
/*IsBottomUp=*/true);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
SU = BotCand.SU;
}
IsTopNode = false;
} else {
SU = pickNodeBidirectional(IsTopNode);
SU = pickNodeBidirectional(IsTopNode, PickedPending);
}
} while (SU->isScheduled);

if (PickedPending) {
unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
SchedBoundary &Zone = IsTopNode ? Top : Bot;
Zone.bumpCycle(ReadyCycle);
Zone.releasePending();
}

if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -524,6 +619,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
return *std::next(CurrentStage);
}

bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return true;
}

// Bias PhysReg Defs and copies to their uses and defined respectively.
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return TryCand.Reason != NoCand;

// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() &&
tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
RegExcess, TRI, DAG->MF))
return TryCand.Reason != NoCand;

// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() &&
tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI, DAG->MF))
return TryCand.Reason != NoCand;

bool SameBoundary = Zone != nullptr;
if (SameBoundary) {
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources, TryCand, Cand,
ResourceDemand))
return TryCand.Reason != NoCand;
}

return false;
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
21 changes: 19 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -44,17 +44,34 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
SUnit *pickNodeBidirectional(bool &IsTopNode);
SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);

void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand, bool IsBottomUp);
SchedCandidate &Cand, bool &IsPending,
bool IsBottomUp);

void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI, unsigned SGPRPressure,
unsigned VGPRPressure, bool IsBottomUp);

/// Evaluates instructions in the pending queue using a subset of scheduling
/// heuristics.
///
/// Instructions that cannot be issued due to hardware constraints are placed
/// in the pending queue rather than the available queue, making them normally
/// invisible to scheduling heuristics. However, in certain scenarios (such as
/// avoiding register spilling), it may be beneficial to consider scheduling
/// these not-yet-ready instructions.
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
SchedBoundary *Zone) const;
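
The doc comment above gives the rationale; mechanically, when a pending candidate ends up being picked, the strategy first advances the zone to that instruction's ready cycle so it can legally issue. Condensed from the GCNSchedStrategy::pickNode() change in the .cpp diff above (shown here for context, not new code):

  // A pending pick is only legal once the boundary has reached the SUnit's
  // ready cycle.
  if (PickedPending) {
    unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
    SchedBoundary &Zone = IsTopNode ? Top : Bot;
    Zone.bumpCycle(ReadyCycle); // advance the current cycle to ReadyCycle
    Zone.releasePending();      // move newly-ready SUnits to the Available queue
  }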

#ifndef NDEBUG
void printCandidateDecision(const SchedCandidate &Current,
const SchedCandidate &Preferred);
#endif

std::vector<unsigned> Pressure;

std::vector<unsigned> MaxPressure;
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -947,6 +947,7 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
@@ -1201,7 +1202,6 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
@@ -1466,6 +1466,7 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
@@ -1720,7 +1721,6 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012