Skip to content

Commit 82d7405

Browse files
authored
[AMDGPU] Use S_ADD_PC_I64 for long branches in gfx1250 (#148961)
1 parent 2d6534b commit 82d7405

File tree

7 files changed

+695
-6
lines changed

7 files changed

+695
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2667,6 +2667,9 @@ def HasDefaultComponentBroadcast
26672667
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
26682668
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
26692669

2670+
// Predicate for the 64-bit add-to-PC instruction: code generation checks
// Subtarget->hasAddPC64Inst(), the assembler checks FeatureGFX1250Insts.
def HasAddPC64Inst : Predicate<"Subtarget->hasAddPC64Inst()">,
2671+
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
2672+
26702673
def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">;
26712674

26722675
def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,6 +1377,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13771377
return HasMinimum3Maximum3F16;
13781378
}
13791379

1380+
/// \returns true if the subtarget has the S_ADD_PC_I64 instruction; this is
/// currently tied to the GFX1250Insts flag.
bool hasAddPC64Inst() const { return GFX1250Insts; }
1381+
13801382
bool hasMinimum3Maximum3PKF16() const {
13811383
return HasMinimum3Maximum3PKF16;
13821384
}

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2930,9 +2930,9 @@ bool SIInstrInfo::findCommutedOpIndices(const MCInstrDesc &Desc,
29302930

29312931
bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
29322932
int64_t BrOffset) const {
2933-
// BranchRelaxation should never have to check s_setpc_b64 because its dest
2934-
// block is unanalyzable.
2935-
assert(BranchOp != AMDGPU::S_SETPC_B64);
2933+
// BranchRelaxation should never have to check s_setpc_b64 or s_add_pc_i64
2934+
// because their dest blocks are unanalyzable.
2935+
assert(isSOPP(BranchOp) || isSOPK(BranchOp));
29362936

29372937
// Convert to dwords.
29382938
BrOffset /= 4;
@@ -2973,13 +2973,30 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
29732973
MachineFunction *MF = MBB.getParent();
29742974
MachineRegisterInfo &MRI = MF->getRegInfo();
29752975
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2976+
auto I = MBB.end();
2977+
auto &MCCtx = MF->getContext();
2978+
2979+
if (ST.hasAddPC64Inst()) {
2980+
MCSymbol *Offset =
2981+
MCCtx.createTempSymbol("offset", /*AlwaysAddSuffix=*/true);
2982+
auto AddPC = BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_PC_I64))
2983+
.addSym(Offset, MO_FAR_BRANCH_OFFSET);
2984+
MCSymbol *PostAddPCLabel =
2985+
MCCtx.createTempSymbol("post_addpc", /*AlwaysAddSuffix=*/true);
2986+
AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
2987+
auto *OffsetExpr = MCBinaryExpr::createSub(
2988+
MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
2989+
MCSymbolRefExpr::create(PostAddPCLabel, MCCtx), MCCtx);
2990+
Offset->setVariableValue(OffsetExpr);
2991+
return;
2992+
}
2993+
2994+
assert(RS && "RegScavenger required for long branching");
29762995

29772996
// FIXME: Virtual register workaround for RegScavenger not working with empty
29782997
// blocks.
29792998
Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
29802999

2981-
auto I = MBB.end();
2982-
29833000
// Note: as this is used after hazard recognizer we need to apply some hazard
29843001
// workarounds directly.
29853002
const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
@@ -2995,7 +3012,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
29953012
MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
29963013
ApplyHazardWorkarounds();
29973014

2998-
auto &MCCtx = MF->getContext();
29993015
MCSymbol *PostGetPCLabel =
30003016
MCCtx.createTempSymbol("post_getpc", /*AlwaysAddSuffix=*/true);
30013017
GetPC->setPostInstrSymbol(*MF, PostGetPCLabel);

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
154154
let has_sdst = 0;
155155
}
156156

157+
// SOP1 pseudo with an empty outs list and a single source operand, $src0, of
// operand class SSrc_b64. has_sdst is cleared because there is no destination.
class SOP1_1_REGIMM64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
158+
opName, (outs), (ins SSrc_b64:$src0), "$src0", pattern> {
159+
let has_sdst = 0;
160+
}
157161

158162
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
159163
(ops node:$src0),
@@ -317,6 +321,9 @@ let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in {
317321

318322
let isBranch = 1, isIndirectBranch = 1 in {
319323
def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
324+
325+
// s_add_pc_i64 is only available when HasAddPC64Inst holds. It is defined
// inside the enclosing isBranch/isIndirectBranch let-block, next to S_SETPC_B64.
let SubtargetPredicate = HasAddPC64Inst in
326+
def S_ADD_PC_I64 : SOP1_1_REGIMM64 <"s_add_pc_i64">;
320327
} // End isBranch = 1, isIndirectBranch = 1
321328

322329
let isReturn = 1 in {
@@ -2130,6 +2137,9 @@ defm S_GET_BARRIER_STATE_IMM : SOP1_IMM_Real_gfx12<0x050>;
21302137
defm S_ALLOC_VGPR : SOP1_Real_gfx12<0x053>;
21312138
defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;
21322139

2140+
// GFX1250: real definition of S_ADD_PC_I64, instantiated through
// SOP1_Real_gfx12 with the value 0x04b.
2141+
defm S_ADD_PC_I64 : SOP1_Real_gfx12<0x04b>;
2142+
21332143
//===----------------------------------------------------------------------===//
21342144
// SOP1 - GFX1150, GFX12
21352145
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)