Skip to content

Commit 00a85e5

Browse files
authored
[AMDGPU] gfx1250: MC support for 64-bit literals (#147861)
1 parent 69ff853 commit 00a85e5

16 files changed

+997
-123
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,12 @@ def FeaturePointSampleAccel : SubtargetFeature<"point-sample-accel",
11301130
"Has point sample acceleration feature"
11311131
>;
11321132

1133+
def Feature64BitLiterals : SubtargetFeature<"64-bit-literals",
1134+
"Has64BitLiterals",
1135+
"true",
1136+
"Can use 64-bit literals with single DWORD instructions"
1137+
>;
1138+
11331139
def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt",
11341140
"HasWaitXcnt",
11351141
"true",
@@ -1931,6 +1937,7 @@ def FeatureISAVersion12_50 : FeatureSet<
19311937
[FeatureGFX12,
19321938
FeatureGFX1250Insts,
19331939
FeatureCuMode,
1940+
Feature64BitLiterals,
19341941
FeatureLDSBankCount32,
19351942
FeatureDLInsts,
19361943
FeatureFmacF64Inst,
@@ -2678,6 +2685,9 @@ def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
26782685
def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
26792686
AssemblerPredicate<(all_of FeatureBVHDualAndBVH8Insts)>;
26802687

2688+
def Has64BitLiterals : Predicate<"Subtarget->has64BitLiterals()">,
2689+
AssemblerPredicate<(all_of Feature64BitLiterals)>;
2690+
26812691
def HasWaitXcnt : Predicate<"Subtarget->hasWaitXcnt()">,
26822692
AssemblerPredicate<(all_of FeatureWaitXcnt)>;
26832693

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 95 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
8181
bool Neg = false;
8282
bool Sext = false;
8383
bool Lit = false;
84+
bool Lit64 = false;
8485

8586
bool hasFPModifiers() const { return Abs || Neg; }
8687
bool hasIntModifiers() const { return Sext; }
@@ -480,7 +481,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
480481
bool isSSrc_b64() const {
481482
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
482483
// See isVSrc64().
483-
return isSCSrc_b64() || isLiteralImm(MVT::i64);
484+
return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
485+
(((const MCTargetAsmParser *)AsmParser)
486+
->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
487+
isExpr());
484488
}
485489

486490
bool isSSrc_f32() const {
@@ -1537,6 +1541,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15371541
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
15381542
}
15391543

1544+
bool has64BitLiterals() const {
1545+
return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1546+
}
1547+
15401548
bool hasFlatOffsets() const {
15411549
return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
15421550
}
@@ -1663,10 +1671,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
16631671
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
16641672
bool parseSP3NegModifier();
16651673
ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1666-
bool HasLit = false);
1674+
bool HasLit = false, bool HasLit64 = false);
16671675
ParseStatus parseReg(OperandVector &Operands);
16681676
ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1669-
bool HasLit = false);
1677+
bool HasLit = false, bool HasLit64 = false);
16701678
ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
16711679
bool AllowImm = true);
16721680
ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
@@ -2123,6 +2131,9 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
21232131
return false;
21242132
}
21252133

2134+
bool Allow64Bit =
2135+
(type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2136+
21262137
if (!Imm.IsFPImm) {
21272138
// We got int literal token.
21282139

@@ -2134,8 +2145,11 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
21342145
}
21352146

21362147
unsigned Size = type.getSizeInBits();
2137-
if (Size == 64)
2148+
if (Size == 64) {
2149+
if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2150+
return true;
21382151
Size = 32;
2152+
}
21392153

21402154
// FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
21412155
// types.
@@ -2287,12 +2301,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22872301
}
22882302

22892303
// Non-inlineable
2290-
if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2304+
if (AMDGPU::isSISrcFPOperand(InstDesc,
2305+
OpNum)) { // Expected 64-bit fp operand
2306+
bool HasMandatoryLiteral =
2307+
AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
22912308
// For fp operands we check if low 32 bits are zeros
2292-
if (Literal.getLoBits(32) != 0) {
2293-
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2294-
"Can't encode literal as exact 64-bit floating-point operand. "
2295-
"Low 32-bits will be set to zero");
2309+
if (Literal.getLoBits(32) != 0 &&
2310+
(InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2311+
!HasMandatoryLiteral) {
2312+
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2313+
Inst.getLoc(),
2314+
"Can't encode literal as exact 64-bit floating-point operand. "
2315+
"Low 32-bits will be set to zero");
22962316
Val &= 0xffffffff00000000u;
22972317
}
22982318

@@ -2392,8 +2412,25 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23922412
return;
23932413

23942414
case AMDGPU::OPERAND_REG_IMM_INT64:
2395-
case AMDGPU::OPERAND_REG_IMM_FP64:
23962415
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2416+
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2417+
Inst.addOperand(MCOperand::createImm(Val));
2418+
setImmKindConst();
2419+
return;
2420+
}
2421+
2422+
// When the 32 MSBs are not zero (meaning the value cannot be safely
2423+
// truncated to uint32_t), if the target doesn't support 64-bit literals, or
2424+
// the lit modifier is explicitly used, we need to truncate it to the 32
2425+
// LSBs.
2426+
if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2427+
Val = Lo_32(Val);
2428+
2429+
Inst.addOperand(MCOperand::createImm(Val));
2430+
setImmKindLiteral();
2431+
return;
2432+
2433+
case AMDGPU::OPERAND_REG_IMM_FP64:
23972434
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
23982435
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
23992436
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
@@ -2402,8 +2439,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
24022439
return;
24032440
}
24042441

2405-
Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2406-
: Lo_32(Val);
2442+
// If the target doesn't support 64-bit literals, we need to use the
2443+
// constant as the 32 MSBs of a double-precision floating-point value.
2444+
if (!AsmParser->has64BitLiterals()) {
2445+
Val = static_cast<uint64_t>(Val) << 32;
2446+
} else {
2447+
// The target supports 64-bit literals here, but there are two cases
2448+
// where we still want to use src_literal encoding:
2449+
// 1) explicitly forced by using lit modifier;
2450+
// 2) the value is a valid 32-bit representation (signed or unsigned),
2451+
// and the lit64 modifier is not used.
2452+
if (getModifiers().Lit ||
2453+
(!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2454+
Val = static_cast<uint64_t>(Val) << 32;
2455+
}
24072456

24082457
Inst.addOperand(MCOperand::createImm(Val));
24092458
setImmKindLiteral();
@@ -3151,19 +3200,20 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
31513200
}
31523201

31533202
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3154-
bool HasSP3AbsModifier, bool HasLit) {
3203+
bool HasSP3AbsModifier, bool HasLit,
3204+
bool HasLit64) {
31553205
// TODO: add syntactic sugar for 1/(2*PI)
31563206

3157-
if (isRegister())
3207+
if (isRegister() || isModifier())
31583208
return ParseStatus::NoMatch;
3159-
assert(!isModifier());
31603209

3161-
if (!HasLit) {
3162-
HasLit = trySkipId("lit");
3163-
if (HasLit) {
3210+
if (!HasLit && !HasLit64) {
3211+
HasLit64 = trySkipId("lit64");
3212+
HasLit = !HasLit64 && trySkipId("lit");
3213+
if (HasLit || HasLit64) {
31643214
if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
31653215
return ParseStatus::Failure;
3166-
ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3216+
ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
31673217
if (S.isSuccess() &&
31683218
!skipToken(AsmToken::RParen, "expected closing parentheses"))
31693219
return ParseStatus::Failure;
@@ -3185,6 +3235,7 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
31853235

31863236
AMDGPUOperand::Modifiers Mods;
31873237
Mods.Lit = HasLit;
3238+
Mods.Lit64 = HasLit64;
31883239

31893240
if (IsReal) {
31903241
// Floating-point expressions are not supported.
@@ -3235,7 +3286,7 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
32353286
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
32363287
Op.setModifiers(Mods);
32373288
} else {
3238-
if (HasLit)
3289+
if (HasLit || HasLit64)
32393290
return ParseStatus::NoMatch;
32403291
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
32413292
}
@@ -3259,13 +3310,14 @@ ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
32593310
}
32603311

32613312
ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3262-
bool HasSP3AbsMod, bool HasLit) {
3313+
bool HasSP3AbsMod, bool HasLit,
3314+
bool HasLit64) {
32633315
ParseStatus Res = parseReg(Operands);
32643316
if (!Res.isNoMatch())
32653317
return Res;
32663318
if (isModifier())
32673319
return ParseStatus::NoMatch;
3268-
return parseImm(Operands, HasSP3AbsMod, HasLit);
3320+
return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
32693321
}
32703322

32713323
bool
@@ -3361,7 +3413,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
33613413
bool AllowImm) {
33623414
bool Neg, SP3Neg;
33633415
bool Abs, SP3Abs;
3364-
bool Lit;
3416+
bool Lit64, Lit;
33653417
SMLoc Loc;
33663418

33673419
// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
@@ -3381,7 +3433,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
33813433
if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
33823434
return ParseStatus::Failure;
33833435

3384-
Lit = trySkipId("lit");
3436+
Lit64 = trySkipId("lit64");
3437+
if (Lit64) {
3438+
if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3439+
return ParseStatus::Failure;
3440+
if (!has64BitLiterals())
3441+
return Error(Loc, "lit64 is not supported on this GPU");
3442+
}
3443+
3444+
Lit = !Lit64 && trySkipId("lit");
33853445
if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
33863446
return ParseStatus::Failure;
33873447

@@ -3392,14 +3452,16 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
33923452

33933453
ParseStatus Res;
33943454
if (AllowImm) {
3395-
Res = parseRegOrImm(Operands, SP3Abs, Lit);
3455+
Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
33963456
} else {
33973457
Res = parseReg(Operands);
33983458
}
33993459
if (!Res.isSuccess())
3400-
return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3460+
return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3461+
? ParseStatus::Failure
3462+
: Res;
34013463

3402-
if (Lit && !Operands.back()->isImm())
3464+
if ((Lit || Lit64) && !Operands.back()->isImm())
34033465
Error(Loc, "expected immediate with lit modifier");
34043466

34053467
if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
@@ -3408,15 +3470,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
34083470
return ParseStatus::Failure;
34093471
if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
34103472
return ParseStatus::Failure;
3411-
if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3473+
if ((Lit || Lit64) &&
3474+
!skipToken(AsmToken::RParen, "expected closing parentheses"))
34123475
return ParseStatus::Failure;
34133476

34143477
AMDGPUOperand::Modifiers Mods;
34153478
Mods.Abs = Abs || SP3Abs;
34163479
Mods.Neg = Neg || SP3Neg;
34173480
Mods.Lit = Lit;
3481+
Mods.Lit64 = Lit64;
34183482

3419-
if (Mods.hasFPModifiers() || Lit) {
3483+
if (Mods.hasFPModifiers() || Lit || Lit64) {
34203484
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
34213485
if (Op.isExpr())
34223486
return Error(Op.getStartLoc(), "expected an absolute expression");
@@ -4588,7 +4652,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
45884652

45894653
unsigned NumExprs = 0;
45904654
unsigned NumLiterals = 0;
4591-
uint32_t LiteralValue;
4655+
uint64_t LiteralValue;
45924656

45934657
for (int OpIdx : OpIndices) {
45944658
if (OpIdx == -1) break;
@@ -4597,7 +4661,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
45974661
// Exclude special imm operands (like that used by s_set_gpr_idx_on)
45984662
if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
45994663
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4600-
uint32_t Value = static_cast<uint32_t>(MO.getImm());
4664+
uint64_t Value = static_cast<uint64_t>(MO.getImm());
46014665
if (NumLiterals == 0 || LiteralValue != Value) {
46024666
LiteralValue = Value;
46034667
++NumLiterals;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,20 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
14841484
return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
14851485
}
14861486

1487+
MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1488+
assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1489+
1490+
if (!HasLiteral) {
1491+
if (Bytes.size() < 8) {
1492+
return errOperand(0, "cannot read literal64, inst bytes left " +
1493+
Twine(Bytes.size()));
1494+
}
1495+
HasLiteral = true;
1496+
Literal64 = eatBytes<uint64_t>(Bytes);
1497+
}
1498+
return MCOperand::createImm(Literal64);
1499+
}
1500+
14871501
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
14881502
using namespace AMDGPU::EncValues;
14891503

@@ -1767,6 +1781,10 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
17671781
Val == LITERAL_CONST)
17681782
return MCOperand::createImm(Val);
17691783

1784+
if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1785+
return decodeLiteral64Constant();
1786+
}
1787+
17701788
switch (Width) {
17711789
case 32:
17721790
case 16:

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ class AMDGPUDisassembler : public MCDisassembler {
179179

180180
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
181181
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
182+
MCOperand decodeLiteral64Constant() const;
182183

183184
MCOperand decodeSrcOp(unsigned Width, unsigned Val) const;
184185

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
231231
bool HasSALUFloatInsts = false;
232232
bool HasPseudoScalarTrans = false;
233233
bool HasRestrictedSOffset = false;
234+
bool Has64BitLiterals = false;
234235
bool HasBitOp3Insts = false;
235236
bool HasTransposeLoadF4F6Insts = false;
236237
bool HasPrngInst = false;
@@ -1384,6 +1385,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13841385
/// GFX1250.
13851386
bool hasWaitXCnt() const { return HasWaitXcnt; }
13861387

1388+
// Single-DWORD instructions can use a 64-bit literal.
1389+
bool has64BitLiterals() const { return Has64BitLiterals; }
1390+
13871391
bool hasPointSampleAccel() const { return HasPointSampleAccel; }
13881392

13891393
bool hasLdsBarrierArriveAtomic() const { return HasLdsBarrierArriveAtomic; }

0 commit comments

Comments
 (0)