-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[AMDGPU] VOPD/VOPD3 changes for gfx1250 #147602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rampitec
wants to merge
1
commit into
main
Choose a base branch
from
users/rampitec/07-08-_amdgpu_vopd_vopd3_mc_changes_for_gfx1250
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+66,391
−229
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -335,6 +335,20 @@ class AMDGPUOperand : public MCParsedAsmOperand { | |
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); | ||
} | ||
|
||
bool isRegOrInlineImmWithFP64InputMods() const { | ||
return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64); | ||
} | ||
|
||
bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); } | ||
|
||
bool isVRegWithFP32InputMods() const { | ||
return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID); | ||
} | ||
|
||
bool isVRegWithFP64InputMods() const { | ||
return isVRegWithInputMods(AMDGPU::VReg_64RegClassID); | ||
} | ||
|
||
bool isPackedFP16InputMods() const { | ||
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16); | ||
} | ||
|
@@ -527,7 +541,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { | |
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); | ||
} | ||
|
||
bool isVCSrcB64() const { | ||
bool isVCSrc_b64() const { | ||
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); | ||
} | ||
|
||
|
@@ -553,7 +567,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { | |
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); | ||
} | ||
|
||
bool isVCSrcF64() const { | ||
bool isVCSrc_f64() const { | ||
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); | ||
} | ||
|
||
|
@@ -601,7 +615,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { | |
return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr(); | ||
} | ||
|
||
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); } | ||
bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); } | ||
|
||
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); } | ||
|
||
|
@@ -617,23 +631,19 @@ class AMDGPUOperand : public MCParsedAsmOperand { | |
|
||
bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); } | ||
|
||
bool isVCSrcV2FP32() const { | ||
return isVCSrcF64(); | ||
} | ||
bool isVCSrcV2FP32() const { return isVCSrc_f64(); } | ||
|
||
bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); } | ||
|
||
bool isVCSrcV2INT32() const { | ||
return isVCSrcB64(); | ||
} | ||
bool isVCSrc_v2b32() const { return isVCSrc_b64(); } | ||
|
||
bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); } | ||
|
||
bool isVSrc_f32() const { | ||
return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr(); | ||
} | ||
|
||
bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); } | ||
bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); } | ||
|
||
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); } | ||
|
||
|
@@ -1527,6 +1537,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { | |
|
||
bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } | ||
|
||
bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); } | ||
|
||
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } | ||
|
||
bool isGFX10_BEncoding() const { | ||
|
@@ -1774,8 +1786,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { | |
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); | ||
bool validateSOPLiteral(const MCInst &Inst) const; | ||
bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); | ||
bool validateVOPDRegBankConstraints(const MCInst &Inst, | ||
const OperandVector &Operands); | ||
std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst, | ||
bool AsVOPD3); | ||
bool validateVOPD(const MCInst &Inst, const OperandVector &Operands); | ||
bool tryVOPD(const MCInst &Inst); | ||
bool tryVOPD3(const MCInst &Inst); | ||
bool tryAnotherVOPDEncoding(const MCInst &Inst); | ||
|
||
bool validateIntClampSupported(const MCInst &Inst); | ||
bool validateMIMGAtomicDMask(const MCInst &Inst); | ||
bool validateMIMGGatherDMask(const MCInst &Inst); | ||
|
@@ -3505,6 +3522,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { | |
} | ||
} | ||
|
||
// Asm can first try to match VOPD or VOPD3. By failing early here with | ||
// Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD. | ||
// Checking later during validateInstruction does not give a chance to retry | ||
// parsing as a different encoding. | ||
if (tryAnotherVOPDEncoding(Inst)) | ||
return Match_InvalidOperand; | ||
|
||
return Match_Success; | ||
} | ||
|
||
|
@@ -3685,8 +3709,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode, | |
|
||
return {getNamedOperandIdx(Opcode, OpName::src0X), | ||
getNamedOperandIdx(Opcode, OpName::vsrc1X), | ||
getNamedOperandIdx(Opcode, OpName::vsrc2X), | ||
getNamedOperandIdx(Opcode, OpName::src0Y), | ||
getNamedOperandIdx(Opcode, OpName::vsrc1Y), | ||
getNamedOperandIdx(Opcode, OpName::vsrc2Y), | ||
ImmXIdx, | ||
ImmIdx}; | ||
} | ||
|
@@ -3816,12 +3842,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations( | |
return false; | ||
} | ||
|
||
bool AMDGPUAsmParser::validateVOPDRegBankConstraints( | ||
const MCInst &Inst, const OperandVector &Operands) { | ||
std::optional<unsigned> | ||
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) { | ||
|
||
const unsigned Opcode = Inst.getOpcode(); | ||
if (!isVOPD(Opcode)) | ||
return true; | ||
return {}; | ||
|
||
const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | ||
|
||
|
@@ -3833,23 +3859,73 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( | |
}; | ||
|
||
// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. | ||
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; | ||
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 || | ||
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 || | ||
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250; | ||
bool AllowSameVGPR = isGFX1250(); | ||
|
||
if (AsVOPD3) { // Literal constants are not allowed with VOPD3. | ||
for (auto OpName : {OpName::src0X, OpName::src0Y}) { | ||
int I = getNamedOperandIdx(Opcode, OpName); | ||
const MCOperand &Op = Inst.getOperand(I); | ||
if (!Op.isImm()) | ||
continue; | ||
int64_t Imm = Op.getImm(); | ||
if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) && | ||
!AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm())) | ||
return I; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Returning an int, but the function signature is an unsigned. |
||
} | ||
|
||
for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X, | ||
OpName::vsrc2Y, OpName::imm}) { | ||
int I = getNamedOperandIdx(Opcode, OpName); | ||
if (I == -1) | ||
continue; | ||
const MCOperand &Op = Inst.getOperand(I); | ||
if (Op.isImm()) | ||
return I; | ||
} | ||
} | ||
|
||
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); | ||
auto InvalidCompOprIdx = | ||
InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); | ||
if (!InvalidCompOprIdx) | ||
auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex( | ||
getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3); | ||
|
||
return InvalidCompOprIdx; | ||
} | ||
|
||
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst, | ||
const OperandVector &Operands) { | ||
|
||
unsigned Opcode = Inst.getOpcode(); | ||
bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3; | ||
|
||
if (AsVOPD3) { | ||
for (unsigned I = 0, E = Operands.size(); I != E; ++I) { | ||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | ||
if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) && | ||
(Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS)) | ||
Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions"); | ||
} | ||
} | ||
|
||
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3); | ||
if (!InvalidCompOprIdx.has_value()) | ||
return true; | ||
|
||
auto CompOprIdx = *InvalidCompOprIdx; | ||
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); | ||
auto ParsedIdx = | ||
std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), | ||
InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); | ||
assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); | ||
|
||
auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); | ||
if (CompOprIdx == VOPD::Component::DST) { | ||
Error(Loc, "one dst register must be even and the other odd"); | ||
if (AsVOPD3) | ||
Error(Loc, "dst registers must be distinct"); | ||
else | ||
Error(Loc, "one dst register must be even and the other odd"); | ||
} else { | ||
auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; | ||
Error(Loc, Twine("src") + Twine(CompSrcIdx) + | ||
|
@@ -3859,6 +3935,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( | |
return false; | ||
} | ||
|
||
// \returns true if \p Inst does not satisfy VOPD constraints, but can | ||
// potentially be used as VOPD3 with the same operands. Also returns true when | ||
// the offending operand is dst in both encodings, so that the VOPD3 diagnostic | ||
// is the one reported. | ||
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) { | ||
// First check if it fits VOPD; if it does there is no reason to switch. | ||
auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false); | ||
if (!InvalidCompOprIdx.has_value()) | ||
return false; | ||
|
||
// Then check if it fits VOPD3. | ||
InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true); | ||
if (InvalidCompOprIdx.has_value()) { | ||
// If the failed operand is dst it is better to show the error about the | ||
// VOPD3 instruction as it has more capabilities and the error message will | ||
// be more informative. If the dst is not legal for VOPD3, then it is not | ||
// legal for VOPD either. | ||
if (*InvalidCompOprIdx == VOPD::Component::DST) | ||
return true; | ||
|
||
// Otherwise prefer VOPD as we may find ourselves in an awkward situation | ||
// with a conflict in tied implicit src2 of fmac and no asm operand to | ||
// point to. | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
// \returns true if a VOPD3 instruction can also be represented as a shorter | ||
// VOPD encoding. | ||
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) { | ||
const unsigned Opcode = Inst.getOpcode(); | ||
const auto &II = getVOPDInstInfo(Opcode, &MII); | ||
unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI()); | ||
// Both component opcodes must be reported as usable by getCanBeVOPD(). | ||
if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X || | ||
!getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y) | ||
return false; | ||
|
||
// This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has | ||
// explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot | ||
// be parsed as VOPD which does not accept src2. | ||
if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 || | ||
II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32) | ||
return false; | ||
|
||
// If any modifiers are set this cannot be VOPD. | ||
for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers, | ||
OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers, | ||
OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) { | ||
// Operands this opcode does not have are reported as -1 and skipped. | ||
int I = getNamedOperandIdx(Opcode, OpName); | ||
if (I == -1) | ||
continue; | ||
if (Inst.getOperand(I).getImm()) | ||
return false; | ||
} | ||
|
||
// Defer to tryVOPD3(): switch to VOPD unless it reports that VOPD3 should | ||
// be used for these operands. | ||
return !tryVOPD3(Inst); | ||
} | ||
|
||
// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD | ||
// form but switch to VOPD3 otherwise. | ||
// \returns true if \p Inst, as currently matched, should be given up in favor | ||
// of the other encoding (VOPD vs. VOPD3). Always false unless the target is | ||
// GFX1250 and the opcode is a VOPD one. | ||
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) { | ||
const unsigned Opcode = Inst.getOpcode(); | ||
if (!isGFX1250() || !isVOPD(Opcode)) | ||
return false; | ||
|
||
// Matched as VOPD3: see whether the shorter VOPD form would do. Matched as | ||
// VOPD: see whether VOPD3 is required instead. | ||
if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3) | ||
return tryVOPD(Inst); | ||
return tryVOPD3(Inst); | ||
} | ||
|
||
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { | ||
|
||
const unsigned Opc = Inst.getOpcode(); | ||
|
@@ -5179,7 +5324,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, | |
if (!validateConstantBusLimitations(Inst, Operands)) { | ||
return false; | ||
} | ||
if (!validateVOPDRegBankConstraints(Inst, Operands)) { | ||
if (!validateVOPD(Inst, Operands)) { | ||
return false; | ||
} | ||
if (!validateIntClampSupported(Inst)) { | ||
|
@@ -9180,8 +9325,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { | |
|
||
// Create VOPD MCInst operands using parsed assembler operands. | ||
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { | ||
const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | ||
|
||
auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer | ||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); | ||
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||
Op.addRegOrImmWithFPInputModsOperands(Inst, 2); | ||
return; | ||
} | ||
if (Op.isReg()) { | ||
Op.addRegOperands(Inst, 1); | ||
return; | ||
|
@@ -9210,6 +9361,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { | |
if (CInfo.hasSrc2Acc()) | ||
addOp(CInfo.getIndexOfDstInParsedOperands()); | ||
} | ||
|
||
int BitOp3Idx = | ||
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3); | ||
if (BitOp3Idx != -1) { | ||
OptionalImmIndexMap OptIdx; | ||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back()); | ||
if (Op.isImm()) | ||
OptIdx[Op.getImmTy()] = Operands.size() - 1; | ||
|
||
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); | ||
} | ||
} | ||
|
||
//===----------------------------------------------------------------------===// | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?