Skip to content

Commit d0a4af7

Browse files
rampitec and mbrkusanin authored
[AMDGPU] Add FeatureIEEEMinimumMaximumInsts. NFCI. (#147594)
Co-authored-by: Mirko Brkušanin <Mirko.Brkusanin@amd.com>
1 parent 5b87718 commit d0a4af7

File tree

8 files changed

+35
-21
lines changed

8 files changed

+35
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
143143
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
144144
>;
145145

146+
def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
147+
"HasIEEEMinimumMaximumInsts",
148+
"true",
149+
"Has v_minimum/maximum_f16/f32/f64, v_minimummaximum/maximumminimum_f16/f32 and v_pk_minimum/maximum_f16 instructions"
150+
>;
151+
146152
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
147153
"HasMinimum3Maximum3F32",
148154
"true",
@@ -1471,8 +1477,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
14711477
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
14721478
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
14731479
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1474-
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
1475-
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
1480+
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
1481+
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
14761482
]
14771483
>;
14781484

@@ -1907,6 +1913,7 @@ def FeatureISAVersion12 : FeatureSet<
19071913
FeatureImageInsts,
19081914
FeatureExtendedImageInsts,
19091915
FeatureFP8ConversionInsts,
1916+
FeatureIEEEMinimumMaximumInsts,
19101917
FeaturePackedTID,
19111918
FeatureVcmpxPermlaneHazard,
19121919
FeatureSALUFloatInsts,
@@ -2298,6 +2305,10 @@ def isNotGFX1250Plus :
22982305
Predicate<"!Subtarget->hasGFX1250Insts()">,
22992306
AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
23002307

2308+
def HasIEEEMinimumMaximumInsts :
2309+
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
2310+
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
2311+
23012312
def HasMinimum3Maximum3F32 :
23022313
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
23032314
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2095,7 +2095,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
20952095
G_SADDO, G_SSUBO})
20962096
.lower();
20972097

2098-
if (ST.hasIEEEMinMax()) {
2098+
if (ST.hasIEEEMinimumMaximumInsts()) {
20992099
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
21002100
.legalFor(FPTypesPK16)
21012101
.clampMaxNumElements(0, S16, 2)

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
259259
bool HasRequiredExportPriority = false;
260260
bool HasVmemWriteVgprInOrder = false;
261261
bool HasAshrPkInsts = false;
262+
bool HasIEEEMinimumMaximumInsts = false;
262263
bool HasMinimum3Maximum3F32 = false;
263264
bool HasMinimum3Maximum3F16 = false;
264265
bool HasMinimum3Maximum3PKF16 = false;
@@ -1466,10 +1467,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
14661467
bool hasIEEEMode() const { return getGeneration() < GFX12; }
14671468

14681469
// \returns true if the target has IEEE fminimum/fmaximum instructions
1469-
bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1470-
1471-
// \returns true if the target has IEEE fminimum3/fmaximum3 instructions
1472-
bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
1470+
bool hasIEEEMinimumMaximumInsts() const { return HasIEEEMinimumMaximumInsts; }
14731471

14741472
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
14751473
bool hasRrWGMode() const { return getGeneration() >= GFX12; }

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
877877
if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
878878
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
879879

880-
if (Subtarget->hasIEEEMinMax()) {
880+
if (Subtarget->hasIEEEMinimumMaximumInsts()) {
881881
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
882882
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
883883
} else {
@@ -7129,7 +7129,8 @@ SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
71297129
if (VT.isVector())
71307130
return splitBinaryVectorOp(Op, DAG);
71317131

7132-
assert(!Subtarget->hasIEEEMinMax() && !Subtarget->hasMinimum3Maximum3F16() &&
7132+
assert(!Subtarget->hasIEEEMinimumMaximumInsts() &&
7133+
!Subtarget->hasMinimum3Maximum3F16() &&
71337134
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
71347135
"should not need to widen f16 minimum/maximum to v2f16");
71357136

@@ -14042,7 +14043,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
1404214043
// operand form.
1404314044
const SDNodeFlags Flags = N->getFlags();
1404414045
if ((Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM) &&
14045-
!Subtarget->hasIEEEMinMax() && Flags.hasNoNaNs()) {
14046+
!Subtarget->hasIEEEMinimumMaximumInsts() && Flags.hasNoNaNs()) {
1404614047
unsigned NewOpc =
1404714048
Opc == ISD::FMINIMUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
1404814049
return DAG.getNode(NewOpc, SDLoc(N), VT, Op0, Op1, Flags);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3956,21 +3956,21 @@ let True16Predicate = UseFakeTrue16Insts in {
39563956
}
39573957
} // End SubtargetPredicate = [isGFX9Plus]
39583958

3959-
let SubtargetPredicate = isGFX12Plus in {
3959+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39603960
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39613961
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39623962
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39633963
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39643964
}
39653965

3966-
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
3966+
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39673967
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39683968
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39693969
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39703970
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39713971
}
39723972

3973-
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
3973+
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39743974
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39753975
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39763976
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,14 +1395,18 @@ let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
13951395
defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
13961396
defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>;
13971397
} // End FPDPRounding = 1
1398-
defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
1399-
defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
14001398
} // End SchedRW = [WriteDoubleAdd], isCommutable = 1
14011399
let SchedRW = [Write64Bit] in {
14021400
defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
14031401
} // End SchedRW = [Write64Bit]
14041402
} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1
14051403

1404+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, isReMaterializable = 1,
1405+
SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
1406+
defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
1407+
defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
1408+
}
1409+
14061410
//===----------------------------------------------------------------------===//
14071411
// DPP Encodings
14081412
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
171171
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
172172
} // End SchedRW = [WriteIntMul]
173173

174-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
174+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1 in {
175175
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
176176
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
177177
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
@@ -181,7 +181,7 @@ let SchedRW = [WriteDoubleAdd] in {
181181
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
182182
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
183183
} // End SchedRW = [WriteDoubleAdd]
184-
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1
184+
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1
185185

186186
} // End isReMaterializable = 1
187187

@@ -1532,12 +1532,12 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
15321532
def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_f16_f32, V_CVT_SR_F16_F32_e64, v2f16>;
15331533
}
15341534

1535-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
1535+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
15361536
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
15371537
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
15381538
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
15391539
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
1540-
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
1540+
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
15411541

15421542
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
15431543
defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I1
115115
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
116116
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
117117

118-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
118+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
119119
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fmaximum>;
120120
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fminimum>;
121-
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
121+
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
122122
}
123123

124124
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;

0 commit comments

Comments
 (0)