Skip to content

Commit fd6f8b3

Browse files
authored
[AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp instruction. (#120104)
This combine pattern performs the following transformations: fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b)) and fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b)), where A = 2^a and B = 2^b; a and b are integers. This is a follow-up PR that implements the above combine for GlobalISel; the corresponding DAG combine has already been implemented for SelectionDAG ISel (#111109).
1 parent 9ce8f4b commit fd6f8b3

16 files changed

+8050
-5563
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
124124
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
125125
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
126126

127+
// Combine an fmul by a select of power-of-two constants into fldexp:
128+
//   fmul x, select(y, A, B)   -> fldexp (x, select i32 (y, a, b))
129+
//   fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b))
// where A = 2^a and B = 2^b for integers a and b.
130+
// The match step is handed both the G_FMUL root and the G_SELECT that defines
// its second source operand; the apply step runs the recorded build function.
def combine_fmul_with_select_to_fldexp : GICombineRule<
131+
(defs root:$root, build_fn_matchinfo:$matchinfo),
132+
(match (G_FMUL $dst, $x, $select):$root,
133+
(G_SELECT $select, $y, $A, $B):$sel,
134+
[{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
135+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
136+
127137

128138
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
129139
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
@@ -153,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
153163

154164
def AMDGPUPreLegalizerCombiner: GICombiner<
155165
"AMDGPUPreLegalizerCombinerImpl",
156-
[all_combines, clamp_i64_to_i16, foldable_fneg]> {
166+
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> {
157167
let CombineAllMethodName = "tryCombineAllImpl";
158168
}
159169

160170
def AMDGPUPostLegalizerCombiner: GICombiner<
161171
"AMDGPUPostLegalizerCombinerImpl",
162-
[all_combines, gfx6gfx7_combines, gfx8_combines,
172+
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
163173
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
164174
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
165175
let CombineAllMethodName = "tryCombineAllImpl";

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@
1717
using namespace llvm;
1818
using namespace MIPatternMatch;
1919

20+
// Constructs the AMDGPU combiner helper: forwards the observer, builder,
// pre/post-legalizer flag, known-bits, dominator tree and legalizer info to the
// generic CombinerHelper, then keeps a reference to the subtarget (STI) and to
// the instruction info obtained from it (TII).
AMDGPUCombinerHelper::AMDGPUCombinerHelper(
21+
GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
22+
GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
23+
const GCNSubtarget &STI)
24+
: CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
25+
TII(*STI.getInstrInfo()) {}
26+
2027
LLVM_READNONE
2128
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
2229
switch (MI.getOpcode()) {
@@ -445,3 +452,67 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
445452
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
446453
MI.eraseFromParent();
447454
}
455+
456+
// Matches `fmul x, select(y, A, B)` where both select arms are FP constants
// (or constant splat vectors) of the same sign whose magnitudes are exact
// powers of two. On success, stores in MatchInfo a callback that builds
// `fldexp(x, select(y, log2|A|, log2|B|))`, negating x first when the constants
// are negative, and returns true. Returns false without touching MatchInfo when
// any precondition fails.
//
// MI is the G_FMUL root; Sel is the G_SELECT defining MI's second source
// operand (asserted below).
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
457+
MachineInstr &MI, MachineInstr &Sel,
458+
std::function<void(MachineIRBuilder &)> &MatchInfo) {
459+
assert(MI.getOpcode() == TargetOpcode::G_FMUL);
460+
assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
461+
assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());
462+
463+
Register Dst = MI.getOperand(0).getReg();
464+
LLT DestTy = MRI.getType(Dst);
465+
LLT ScalarDestTy = DestTy.getScalarType();
466+
467+
// Only f16/f32/f64 element types are handled, and the select result must have
// exactly one non-debug use (the fmul being combined).
if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
468+
ScalarDestTy != LLT::float16()) ||
469+
!MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
470+
return false;
471+
472+
Register SelectCondReg = Sel.getOperand(1).getReg();
473+
MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
474+
MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());
475+
476+
// Both select arms must be FP constants (scalar or splat vector).
const auto SelectTrueVal =
477+
isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
478+
if (!SelectTrueVal)
479+
return false;
480+
const auto SelectFalseVal =
481+
isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
482+
if (!SelectFalseVal)
483+
return false;
484+
485+
// The arms must agree in sign: the rewrite applies at most one fneg to x,
// chosen from the sign of the true arm, so mixed signs cannot be expressed.
if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
486+
return false;
487+
488+
// For f32, only non-inline constants should be transformed: the combine is
// skipped when both arms are inline constants.
489+
if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
490+
TII.isInlineConstant(*SelectFalseVal))
491+
return false;
492+
493+
// getExactLog2Abs() yields INT_MIN when |value| is not an exact power of two;
// bail out in that case since no integer exponent represents the constant.
int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
494+
if (SelectTrueLog2Val == INT_MIN)
495+
return false;
496+
int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
497+
if (SelectFalseLog2Val == INT_MIN)
498+
return false;
499+
500+
// Deferred rewrite. Locals are captured by value; MI is captured by reference
// and is read again (operand 1, flags) when the callback runs.
MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
501+
// The exponent select uses 32-bit integer elements with the same
// scalar/vector shape as the destination.
LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
502+
auto NewSel = Builder.buildSelect(
503+
IntDestTy, SelectCondReg,
504+
Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
505+
Builder.buildConstant(IntDestTy, SelectFalseLog2Val));
506+
507+
Register XReg = MI.getOperand(1).getReg();
508+
if (SelectTrueVal->isNegative()) {
509+
// Negative constants: fold the sign into x. The fneg takes the flags of the
// instruction that defines x; the fldexp takes the flags of the fmul.
auto NegX =
510+
Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
511+
Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
512+
} else {
513+
Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
514+
}
515+
};
516+
517+
return true;
518+
}

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,22 @@
1515
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
1616
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
1717

18+
#include "GCNSubtarget.h"
1819
#include "llvm/CodeGen/GlobalISel/Combiner.h"
1920
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
2021

2122
namespace llvm {
2223
class AMDGPUCombinerHelper : public CombinerHelper {
24+
protected:
25+
const GCNSubtarget &STI;
26+
const SIInstrInfo &TII;
27+
2328
public:
2429
using CombinerHelper::CombinerHelper;
30+
AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
31+
bool IsPreLegalize, GISelKnownBits *KB,
32+
MachineDominatorTree *MDT, const LegalizerInfo *LI,
33+
const GCNSubtarget &STI);
2534

2635
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
2736
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
@@ -30,6 +39,10 @@ class AMDGPUCombinerHelper : public CombinerHelper {
3039
Register Src1, Register Src2);
3140
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
3241
Register Src1, Register Src2);
42+
43+
bool matchCombineFmulWithSelectToFldexp(
44+
MachineInstr &MI, MachineInstr &Sel,
45+
std::function<void(MachineIRBuilder &)> &MatchInfo);
3346
};
3447

3548
} // namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
134134
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
135135
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
136136
TII(*STI.getInstrInfo()),
137-
Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
137+
Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI, STI),
138138
#define GET_GICOMBINER_CONSTRUCTOR_INITS
139139
#include "AMDGPUGenPostLegalizeGICombiner.inc"
140140
#undef GET_GICOMBINER_CONSTRUCTOR_INITS

llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
9494
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
9595
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
9696
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97-
Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
97+
Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
9898
#define GET_GICOMBINER_CONSTRUCTOR_INITS
9999
#include "AMDGPUGenPreLegalizeGICombiner.inc"
100100
#undef GET_GICOMBINER_CONSTRUCTOR_INITS

0 commit comments

Comments
 (0)