@@ -21722,6 +21722,8 @@ class HorizontalReduction {
21722
21722
/// Checks if the optimization of original scalar identity operations on
21723
21723
/// matched horizontal reductions is enabled and allowed.
21724
21724
bool IsSupportedHorRdxIdentityOp = false;
21725
+ /// The minimum number of the reduced values.
21726
+ const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 3 : 4;
21725
21727
/// Contains vector values for reduction including their scale factor and
21726
21728
/// signedness.
21727
21729
SmallVector<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales;
@@ -21740,7 +21742,8 @@ class HorizontalReduction {
21740
21742
}
21741
21743
21742
21744
/// Checks if instruction is associative and can be vectorized.
21743
- static bool isVectorizable(RecurKind Kind, Instruction *I) {
21745
+ static bool isVectorizable(RecurKind Kind, Instruction *I,
21746
+ bool TwoElementReduction = false) {
21744
21747
if (Kind == RecurKind::None)
21745
21748
return false;
21746
21749
@@ -21749,6 +21752,10 @@ class HorizontalReduction {
21749
21752
isBoolLogicOp(I))
21750
21753
return true;
21751
21754
21755
+ // No need to check for associativity, if 2 reduced values.
21756
+ if (TwoElementReduction)
21757
+ return true;
21758
+
21752
21759
if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
21753
21760
// FP min/max are associative except for NaN and -0.0. We do not
21754
21761
// have to rule out -0.0 here because the intrinsic semantics do not
@@ -22020,6 +22027,27 @@ class HorizontalReduction {
22020
22027
22021
22028
public:
22022
22029
HorizontalReduction() = default;
22030
+ HorizontalReduction(Instruction *I, ArrayRef<Value *> Ops)
22031
+ : ReductionRoot(I), ReductionLimit(2) {
22032
+ RdxKind = HorizontalReduction::getRdxKind(I);
22033
+ ReductionOps.emplace_back().push_back(I);
22034
+ ReducedVals.emplace_back().assign(Ops.begin(), Ops.end());
22035
+ for (Value *V : Ops)
22036
+ ReducedValsToOps[V].push_back(I);
22037
+ }
22038
+
22039
+ bool matchReductionForOperands() const {
22040
+ // Analyze "regular" integer/FP types for reductions - no target-specific
22041
+ // types or pointers.
22042
+ assert(ReductionRoot && "Reduction root is not set!");
22043
+ if (!isVectorizable(RdxKind, cast<Instruction>(ReductionRoot),
22044
+ all_of(ReducedVals, [](ArrayRef<Value *> Ops) {
22045
+ return Ops.size() == 2;
22046
+ })))
22047
+ return false;
22048
+
22049
+ return true;
22050
+ }
22023
22051
22024
22052
/// Try to find a reduction tree.
22025
22053
bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
@@ -22187,7 +22215,6 @@ class HorizontalReduction {
22187
22215
/// Attempt to vectorize the tree found by matchAssociativeReduction.
22188
22216
Value *tryToReduce(BoUpSLP &V, const DataLayout &DL, TargetTransformInfo *TTI,
22189
22217
const TargetLibraryInfo &TLI, AssumptionCache *AC) {
22190
- const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 3 : 4;
22191
22218
constexpr unsigned RegMaxNumber = 4;
22192
22219
constexpr unsigned RedValsMaxNumber = 128;
22193
22220
// If there are a sufficient number of reduction values, reduce
@@ -23736,15 +23763,60 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
23736
23763
Candidates.emplace_back(A1, B);
23737
23764
}
23738
23765
23766
+ auto TryToReduce = [this, &R, &TTI = *TTI](Instruction *Inst,
23767
+ ArrayRef<Value *> Ops) {
23768
+ if (!isReductionCandidate(Inst))
23769
+ return false;
23770
+ Type *Ty = Inst->getType();
23771
+ if (!isValidElementType(Ty) || Ty->isPointerTy())
23772
+ return false;
23773
+ HorizontalReduction HorRdx(Inst, Ops);
23774
+ if (!HorRdx.matchReductionForOperands())
23775
+ return false;
23776
+ // Check the cost of operations.
23777
+ VectorType *VecTy = getWidenedType(Ty, Ops.size());
23778
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
23779
+ InstructionCost ScalarCost =
23780
+ TTI.getScalarizationOverhead(
23781
+ VecTy, APInt::getAllOnes(getNumElements(VecTy)), /*Insert=*/false,
23782
+ /*Extract=*/true, CostKind) +
23783
+ TTI.getInstructionCost(Inst, CostKind);
23784
+ InstructionCost RedCost;
23785
+ switch (::getRdxKind(Inst)) {
23786
+ case RecurKind::Add:
23787
+ case RecurKind::Mul:
23788
+ case RecurKind::Or:
23789
+ case RecurKind::And:
23790
+ case RecurKind::Xor:
23791
+ case RecurKind::FAdd:
23792
+ case RecurKind::FMul: {
23793
+ FastMathFlags FMF;
23794
+ if (auto *FPCI = dyn_cast<FPMathOperator>(Inst))
23795
+ FMF = FPCI->getFastMathFlags();
23796
+ RedCost = TTI.getArithmeticReductionCost(Inst->getOpcode(), VecTy, FMF,
23797
+ CostKind);
23798
+ break;
23799
+ }
23800
+ default:
23801
+ return false;
23802
+ }
23803
+ if (RedCost >= ScalarCost)
23804
+ return false;
23805
+
23806
+ return HorRdx.tryToReduce(R, *DL, &TTI, *TLI, AC) != nullptr;
23807
+ };
23739
23808
if (Candidates.size() == 1)
23740
- return tryToVectorizeList({Op0, Op1}, R);
23809
+ return TryToReduce(I, {Op0, Op1}) || tryToVectorizeList({Op0, Op1}, R);
23741
23810
23742
23811
// We have multiple options. Try to pick the single best.
23743
23812
std::optional<int> BestCandidate = R.findBestRootPair(Candidates);
23744
23813
if (!BestCandidate)
23745
23814
return false;
23746
- return tryToVectorizeList(
23747
- {Candidates[*BestCandidate].first, Candidates[*BestCandidate].second}, R);
23815
+ return TryToReduce(I, {Candidates[*BestCandidate].first,
23816
+ Candidates[*BestCandidate].second}) ||
23817
+ tryToVectorizeList({Candidates[*BestCandidate].first,
23818
+ Candidates[*BestCandidate].second},
23819
+ R);
23748
23820
}
23749
23821
23750
23822
bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Instruction *Root,
0 commit comments