@@ -528,11 +528,10 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
528
528
case VPRecipeBase::VPDerivedIVSC:
529
529
case VPRecipeBase::VPEVLBasedIVPHISC:
530
530
case VPRecipeBase::VPExpandSCEVSC:
531
+ case VPRecipeBase::VPExpressionSC:
531
532
case VPRecipeBase::VPInstructionSC:
532
533
case VPRecipeBase::VPReductionEVLSC:
533
534
case VPRecipeBase::VPReductionSC:
534
- case VPRecipeBase::VPMulAccumulateReductionSC:
535
- case VPRecipeBase::VPExtendedReductionSC:
536
535
case VPRecipeBase::VPReplicateSC:
537
536
case VPRecipeBase::VPScalarIVStepsSC:
538
537
case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852
851
R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
853
852
R->getVPDefID () == VPRecipeBase::VPReplicateSC ||
854
853
R->getVPDefID () == VPRecipeBase::VPVectorEndPointerSC ||
855
- R->getVPDefID () == VPRecipeBase::VPVectorPointerSC ||
856
- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
857
- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
854
+ R->getVPDefID () == VPRecipeBase::VPVectorPointerSC;
858
855
}
859
856
860
857
static inline bool classof (const VPUser *U) {
@@ -2440,28 +2437,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2440
2437
setUnderlyingValue (I);
2441
2438
}
2442
2439
2443
- // / For VPExtendedReductionRecipe.
2444
- // / Note that the debug location is from the extend.
2445
- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2446
- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2447
- bool IsOrdered, DebugLoc DL)
2448
- : VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2449
- IsOrdered(IsOrdered), IsConditional(CondOp) {
2450
- if (CondOp)
2451
- addOperand (CondOp);
2452
- }
2453
-
2454
- // / For VPMulAccumulateReductionRecipe.
2455
- // / Note that the NUW/NSW flags and the debug location are from the Mul.
2456
- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2457
- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2458
- bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2459
- : VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2460
- IsOrdered(IsOrdered), IsConditional(CondOp) {
2461
- if (CondOp)
2462
- addOperand (CondOp);
2463
- }
2464
-
2465
2440
public:
2466
2441
VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
2467
2442
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2487,9 +2462,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2487
2462
2488
2463
/// Returns true if the VPDef ID of \p R is VPReductionSC or VPReductionEVLSC,
/// and false for every other ID.
static inline bool classof(const VPRecipeBase *R) {
  switch (R->getVPDefID()) {
  case VPRecipeBase::VPReductionSC:
  case VPRecipeBase::VPReductionEVLSC:
    return true;
  default:
    return false;
  }
}
2494
2467
2495
2468
static inline bool classof (const VPUser *U) {
@@ -2628,190 +2601,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
2628
2601
}
2629
2602
};
2630
2603
2631
- // / A recipe to represent inloop extended reduction operations, performing a
2632
- // / reduction on a extended vector operand into a scalar value, and adding the
2633
- // / result to a chain. This recipe is abstract and needs to be lowered to
2634
- // / concrete recipes before codegen. The operands are {ChainOp, VecOp,
2635
- // / [Condition]}.
2636
- class VPExtendedReductionRecipe : public VPReductionRecipe {
2637
- // / Opcode of the extend for VecOp.
2638
- Instruction::CastOps ExtOp;
2639
-
2640
- // / The scalar type after extending.
2641
- Type *ResultTy;
2642
-
2643
- // / For cloning VPExtendedReductionRecipe.
2644
- VPExtendedReductionRecipe (VPExtendedReductionRecipe *ExtRed)
2645
- : VPReductionRecipe(
2646
- VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind (),
2647
- {ExtRed->getChainOp (), ExtRed->getVecOp ()}, ExtRed->getCondOp (),
2648
- ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2649
- ExtOp(ExtRed->getExtOpcode ()), ResultTy(ExtRed->getResultType ()) {
2650
- transferFlags (*ExtRed);
2651
- setUnderlyingValue (ExtRed->getUnderlyingValue ());
2652
- }
2653
-
2654
- public:
2655
- VPExtendedReductionRecipe (VPReductionRecipe *R, VPWidenCastRecipe *Ext)
2656
- : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind (),
2657
- {R->getChainOp (), Ext->getOperand (0 )}, R->getCondOp (),
2658
- R->isOrdered(), Ext->getDebugLoc()),
2659
- ExtOp(Ext->getOpcode ()), ResultTy(Ext->getResultType ()) {
2660
- assert ((ExtOp == Instruction::CastOps::ZExt ||
2661
- ExtOp == Instruction::CastOps::SExt) &&
2662
- " VPExtendedReductionRecipe only supports zext and sext." );
2663
-
2664
- transferFlags (*Ext);
2665
- setUnderlyingValue (R->getUnderlyingValue ());
2666
- }
2667
-
2668
- ~VPExtendedReductionRecipe () override = default ;
2669
-
2670
- VPExtendedReductionRecipe *clone () override {
2671
- return new VPExtendedReductionRecipe (this );
2672
- }
2673
-
2674
- VP_CLASSOF_IMPL (VPDef::VPExtendedReductionSC);
2675
-
2676
- void execute (VPTransformState &State) override {
2677
- llvm_unreachable (" VPExtendedReductionRecipe should be transform to "
2678
- " VPExtendedRecipe + VPReductionRecipe before execution." );
2679
- };
2680
-
2681
- // / Return the cost of VPExtendedReductionRecipe.
2682
- InstructionCost computeCost (ElementCount VF,
2683
- VPCostContext &Ctx) const override ;
2684
-
2685
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2686
- // / Print the recipe.
2687
- void print (raw_ostream &O, const Twine &Indent,
2688
- VPSlotTracker &SlotTracker) const override ;
2689
- #endif
2690
-
2691
- // / The scalar type after extending.
2692
- Type *getResultType () const { return ResultTy; }
2693
-
2694
- // / Is the extend ZExt?
2695
- bool isZExt () const { return getExtOpcode () == Instruction::ZExt; }
2696
-
2697
- // / Get the opcode of the extend for VecOp.
2698
- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2699
- };
2700
-
2701
- // / A recipe to represent inloop MulAccumulateReduction operations, multiplying
2702
- // / the vector operands (which may be extended), performing a reduction.add on
2703
- // / the result, and adding the scalar result to a chain. This recipe is abstract
2704
- // / and needs to be lowered to concrete recipes before codegen. The operands are
2705
- // / {ChainOp, VecOp1, VecOp2, [Condition]}.
2706
- class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2707
- // / Opcode of the extend for VecOp1 and VecOp2.
2708
- Instruction::CastOps ExtOp;
2709
-
2710
- // / Non-neg flag of the extend recipe.
2711
- bool IsNonNeg = false ;
2712
-
2713
- // / The scalar type after extending.
2714
- Type *ResultTy = nullptr ;
2715
-
2716
- // / For cloning VPMulAccumulateReductionRecipe.
2717
- VPMulAccumulateReductionRecipe (VPMulAccumulateReductionRecipe *MulAcc)
2718
- : VPReductionRecipe(
2719
- VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind (),
2720
- {MulAcc->getChainOp (), MulAcc->getVecOp0 (), MulAcc->getVecOp1 ()},
2721
- MulAcc->getCondOp (), MulAcc->isOrdered(),
2722
- WrapFlagsTy(MulAcc->hasNoUnsignedWrap (), MulAcc->hasNoSignedWrap()),
2723
- MulAcc->getDebugLoc()),
2724
- ExtOp(MulAcc->getExtOpcode ()), IsNonNeg(MulAcc->isNonNeg ()),
2725
- ResultTy(MulAcc->getResultType ()) {
2726
- transferFlags (*MulAcc);
2727
- setUnderlyingValue (MulAcc->getUnderlyingValue ());
2728
- }
2729
-
2730
- public:
2731
- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2732
- VPWidenCastRecipe *Ext0,
2733
- VPWidenCastRecipe *Ext1, Type *ResultTy)
2734
- : VPReductionRecipe(
2735
- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2736
- {R->getChainOp (), Ext0->getOperand (0 ), Ext1->getOperand (0 )},
2737
- R->getCondOp (), R->isOrdered(),
2738
- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2739
- R->getDebugLoc()),
2740
- ExtOp(Ext0->getOpcode ()), ResultTy(ResultTy) {
2741
- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2742
- Instruction::Add &&
2743
- " The reduction instruction in MulAccumulateteReductionRecipe must "
2744
- " be Add" );
2745
- assert ((ExtOp == Instruction::CastOps::ZExt ||
2746
- ExtOp == Instruction::CastOps::SExt) &&
2747
- " VPMulAccumulateReductionRecipe only supports zext and sext." );
2748
- setUnderlyingValue (R->getUnderlyingValue ());
2749
- // Only set the non-negative flag if the original recipe contains.
2750
- if (Ext0->hasNonNegFlag ())
2751
- IsNonNeg = Ext0->isNonNeg ();
2752
- }
2753
-
2754
- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2755
- Type *ResultTy)
2756
- : VPReductionRecipe(
2757
- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2758
- {R->getChainOp (), Mul->getOperand (0 ), Mul->getOperand (1 )},
2759
- R->getCondOp (), R->isOrdered(),
2760
- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2761
- R->getDebugLoc()),
2762
- ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
2763
- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2764
- Instruction::Add &&
2765
- " The reduction instruction in MulAccumulateReductionRecipe must be "
2766
- " Add" );
2767
- setUnderlyingValue (R->getUnderlyingValue ());
2768
- }
2769
-
2770
- ~VPMulAccumulateReductionRecipe () override = default ;
2771
-
2772
- VPMulAccumulateReductionRecipe *clone () override {
2773
- return new VPMulAccumulateReductionRecipe (this );
2774
- }
2775
-
2776
- VP_CLASSOF_IMPL (VPDef::VPMulAccumulateReductionSC);
2777
-
2778
- void execute (VPTransformState &State) override {
2779
- llvm_unreachable (" VPMulAccumulateReductionRecipe should transform to "
2780
- " VPWidenCastRecipe + "
2781
- " VPWidenRecipe + VPReductionRecipe before execution" );
2782
- }
2783
-
2784
- // / Return the cost of VPMulAccumulateReductionRecipe.
2785
- InstructionCost computeCost (ElementCount VF,
2786
- VPCostContext &Ctx) const override ;
2787
-
2788
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2789
- // / Print the recipe.
2790
- void print (raw_ostream &O, const Twine &Indent,
2791
- VPSlotTracker &SlotTracker) const override ;
2792
- #endif
2793
-
2794
- Type *getResultType () const { return ResultTy; }
2795
-
2796
- // / The first vector value to be extended and reduced.
2797
- VPValue *getVecOp0 () const { return getOperand (1 ); }
2798
-
2799
- // / The second vector value to be extended and reduced.
2800
- VPValue *getVecOp1 () const { return getOperand (2 ); }
2801
-
2802
- // / Return true if this recipe contains extended operands.
2803
- bool isExtended () const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2804
-
2805
- // / Return the opcode of the extends for the operands.
2806
- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2807
-
2808
- // / Return if the operands are zero-extended.
2809
- bool isZExt () const { return ExtOp == Instruction::CastOps::ZExt; }
2810
-
2811
- // / Return true if the operand extends have the non-negative flag.
2812
- bool isNonNeg () const { return IsNonNeg; }
2813
- };
2814
-
2815
2604
// / VPReplicateRecipe replicates a given instruction producing multiple scalar
2816
2605
// / copies of the original scalar type, one per lane, instead of producing a
2817
2606
// / single copy of widened type for all lanes. If the instruction is known to be
@@ -2930,6 +2719,122 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
2930
2719
}
2931
2720
};
2932
2721
2722
+ // / A recipe to combine multiple recipes into a single 'expression' recipe,
2723
+ // / which should be considered a single entity for cost-modeling and transforms.
2724
+ // / The recipe needs to be 'decomposed', i.e. replaced by its individual
2725
+ // / expression recipes, before execute. The individual expression recipes are
2726
+ // / completely disconnected from the def-use graph of other recipes not part of
2727
+ // / the expression. Def-use edges between pairs of expression recipes remain
2728
+ // / intact, whereas every edge between an expression recipe and a recipe outside
2729
+ // / the expression is elevated to connect the non-expression recipe with the
2730
+ // / VPExpressionRecipe itself.
2731
+ class VPExpressionRecipe : public VPSingleDefRecipe {
2732
+ // / Recipes included in this VPExpressionRecipe.
2733
+ SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
2734
+
2735
+ // / Temporary VPValues used for external operands of the expression, i.e.
2736
+ // / operands not defined by recipes in the expression.
2737
+ SmallVector<VPValue *> LiveInPlaceholders;
2738
+
2739
+ enum class ExpressionTypes {
2740
+ // / Represents an inloop extended reduction operation, performing a
2741
+ // / reduction on an extended vector operand into a scalar value, and adding
2742
+ // / the result to a chain.
2743
+ ExtendedReduction,
2744
+ // / Represent an inloop multiply-accumulate reduction, multiplying the
2745
+ // / extended vector operands, performing a reduction.add on the result, and
2746
+ // / adding the scalar result to a chain.
2747
+ ExtMulAccReduction,
2748
+ // / Represent an inloop multiply-accumulate reduction, multiplying the
2749
+ // / vector operands, performing a reduction.add on the result, and adding
2750
+ // / the scalar result to a chain.
2751
+ MulAccReduction,
2752
+ };
2753
+
2754
+ // / Type of the expression.
2755
+ ExpressionTypes ExpressionType;
2756
+
2757
+ // / Construct a new VPExpressionRecipe by internalizing recipes in \p
2758
+ // / ExpressionRecipes. External operands (i.e. not defined by another recipe
2759
+ // / in the expression) are replaced by temporary VPValues and the original
2760
+ // / operands are transferred to the VPExpressionRecipe itself. Clone recipes
2761
+ // / as needed (excluding last) to ensure they are only used by other recipes
2762
+ // / in the expression.
2763
+ VPExpressionRecipe (ExpressionTypes ExpressionType,
2764
+ ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
2765
+
2766
+ public:
2767
+ VPExpressionRecipe (VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2768
+ : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
2769
+ VPExpressionRecipe (VPWidenRecipe *Mul, VPReductionRecipe *Red)
2770
+ : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
2771
+ VPExpressionRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2772
+ VPWidenRecipe *Mul, VPReductionRecipe *Red)
2773
+ : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
2774
+ {Ext0, Ext1, Mul, Red}) {}
2775
+
2776
+ ~VPExpressionRecipe () override {
2777
+ for (auto *R : reverse (ExpressionRecipes))
2778
+ delete R;
2779
+ for (VPValue *T : LiveInPlaceholders)
2780
+ delete T;
2781
+ }
2782
+
2783
+ VP_CLASSOF_IMPL (VPDef::VPExpressionSC)
2784
+
2785
+ VPExpressionRecipe *clone () override {
2786
+ assert (!ExpressionRecipes.empty () && " empty expressions should be removed" );
2787
+ SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
2788
+ for (auto *R : ExpressionRecipes)
2789
+ NewExpressiondRecipes.push_back (R->clone ());
2790
+ for (auto *New : NewExpressiondRecipes) {
2791
+ for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
2792
+ New->replaceUsesOfWith (Old, NewExpressiondRecipes[Idx]);
2793
+ // Update placeholder operands in the cloned recipe to use the external
2794
+ // operands, to be internalized when the cloned expression is constructed.
2795
+ for (const auto &[Placeholder, OutsideOp] :
2796
+ zip (LiveInPlaceholders, operands ()))
2797
+ New->replaceUsesOfWith (Placeholder, OutsideOp);
2798
+ }
2799
+ return new VPExpressionRecipe (ExpressionType, NewExpressiondRecipes);
2800
+ }
2801
+
2802
+ // / Return the VPValue to use to infer the result type of the recipe.
2803
+ VPValue *getOperandOfResultType () const {
2804
+ unsigned OpIdx =
2805
+ cast<VPReductionRecipe>(ExpressionRecipes.back ())->isConditional () ? 2
2806
+ : 1 ;
2807
+ return getOperand (getNumOperands () - OpIdx);
2808
+ }
2809
+
2810
+ // / Insert the recipes of the expression back into the VPlan, directly before
2811
+ // / the current recipe. Leaves the expression recipe empty, which must be
2812
+ // / removed before codegen.
2813
+ void decompose ();
2814
+
2815
+ // / Method for generating code, must not be called as this recipe is abstract.
2816
+ void execute (VPTransformState &State) override {
2817
+ llvm_unreachable (" recipe must be removed before execute" );
2818
+ }
2819
+
2820
+ InstructionCost computeCost (ElementCount VF,
2821
+ VPCostContext &Ctx) const override ;
2822
+
2823
+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824
+ // / Print the recipe.
2825
+ void print (raw_ostream &O, const Twine &Indent,
2826
+ VPSlotTracker &SlotTracker) const override ;
2827
+ #endif
2828
+
2829
+ // / Returns true if this expression contains recipes that may read from or
2830
+ // / write to memory.
2831
+ bool mayReadOrWriteMemory () const ;
2832
+
2833
+ // / Returns true if this expression contains recipes that may have side
2834
+ // / effects.
2835
+ bool mayHaveSideEffects () const ;
2836
+ };
2837
+
2933
2838
// / VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2934
2839
// / control converges back from a Branch-on-Mask. The phi nodes are needed in
2935
2840
// / order to merge values that are set under such a branch and feed their uses.
0 commit comments