@@ -525,14 +525,13 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
525
525
526
526
static inline bool classof (const VPRecipeBase *R) {
527
527
switch (R->getVPDefID ()) {
528
+ case VPRecipeBase::VPBundleSC:
528
529
case VPRecipeBase::VPDerivedIVSC:
529
530
case VPRecipeBase::VPEVLBasedIVPHISC:
530
531
case VPRecipeBase::VPExpandSCEVSC:
531
532
case VPRecipeBase::VPInstructionSC:
532
533
case VPRecipeBase::VPReductionEVLSC:
533
534
case VPRecipeBase::VPReductionSC:
534
- case VPRecipeBase::VPMulAccumulateReductionSC:
535
- case VPRecipeBase::VPExtendedReductionSC:
536
535
case VPRecipeBase::VPReplicateSC:
537
536
case VPRecipeBase::VPScalarIVStepsSC:
538
537
case VPRecipeBase::VPVectorPointerSC:
@@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
852
851
R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
853
852
R->getVPDefID () == VPRecipeBase::VPReplicateSC ||
854
853
R->getVPDefID () == VPRecipeBase::VPVectorEndPointerSC ||
855
- R->getVPDefID () == VPRecipeBase::VPVectorPointerSC ||
856
- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
857
- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
854
+ R->getVPDefID () == VPRecipeBase::VPVectorPointerSC;
858
855
}
859
856
860
857
static inline bool classof (const VPUser *U) {
@@ -2431,29 +2428,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2431
2428
}
2432
2429
setUnderlyingValue (I);
2433
2430
}
2434
-
2435
- // / For VPExtendedReductionRecipe.
2436
- // / Note that the debug location is from the extend.
2437
- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2438
- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2439
- bool IsOrdered, DebugLoc DL)
2440
- : VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind),
2441
- IsOrdered(IsOrdered), IsConditional(CondOp) {
2442
- if (CondOp)
2443
- addOperand (CondOp);
2444
- }
2445
-
2446
- // / For VPMulAccumulateReductionRecipe.
2447
- // / Note that the NUW/NSW flags and the debug location are from the Mul.
2448
- VPReductionRecipe (const unsigned char SC, const RecurKind RdxKind,
2449
- ArrayRef<VPValue *> Operands, VPValue *CondOp,
2450
- bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL)
2451
- : VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind),
2452
- IsOrdered(IsOrdered), IsConditional(CondOp) {
2453
- if (CondOp)
2454
- addOperand (CondOp);
2455
- }
2456
-
2457
2431
public:
2458
2432
VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
2459
2433
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
@@ -2479,9 +2453,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2479
2453
2480
2454
static inline bool classof (const VPRecipeBase *R) {
2481
2455
return R->getVPDefID () == VPRecipeBase::VPReductionSC ||
2482
- R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
2483
- R->getVPDefID () == VPRecipeBase::VPExtendedReductionSC ||
2484
- R->getVPDefID () == VPRecipeBase::VPMulAccumulateReductionSC;
2456
+ R->getVPDefID () == VPRecipeBase::VPReductionEVLSC;
2485
2457
}
2486
2458
2487
2459
static inline bool classof (const VPUser *U) {
@@ -2620,190 +2592,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
2620
2592
}
2621
2593
};
2622
2594
2623
- // / A recipe to represent inloop extended reduction operations, performing a
2624
- // / reduction on a extended vector operand into a scalar value, and adding the
2625
- // / result to a chain. This recipe is abstract and needs to be lowered to
2626
- // / concrete recipes before codegen. The operands are {ChainOp, VecOp,
2627
- // / [Condition]}.
2628
- class VPExtendedReductionRecipe : public VPReductionRecipe {
2629
- // / Opcode of the extend for VecOp.
2630
- Instruction::CastOps ExtOp;
2631
-
2632
- // / The scalar type after extending.
2633
- Type *ResultTy;
2634
-
2635
- // / For cloning VPExtendedReductionRecipe.
2636
- VPExtendedReductionRecipe (VPExtendedReductionRecipe *ExtRed)
2637
- : VPReductionRecipe(
2638
- VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind (),
2639
- {ExtRed->getChainOp (), ExtRed->getVecOp ()}, ExtRed->getCondOp (),
2640
- ExtRed->isOrdered(), ExtRed->getDebugLoc()),
2641
- ExtOp(ExtRed->getExtOpcode ()), ResultTy(ExtRed->getResultType ()) {
2642
- transferFlags (*ExtRed);
2643
- setUnderlyingValue (ExtRed->getUnderlyingValue ());
2644
- }
2645
-
2646
- public:
2647
- VPExtendedReductionRecipe (VPReductionRecipe *R, VPWidenCastRecipe *Ext)
2648
- : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind (),
2649
- {R->getChainOp (), Ext->getOperand (0 )}, R->getCondOp (),
2650
- R->isOrdered(), Ext->getDebugLoc()),
2651
- ExtOp(Ext->getOpcode ()), ResultTy(Ext->getResultType ()) {
2652
- assert ((ExtOp == Instruction::CastOps::ZExt ||
2653
- ExtOp == Instruction::CastOps::SExt) &&
2654
- " VPExtendedReductionRecipe only supports zext and sext." );
2655
-
2656
- transferFlags (*Ext);
2657
- setUnderlyingValue (R->getUnderlyingValue ());
2658
- }
2659
-
2660
- ~VPExtendedReductionRecipe () override = default ;
2661
-
2662
- VPExtendedReductionRecipe *clone () override {
2663
- return new VPExtendedReductionRecipe (this );
2664
- }
2665
-
2666
- VP_CLASSOF_IMPL (VPDef::VPExtendedReductionSC);
2667
-
2668
- void execute (VPTransformState &State) override {
2669
- llvm_unreachable (" VPExtendedReductionRecipe should be transform to "
2670
- " VPExtendedRecipe + VPReductionRecipe before execution." );
2671
- };
2672
-
2673
- // / Return the cost of VPExtendedReductionRecipe.
2674
- InstructionCost computeCost (ElementCount VF,
2675
- VPCostContext &Ctx) const override ;
2676
-
2677
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2678
- // / Print the recipe.
2679
- void print (raw_ostream &O, const Twine &Indent,
2680
- VPSlotTracker &SlotTracker) const override ;
2681
- #endif
2682
-
2683
- // / The scalar type after extending.
2684
- Type *getResultType () const { return ResultTy; }
2685
-
2686
- // / Is the extend ZExt?
2687
- bool isZExt () const { return getExtOpcode () == Instruction::ZExt; }
2688
-
2689
- // / Get the opcode of the extend for VecOp.
2690
- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2691
- };
2692
-
2693
- // / A recipe to represent inloop MulAccumulateReduction operations, multiplying
2694
- // / the vector operands (which may be extended), performing a reduction.add on
2695
- // / the result, and adding the scalar result to a chain. This recipe is abstract
2696
- // / and needs to be lowered to concrete recipes before codegen. The operands are
2697
- // / {ChainOp, VecOp1, VecOp2, [Condition]}.
2698
- class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
2699
- // / Opcode of the extend for VecOp1 and VecOp2.
2700
- Instruction::CastOps ExtOp;
2701
-
2702
- // / Non-neg flag of the extend recipe.
2703
- bool IsNonNeg = false ;
2704
-
2705
- // / The scalar type after extending.
2706
- Type *ResultTy = nullptr ;
2707
-
2708
- // / For cloning VPMulAccumulateReductionRecipe.
2709
- VPMulAccumulateReductionRecipe (VPMulAccumulateReductionRecipe *MulAcc)
2710
- : VPReductionRecipe(
2711
- VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind (),
2712
- {MulAcc->getChainOp (), MulAcc->getVecOp0 (), MulAcc->getVecOp1 ()},
2713
- MulAcc->getCondOp (), MulAcc->isOrdered(),
2714
- WrapFlagsTy(MulAcc->hasNoUnsignedWrap (), MulAcc->hasNoSignedWrap()),
2715
- MulAcc->getDebugLoc()),
2716
- ExtOp(MulAcc->getExtOpcode ()), IsNonNeg(MulAcc->isNonNeg ()),
2717
- ResultTy(MulAcc->getResultType ()) {
2718
- transferFlags (*MulAcc);
2719
- setUnderlyingValue (MulAcc->getUnderlyingValue ());
2720
- }
2721
-
2722
- public:
2723
- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2724
- VPWidenCastRecipe *Ext0,
2725
- VPWidenCastRecipe *Ext1, Type *ResultTy)
2726
- : VPReductionRecipe(
2727
- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2728
- {R->getChainOp (), Ext0->getOperand (0 ), Ext1->getOperand (0 )},
2729
- R->getCondOp (), R->isOrdered(),
2730
- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2731
- R->getDebugLoc()),
2732
- ExtOp(Ext0->getOpcode ()), ResultTy(ResultTy) {
2733
- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2734
- Instruction::Add &&
2735
- " The reduction instruction in MulAccumulateteReductionRecipe must "
2736
- " be Add" );
2737
- assert ((ExtOp == Instruction::CastOps::ZExt ||
2738
- ExtOp == Instruction::CastOps::SExt) &&
2739
- " VPMulAccumulateReductionRecipe only supports zext and sext." );
2740
- setUnderlyingValue (R->getUnderlyingValue ());
2741
- // Only set the non-negative flag if the original recipe contains.
2742
- if (Ext0->hasNonNegFlag ())
2743
- IsNonNeg = Ext0->isNonNeg ();
2744
- }
2745
-
2746
- VPMulAccumulateReductionRecipe (VPReductionRecipe *R, VPWidenRecipe *Mul,
2747
- Type *ResultTy)
2748
- : VPReductionRecipe(
2749
- VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind (),
2750
- {R->getChainOp (), Mul->getOperand (0 ), Mul->getOperand (1 )},
2751
- R->getCondOp (), R->isOrdered(),
2752
- WrapFlagsTy(Mul->hasNoUnsignedWrap (), Mul->hasNoSignedWrap()),
2753
- R->getDebugLoc()),
2754
- ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
2755
- assert (RecurrenceDescriptor::getOpcode (getRecurrenceKind ()) ==
2756
- Instruction::Add &&
2757
- " The reduction instruction in MulAccumulateReductionRecipe must be "
2758
- " Add" );
2759
- setUnderlyingValue (R->getUnderlyingValue ());
2760
- }
2761
-
2762
- ~VPMulAccumulateReductionRecipe () override = default ;
2763
-
2764
- VPMulAccumulateReductionRecipe *clone () override {
2765
- return new VPMulAccumulateReductionRecipe (this );
2766
- }
2767
-
2768
- VP_CLASSOF_IMPL (VPDef::VPMulAccumulateReductionSC);
2769
-
2770
- void execute (VPTransformState &State) override {
2771
- llvm_unreachable (" VPMulAccumulateReductionRecipe should transform to "
2772
- " VPWidenCastRecipe + "
2773
- " VPWidenRecipe + VPReductionRecipe before execution" );
2774
- }
2775
-
2776
- // / Return the cost of VPMulAccumulateReductionRecipe.
2777
- InstructionCost computeCost (ElementCount VF,
2778
- VPCostContext &Ctx) const override ;
2779
-
2780
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2781
- // / Print the recipe.
2782
- void print (raw_ostream &O, const Twine &Indent,
2783
- VPSlotTracker &SlotTracker) const override ;
2784
- #endif
2785
-
2786
- Type *getResultType () const { return ResultTy; }
2787
-
2788
- // / The first vector value to be extended and reduced.
2789
- VPValue *getVecOp0 () const { return getOperand (1 ); }
2790
-
2791
- // / The second vector value to be extended and reduced.
2792
- VPValue *getVecOp1 () const { return getOperand (2 ); }
2793
-
2794
- // / Return true if this recipe contains extended operands.
2795
- bool isExtended () const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
2796
-
2797
- // / Return the opcode of the extends for the operands.
2798
- Instruction::CastOps getExtOpcode () const { return ExtOp; }
2799
-
2800
- // / Return if the operands are zero-extended.
2801
- bool isZExt () const { return ExtOp == Instruction::CastOps::ZExt; }
2802
-
2803
- // / Return true if the operand extends have the non-negative flag.
2804
- bool isNonNeg () const { return IsNonNeg; }
2805
- };
2806
-
2807
2595
// / VPReplicateRecipe replicates a given instruction producing multiple scalar
2808
2596
// / copies of the original scalar type, one per lane, instead of producing a
2809
2597
// / single copy of widened type for all lanes. If the instruction is known to be
@@ -2922,6 +2710,123 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
2922
2710
}
2923
2711
};
2924
2712
2713
+ // / A recipe to combine multiple recipes into a 'bundle' recipe, which should be
2714
+ // / considered as a single entity for cost-modeling and transforms. The recipe
2715
+ // / needs to be 'unbundled', i.e. replaced by its individual recipes before
2716
+ // / execute.
2717
+ class VPBundleRecipe : public VPSingleDefRecipe {
2718
+ enum class BundleTypes {
2719
+ ExtendedReduction,
2720
+ MulAccumulateReduction,
2721
+ };
2722
+
2723
+ // / Recipes bundled together in this VPBundleRecipe.
2724
+ SmallVector<VPSingleDefRecipe *> BundledOps;
2725
+
2726
+ // / Temporary VPValues used for external operands of the bundle, i.e. operands
2727
+ // / not defined by recipes in the bundle.
2728
+ SmallVector<VPValue *> TmpValues;
2729
+
2730
+ // / Type of the bundle.
2731
+ BundleTypes BundleType;
2732
+
2733
+ VPBundleRecipe (BundleTypes BundleType, ArrayRef<VPSingleDefRecipe *> ToBundle)
2734
+ : VPSingleDefRecipe(VPDef::VPBundleSC, {}, {}), BundledOps(ToBundle),
2735
+ BundleType (BundleType) {
2736
+ // Bundle up the operand recipes.
2737
+ SmallPtrSet<VPUser *, 4 > BundledUsers;
2738
+ for (auto *R : ToBundle)
2739
+ BundledUsers.insert (R);
2740
+
2741
+ // Recipes in the bundle, except the last one, must only be used inside the
2742
+ // bundle. If there are other external users, clone the recipes for the bundle.
2743
+ for (const auto &[Idx, R] : enumerate(drop_end (ToBundle))) {
2744
+ if (all_of (R->users (), [&BundledUsers](VPUser *U) {
2745
+ return BundledUsers.contains (U);
2746
+ })) {
2747
+ if (R->getParent ())
2748
+ R->removeFromParent ();
2749
+ continue ;
2750
+ }
2751
+ // There are users external to the bundle. Clone the recipe for use in the
2752
+ // bundle and update all its in-bundle users.
2753
+ this ->BundledOps [Idx] = R->clone ();
2754
+ BundledUsers.insert (this ->BundledOps [Idx]);
2755
+ R->replaceUsesWithIf (this ->BundledOps [Idx],
2756
+ [&BundledUsers](VPUser &U, unsigned ) {
2757
+ return BundledUsers.contains (&U);
2758
+ });
2759
+ }
2760
+ BundledOps.back ()->removeFromParent ();
2761
+
2762
+ // Internalize all external operands to the bundled operations. To do so,
2763
+ // create new temporary VPValues for all operands not defined by a recipe in
2764
+ // the bundle. The original operands are added as operands of the
2765
+ // VPBundleRecipe.
2766
+ for (auto *R : this ->BundledOps ) {
2767
+ for (const auto &[Idx, Op] : enumerate(R->operands ())) {
2768
+ auto *Def = Op->getDefiningRecipe ();
2769
+ if (Def && BundledUsers.contains (Def))
2770
+ continue ;
2771
+ addOperand (Op);
2772
+ TmpValues.push_back (new VPValue ());
2773
+ R->setOperand (Idx, TmpValues.back ());
2774
+ }
2775
+ }
2776
+ }
2777
+
2778
+ public:
2779
+ VPBundleRecipe (VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2780
+ : VPBundleRecipe(BundleTypes::ExtendedReduction, {Ext, Red}) {}
2781
+ VPBundleRecipe (VPWidenRecipe *Mul, VPReductionRecipe *Red)
2782
+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction, {Mul, Red}) {}
2783
+ VPBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2784
+ VPWidenRecipe *Mul, VPReductionRecipe *Red)
2785
+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2786
+ {Ext0, Ext1, Mul, Red}) {}
2787
+ VPBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2788
+ VPWidenRecipe *Mul, VPWidenCastRecipe *Ext2,
2789
+ VPReductionRecipe *Red)
2790
+ : VPBundleRecipe(BundleTypes::MulAccumulateReduction,
2791
+ {Ext0, Ext1, Mul, Ext2, Red}) {}
2792
+
2793
+ ~VPBundleRecipe () override {
2794
+ SmallPtrSet<VPRecipeBase *, 4 > Seen;
2795
+ for (auto *R : reverse (BundledOps))
2796
+ if (Seen.insert (R).second )
2797
+ delete R;
2798
+ for (VPValue *T : TmpValues)
2799
+ delete T;
2800
+ }
2801
+
2802
+ VP_CLASSOF_IMPL (VPDef::VPBundleSC)
2803
+
2804
+ VPBundleRecipe *clone () override {
2805
+ return new VPBundleRecipe (BundleType, BundledOps);
2806
+ }
2807
+
2808
+ // / Return the VPSingleDefRecipe producing the final result of the bundled
2809
+ // / recipe.
2810
+ VPSingleDefRecipe *getResultOp () const { return BundledOps.back (); }
2811
+
2812
+ void unbundle ();
2813
+
2814
+ // / A bundle cannot be executed directly; it must be unbundled, i.e. replaced
2815
+ // / by its individual recipes, before execution.
2816
+ void execute (VPTransformState &State) override {
2817
+ llvm_unreachable (" recipe must be removed before execute" );
2818
+ }
2819
+
2820
+ InstructionCost computeCost (ElementCount VF,
2821
+ VPCostContext &Ctx) const override ;
2822
+
2823
+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824
+ // / Print the recipe.
2825
+ void print (raw_ostream &O, const Twine &Indent,
2826
+ VPSlotTracker &SlotTracker) const override ;
2827
+ #endif
2828
+ };
2829
+
2925
2830
// / VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2926
2831
// / control converges back from a Branch-on-Mask. The phi nodes are needed in
2927
2832
// / order to merge values that are set under such a branch and feed their uses.
0 commit comments