@@ -2440,24 +2440,74 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
2440
2440
Ctx.CostKind );
2441
2441
}
2442
2442
2443
+ void VPBundleRecipe::bundle (ArrayRef<VPValue *> Operands) {
2444
+ assert (!BundledRecipes.empty () && " Nothing to bundle?" );
2445
+
2446
+ // Bundle up the operand recipes.
2447
+ SmallPtrSet<VPUser *, 4 > BundledUsers;
2448
+ for (auto *R : BundledRecipes)
2449
+ BundledUsers.insert (R);
2450
+
2451
+ // Recipes in the bundle, except the last one, must only be used inside the
2452
+ // bundle. If there other external users, clone the recipes for the bundle.
2453
+ for (unsigned Idx = 0 ; Idx != BundledRecipes.size () - 1 ; ++Idx) {
2454
+ VPSingleDefRecipe *R = BundledRecipes[Idx];
2455
+ if (all_of (R->users (), [&BundledUsers](VPUser *U) {
2456
+ return BundledUsers.contains (U);
2457
+ })) {
2458
+ if (R->getParent ())
2459
+ R->removeFromParent ();
2460
+ continue ;
2461
+ }
2462
+ // The users external to the bundle. Clone the recipe for use in the
2463
+ // bundle and update all its in-bundle users.
2464
+ VPSingleDefRecipe *Copy = R->clone ();
2465
+ BundledRecipes[Idx] = Copy;
2466
+ BundledUsers.insert (Copy);
2467
+ R->replaceUsesWithIf (Copy, [&BundledUsers](VPUser &U, unsigned ) {
2468
+ return BundledUsers.contains (&U);
2469
+ });
2470
+ }
2471
+ if (BundledRecipes.back ()->getParent ())
2472
+ BundledRecipes.back ()->removeFromParent ();
2473
+
2474
+ // Internalize all external operands to the bundled operations. To do so,
2475
+ // create new temporary VPValues for all operands not defined by recipe in
2476
+ // the bundle. The original operands are added as operands of the
2477
+ // VPBundleRecipe.
2478
+ for (auto *R : BundledRecipes) {
2479
+ for (const auto &[Idx, Op] : enumerate(R->operands ())) {
2480
+ auto *Def = Op->getDefiningRecipe ();
2481
+ if (Def && BundledUsers.contains (Def))
2482
+ continue ;
2483
+ if (Operands.empty ())
2484
+ addOperand (Op);
2485
+ else
2486
+ addOperand (Operands[TmpValues.size ()]);
2487
+ TmpValues.push_back (new VPValue ());
2488
+ R->setOperand (Idx, TmpValues.back ());
2489
+ }
2490
+ }
2491
+ }
2492
+
2443
2493
void VPBundleRecipe::unbundle () {
2444
- for (auto *Op : BundledOps )
2445
- if (!Op ->getParent ())
2446
- Op ->insertBefore (this );
2494
+ for (auto *R : BundledRecipes )
2495
+ if (!R ->getParent ())
2496
+ R ->insertBefore (this );
2447
2497
2448
2498
for (const auto &[Idx, Op] : enumerate(operands ()))
2449
2499
TmpValues[Idx]->replaceAllUsesWith (Op);
2450
2500
2451
- replaceAllUsesWith (getResultOp ());
2501
+ replaceAllUsesWith (getResultRecipe ());
2452
2502
2453
2503
if (BundleType == BundleTypes::MulAccumulateReduction &&
2454
- BundledOps .size () == 5 ) {
2504
+ BundledRecipes .size () == 5 ) {
2455
2505
// Note that we will drop the extend after mul which transforms
2456
2506
// reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
2457
2507
// TODO: This transform should be done separately from bundling/unbundling.
2458
- auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps [0 ]);
2459
- auto *Ext1 = cast<VPWidenCastRecipe>(BundledOps [1 ]);
2460
- auto *Ext2 = cast<VPWidenCastRecipe>(BundledOps [3 ]);
2508
+ auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes [0 ]);
2509
+ auto *Ext1 = cast<VPWidenCastRecipe>(BundledRecipes [1 ]);
2510
+ auto *Ext2 = cast<VPWidenCastRecipe>(BundledRecipes [3 ]);
2461
2511
auto *Op0 =
2462
2512
new VPWidenCastRecipe (Ext0->getOpcode (), Ext0->getOperand (0 ),
2463
2513
Ext2->getResultType (), *Ext0, getDebugLoc ());
@@ -2469,8 +2519,8 @@ void VPBundleRecipe::unbundle() {
2469
2519
Ext2->getResultType (), *Ext1, getDebugLoc ());
2470
2520
Op1->insertBefore (Ext1);
2471
2521
}
2472
- auto *Mul = cast<VPWidenRecipe>(BundledOps [2 ]);
2473
- auto *Red = cast<VPReductionRecipe>(BundledOps [4 ]);
2522
+ auto *Mul = cast<VPWidenRecipe>(BundledRecipes [2 ]);
2523
+ auto *Red = cast<VPReductionRecipe>(BundledRecipes [4 ]);
2474
2524
Mul->setOperand (0 , Op0);
2475
2525
Mul->setOperand (1 , Op1);
2476
2526
Red->setOperand (1 , Mul);
@@ -2479,7 +2529,7 @@ void VPBundleRecipe::unbundle() {
2479
2529
if (Ext0 != Ext1)
2480
2530
Ext1->eraseFromParent ();
2481
2531
}
2482
- BundledOps .clear ();
2532
+ BundledRecipes .clear ();
2483
2533
}
2484
2534
2485
2535
InstructionCost VPBundleRecipe::computeCost (ElementCount VF,
@@ -2492,17 +2542,17 @@ InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
2492
2542
switch (BundleType) {
2493
2543
case BundleTypes::ExtendedReduction: {
2494
2544
unsigned Opcode = RecurrenceDescriptor::getOpcode (
2495
- cast<VPReductionRecipe>(BundledOps [1 ])->getRecurrenceKind ());
2545
+ cast<VPReductionRecipe>(BundledRecipes [1 ])->getRecurrenceKind ());
2496
2546
return Ctx.TTI .getExtendedReductionCost (
2497
2547
Opcode,
2498
- cast<VPWidenCastRecipe>(BundledOps .front ())->getOpcode () ==
2548
+ cast<VPWidenCastRecipe>(BundledRecipes .front ())->getOpcode () ==
2499
2549
Instruction::ZExt,
2500
2550
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind );
2501
2551
}
2502
2552
case BundleTypes::MulAccumulateReduction:
2503
2553
return Ctx.TTI .getMulAccReductionCost (
2504
- BundledOps .size () > 2
2505
- ? cast<VPWidenCastRecipe>(BundledOps .front ())->getOpcode () ==
2554
+ BundledRecipes .size () > 2
2555
+ ? cast<VPWidenCastRecipe>(BundledRecipes .front ())->getOpcode () ==
2506
2556
Instruction::ZExt
2507
2557
: false ,
2508
2558
RedTy, SrcVecTy, Ctx.CostKind );
@@ -2516,7 +2566,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
2516
2566
O << Indent << " BUNDLE " ;
2517
2567
printAsOperand (O, SlotTracker);
2518
2568
O << " = " ;
2519
- auto *Red = cast<VPReductionRecipe>(BundledOps .back ());
2569
+ auto *Red = cast<VPReductionRecipe>(BundledRecipes .back ());
2520
2570
unsigned Opcode = RecurrenceDescriptor::getOpcode (Red->getRecurrenceKind ());
2521
2571
2522
2572
switch (BundleType) {
@@ -2527,7 +2577,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
2527
2577
getOperand (0 )->printAsOperand (O, SlotTracker);
2528
2578
Red->printFlags (O);
2529
2579
2530
- auto *Ext0 = cast<VPWidenCastRecipe>(BundledOps [0 ]);
2580
+ auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes [0 ]);
2531
2581
O << Instruction::getOpcodeName (Ext0->getOpcode ()) << " to "
2532
2582
<< *Ext0->getResultType ();
2533
2583
if (Red->isConditional ()) {
@@ -2545,16 +2595,16 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
2545
2595
RecurrenceDescriptor::getOpcode (Red->getRecurrenceKind ()))
2546
2596
<< " (" ;
2547
2597
O << " mul" ;
2548
- auto *Mul = cast<VPWidenRecipe>(BundledOps. size () == 2 ? BundledOps[ 0 ]
2549
- : BundledOps [2 ]);
2598
+ auto *Mul = cast<VPWidenRecipe>(
2599
+ BundledRecipes. size () == 2 ? BundledRecipes[ 0 ] : BundledRecipes [2 ]);
2550
2600
Mul->printFlags (O);
2551
- bool IsExtended = BundledOps .size () > 2 ;
2601
+ bool IsExtended = BundledRecipes .size () > 2 ;
2552
2602
if (IsExtended)
2553
2603
O << " (" ;
2554
2604
getOperand (0 )->printAsOperand (O, SlotTracker);
2555
2605
if (IsExtended) {
2556
2606
auto *Ext0 = cast<VPWidenCastRecipe>(
2557
- BundledOps .size () == 5 ? BundledOps [3 ] : BundledOps [0 ]);
2607
+ BundledRecipes .size () == 5 ? BundledRecipes [3 ] : BundledRecipes [0 ]);
2558
2608
O << " " << Instruction::getOpcodeName (Ext0->getOpcode ()) << " to "
2559
2609
<< *Ext0->getResultType () << " ), (" ;
2560
2610
} else {
@@ -2563,7 +2613,7 @@ void VPBundleRecipe::print(raw_ostream &O, const Twine &Indent,
2563
2613
getOperand (1 )->printAsOperand (O, SlotTracker);
2564
2614
if (IsExtended) {
2565
2615
auto *Ext1 = cast<VPWidenCastRecipe>(
2566
- BundledOps .size () == 5 ? BundledOps [3 ] : BundledOps [1 ]);
2616
+ BundledRecipes .size () == 5 ? BundledRecipes [3 ] : BundledRecipes [1 ]);
2567
2617
O << " " << Instruction::getOpcodeName (Ext1->getOpcode ()) << " to "
2568
2618
<< *Ext1->getResultType () << " )" ;
2569
2619
}
0 commit comments