@@ -496,6 +496,116 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
496
496
return true ;
497
497
}
498
498
499
+ void llvm::processShuffleMasks (
500
+ ArrayRef<int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
501
+ unsigned NumOfUsedRegs, function_ref<void ()> NoInputAction,
502
+ function_ref<void(ArrayRef<int >, unsigned )> SingleInputAction,
503
+ function_ref<void(ArrayRef<int >, unsigned , unsigned )> ManyInputsAction) {
504
+ SmallVector<SmallVector<SmallVector<int >>> Res (NumOfDestRegs);
505
+ // Try to perform better estimation of the permutation.
506
+ // 1. Split the source/destination vectors into real registers.
507
+ // 2. Do the mask analysis to identify which real registers are
508
+ // permuted.
509
+ int Sz = Mask.size ();
510
+ unsigned SzDest = Sz / NumOfDestRegs;
511
+ unsigned SzSrc = Sz / NumOfSrcRegs;
512
+ for (unsigned I = 0 ; I < NumOfDestRegs; ++I) {
513
+ auto &RegMasks = Res[I];
514
+ RegMasks.assign (NumOfSrcRegs, {});
515
+ // Check that the values in dest registers are in the one src
516
+ // register.
517
+ for (unsigned K = 0 ; K < SzDest; ++K) {
518
+ int Idx = I * SzDest + K;
519
+ if (Idx == Sz)
520
+ break ;
521
+ if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
522
+ continue ;
523
+ int SrcRegIdx = Mask[Idx] / SzSrc;
524
+ // Add a cost of PermuteTwoSrc for each new source register permute,
525
+ // if we have more than one source registers.
526
+ if (RegMasks[SrcRegIdx].empty ())
527
+ RegMasks[SrcRegIdx].assign (SzDest, UndefMaskElem);
528
+ RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
529
+ }
530
+ }
531
+ // Process split mask.
532
+ for (unsigned I = 0 ; I < NumOfUsedRegs; ++I) {
533
+ auto &Dest = Res[I];
534
+ int NumSrcRegs =
535
+ count_if (Dest, [](ArrayRef<int > Mask) { return !Mask.empty (); });
536
+ switch (NumSrcRegs) {
537
+ case 0 :
538
+ // No input vectors were used!
539
+ NoInputAction ();
540
+ break ;
541
+ case 1 : {
542
+ // Find the only mask with at least single undef mask elem.
543
+ auto *It =
544
+ find_if (Dest, [](ArrayRef<int > Mask) { return !Mask.empty (); });
545
+ unsigned SrcReg = std::distance (Dest.begin (), It);
546
+ SingleInputAction (*It, SrcReg);
547
+ break ;
548
+ }
549
+ default : {
550
+ // The first mask is a permutation of a single register. Since we have >2
551
+ // input registers to shuffle, we merge the masks for 2 first registers
552
+ // and generate a shuffle of 2 registers rather than the reordering of the
553
+ // first register and then shuffle with the second register. Next,
554
+ // generate the shuffles of the resulting register + the remaining
555
+ // registers from the list.
556
+ auto &&CombineMasks = [](MutableArrayRef<int > FirstMask,
557
+ ArrayRef<int > SecondMask) {
558
+ for (int Idx = 0 , VF = FirstMask.size (); Idx < VF; ++Idx) {
559
+ if (SecondMask[Idx] != UndefMaskElem) {
560
+ assert (FirstMask[Idx] == UndefMaskElem &&
561
+ " Expected undefined mask element." );
562
+ FirstMask[Idx] = SecondMask[Idx] + VF;
563
+ }
564
+ }
565
+ };
566
+ auto &&NormalizeMask = [](MutableArrayRef<int > Mask) {
567
+ for (int Idx = 0 , VF = Mask.size (); Idx < VF; ++Idx) {
568
+ if (Mask[Idx] != UndefMaskElem)
569
+ Mask[Idx] = Idx;
570
+ }
571
+ };
572
+ int SecondIdx;
573
+ do {
574
+ int FirstIdx = -1 ;
575
+ SecondIdx = -1 ;
576
+ MutableArrayRef<int > FirstMask, SecondMask;
577
+ for (unsigned I = 0 ; I < NumOfDestRegs; ++I) {
578
+ SmallVectorImpl<int > &RegMask = Dest[I];
579
+ if (RegMask.empty ())
580
+ continue ;
581
+
582
+ if (FirstIdx == SecondIdx) {
583
+ FirstIdx = I;
584
+ FirstMask = RegMask;
585
+ continue ;
586
+ }
587
+ SecondIdx = I;
588
+ SecondMask = RegMask;
589
+ CombineMasks (FirstMask, SecondMask);
590
+ ManyInputsAction (FirstMask, FirstIdx, SecondIdx);
591
+ NormalizeMask (FirstMask);
592
+ RegMask.clear ();
593
+ SecondMask = FirstMask;
594
+ SecondIdx = FirstIdx;
595
+ }
596
+ if (FirstIdx != SecondIdx && SecondIdx >= 0 ) {
597
+ CombineMasks (SecondMask, FirstMask);
598
+ ManyInputsAction (SecondMask, SecondIdx, FirstIdx);
599
+ Dest[FirstIdx].clear ();
600
+ NormalizeMask (SecondMask);
601
+ }
602
+ } while (SecondIdx >= 0 );
603
+ break ;
604
+ }
605
+ }
606
+ }
607
+ }
608
+
499
609
MapVector<Instruction *, uint64_t >
500
610
llvm::computeMinimumValueSizes (ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
501
611
const TargetTransformInfo *TTI) {
0 commit comments