@@ -542,6 +542,132 @@ fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
542
542
return true ;
543
543
}
544
544
545
+ // / Compute whether each element of a shuffle is zeroable.
546
+ // /
547
+ // / A "zeroable" vector shuffle element is one which can be lowered to zero.
548
+ static void computeZeroableShuffleElements (ArrayRef<int > Mask, SDValue V1,
549
+ SDValue V2, APInt &KnownUndef,
550
+ APInt &KnownZero) {
551
+ int Size = Mask.size ();
552
+ KnownUndef = KnownZero = APInt::getZero (Size);
553
+
554
+ V1 = peekThroughBitcasts (V1);
555
+ V2 = peekThroughBitcasts (V2);
556
+
557
+ bool V1IsZero = ISD::isBuildVectorAllZeros (V1.getNode ());
558
+ bool V2IsZero = ISD::isBuildVectorAllZeros (V2.getNode ());
559
+
560
+ int VectorSizeInBits = V1.getValueSizeInBits ();
561
+ int ScalarSizeInBits = VectorSizeInBits / Size;
562
+ assert (!(VectorSizeInBits % ScalarSizeInBits) && " Illegal shuffle mask size" );
563
+
564
+ for (int i = 0 ; i < Size; ++i) {
565
+ int M = Mask[i];
566
+ if (M < 0 ) {
567
+ KnownUndef.setBit (i);
568
+ continue ;
569
+ }
570
+ if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
571
+ KnownZero.setBit (i);
572
+ continue ;
573
+ }
574
+ }
575
+ }
576
+
577
+ // / Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
578
+ // /
579
+ // / For example:
580
+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
581
+ // / <4 x i32> <i32 0, i32 4, i32 1, i32 4>
582
+ // / %3 = bitcast <4 x i32> %2 to <2 x i64>
583
+ // / is lowered to:
584
+ // / (VREPLI $v1, 0)
585
+ // / (VILVL $v0, $v1, $v0)
586
+ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (const SDLoc &DL,
587
+ ArrayRef<int > Mask, MVT VT,
588
+ SDValue V1, SDValue V2,
589
+ SelectionDAG &DAG) {
590
+ int Bits = VT.getSizeInBits ();
591
+ int EltBits = VT.getScalarSizeInBits ();
592
+ int NumElements = VT.getVectorNumElements ();
593
+
594
+ APInt KnownUndef, KnownZero;
595
+ computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
596
+ APInt Zeroable = KnownUndef | KnownZero;
597
+ if (Zeroable.isAllOnes ())
598
+ return DAG.getConstant (0 , DL, VT);
599
+
600
+ // Define a helper function to check a particular ext-scale and lower to it if
601
+ // valid.
602
+ auto Lower = [&](int Scale) -> SDValue {
603
+ SDValue InputV;
604
+ bool AnyExt = true ;
605
+ int Offset = 0 ;
606
+ for (int i = 0 ; i < NumElements; i++) {
607
+ int M = Mask[i];
608
+ if (M < 0 )
609
+ continue ;
610
+ if (i % Scale != 0 ) {
611
+ // Each of the extended elements need to be zeroable.
612
+ if (!Zeroable[i])
613
+ return SDValue ();
614
+
615
+ AnyExt = false ;
616
+ continue ;
617
+ }
618
+
619
+ // Each of the base elements needs to be consecutive indices into the
620
+ // same input vector.
621
+ SDValue V = M < NumElements ? V1 : V2;
622
+ M = M % NumElements;
623
+ if (!InputV) {
624
+ InputV = V;
625
+ Offset = M - (i / Scale);
626
+
627
+ // These offset can't be handled
628
+ if (Offset % (NumElements / Scale))
629
+ return SDValue ();
630
+ } else if (InputV != V)
631
+ return SDValue ();
632
+
633
+ if (M != (Offset + (i / Scale)))
634
+ return SDValue (); // Non-consecutive strided elements.
635
+ }
636
+
637
+ // If we fail to find an input, we have a zero-shuffle which should always
638
+ // have already been handled.
639
+ if (!InputV)
640
+ return SDValue ();
641
+
642
+ do {
643
+ unsigned VilVLoHi = LoongArchISD::VILVL;
644
+ if (Offset >= (NumElements / 2 )) {
645
+ VilVLoHi = LoongArchISD::VILVH;
646
+ Offset -= (NumElements / 2 );
647
+ }
648
+
649
+ MVT InputVT = MVT::getVectorVT (MVT::getIntegerVT (EltBits), NumElements);
650
+ SDValue Ext =
651
+ AnyExt ? DAG.getFreeze (InputV) : DAG.getConstant (0 , DL, InputVT);
652
+ InputV = DAG.getBitcast (InputVT, InputV);
653
+ InputV = DAG.getNode (VilVLoHi, DL, InputVT, Ext, InputV);
654
+ Scale /= 2 ;
655
+ EltBits *= 2 ;
656
+ NumElements /= 2 ;
657
+ } while (Scale > 1 );
658
+ return DAG.getBitcast (VT, InputV);
659
+ };
660
+
661
+ // Each iteration, try extending the elements half as much, but into twice as
662
+ // many elements.
663
+ for (int NumExtElements = Bits / 64 ; NumExtElements < NumElements;
664
+ NumExtElements *= 2 ) {
665
+ if (SDValue V = Lower (NumElements / NumExtElements))
666
+ return V;
667
+ }
668
+ return SDValue ();
669
+ }
670
+
545
671
// / Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
546
672
// /
547
673
// / VREPLVEI performs vector broadcast based on an element specified by an
@@ -956,6 +1082,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
956
1082
return Result;
957
1083
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD (DL, Mask, VT, V1, V2, DAG)))
958
1084
return Result;
1085
+ if ((Result =
1086
+ lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG)))
1087
+ return Result;
959
1088
if ((Result = lowerVECTOR_SHUFFLE_VSHUF (DL, Mask, VT, V1, V2, DAG)))
960
1089
return Result;
961
1090
0 commit comments