@@ -4563,8 +4563,10 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
 /// way through the source.
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                 int &OddSrc, const RISCVSubtarget &Subtarget) {
-  // We need to be able to widen elements to the next larger integer type.
-  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
+  // We need to be able to widen elements to the next larger integer type or
+  // use the zip2a instruction at e64.
+  if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
+      !Subtarget.hasVendorXRivosVizip())
     return false;
 
   int Size = Mask.size();
@@ -4621,6 +4623,48 @@ static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
          SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
 }
 
+static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
+                          ArrayRef<int> Mask, bool RequiredPolarity) {
+  int NumElts = Mask.size();
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+    int Src = M >= NumElts;
+    int Diff = (int)i - (M % NumElts);
+    bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
+    assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
+           "Must match exactly one of the two slides");
+    if (RequiredPolarity != (C == i % 2))
+      return false;
+  }
+  return true;
+}
+
+/// Given a shuffle which can be represented as a pair of two slides,
+/// see if it is a zipeven idiom. Zipeven is:
+/// vs2: a0 a1 a2 a3
+/// vs1: b0 b1 b2 b3
+/// vd:  a0 b0 a2 b2
+static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
+                      ArrayRef<int> Mask) {
+  return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
+         isAlternating(SrcInfo, Mask, true);
+}
+
+/// Given a shuffle which can be represented as a pair of two slides,
+/// see if it is a zipodd idiom. Zipodd is:
+/// vs2: a0 a1 a2 a3
+/// vs1: b0 b1 b2 b3
+/// vd:  a1 b1 a3 b3
+/// Note that the operand order is swapped due to the way we canonicalize
+/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
+static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
+                     ArrayRef<int> Mask) {
+  return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
+         isAlternating(SrcInfo, Mask, false);
+}
+
 // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
 // 2, 4, 8 and the integer type Factor-times larger than VT's
 // element type must be a legal element type.
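For concreteness, on a 4-element two-source shuffle the zipeven pattern documented above (vd = a0 b0 a2 b2) corresponds to the shuffle mask {0, 4, 2, 6}, and zipodd (vd = a1 b1 a3 b3) to {1, 5, 3, 7}. The standalone sketch below is illustrative only and is not code from this patch; isZipMask is a hypothetical helper that checks the raw mask directly, whereas the patch recognizes the same lane pattern through the slide-pair SrcInfo decomposition.

// Standalone sketch: recognize the zipeven/zipodd lane pattern from a raw
// two-source shuffle mask. Odd = false checks zipeven, Odd = true zipodd.
#include <cassert>
#include <vector>

static bool isZipMask(const std::vector<int> &Mask, bool Odd) {
  int NumElts = static_cast<int>(Mask.size());
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // undef lanes match anything
    // Even result lanes read the first source, odd lanes the second; both
    // read the same even (zipeven) or odd (zipodd) element of their source.
    int Base = (i & ~1) + (Odd ? 1 : 0);
    int Expected = (i % 2 == 0) ? Base : Base + NumElts;
    if (M != Expected)
      return false;
  }
  return true;
}

int main() {
  assert(isZipMask({0, 4, 2, 6}, /*Odd=*/false));  // a0 b0 a2 b2
  assert(isZipMask({1, 5, 3, 7}, /*Odd=*/true));   // a1 b1 a3 b3
  assert(!isZipMask({0, 4, 1, 5}, /*Odd=*/false)); // interleave, not zipeven
  return 0;
}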
@@ -4880,6 +4924,34 @@ static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
   return true;
 }
 
+static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
+                         const SDLoc &DL, SelectionDAG &DAG,
+                         const RISCVSubtarget &Subtarget) {
+  assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
+         RISCVISD::RI_VZIP2A_VL == Opc);
+  assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
+
+  MVT VT = Op0.getSimpleValueType();
+  MVT IntVT = VT.changeVectorElementTypeToInteger();
+  Op0 = DAG.getBitcast(IntVT, Op0);
+  Op1 = DAG.getBitcast(IntVT, Op1);
+
+  MVT ContainerVT = IntVT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+  }
+
+  auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
+  SDValue Passthru = DAG.getUNDEF(ContainerVT);
+  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL);
+  if (IntVT.isFixedLengthVector())
+    Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+  Res = DAG.getBitcast(VT, Res);
+  return Res;
+}
+
 // Given a vector a, b, c, d return a vector Factor times longer
 // with Factor-1 undef's between elements. Ex:
 // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5619,6 +5691,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                          DAG.getVectorIdxConstant(OddSrc % Size, DL));
     }
 
+    // Prefer vzip2a if available.
+    // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+    if (Subtarget.hasVendorXRivosVizip()) {
+      EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                          EvenV, DAG.getVectorIdxConstant(0, DL));
+      OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV,
+                         DAG.getVectorIdxConstant(0, DL));
+      return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
+    }
     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
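The vzip2a path above reuses the EvenV/OddV half-width extracts computed for the widening path, reinserting them into undef vectors of the full result type instead of widening their elements; this is what lets the first hunk relax the ELen check, since ri.vzip2a can produce the interleave even at e64. The snippet below is a standalone scalar model rather than patch code; the interleave function and its test values are illustrative assumptions showing the lane order this lowering produces (E0 O0 E1 O1 ...).

// Standalone scalar model of the interleave this path lowers: even result
// lanes walk through Even, odd result lanes walk through Odd.
#include <cassert>
#include <cstddef>
#include <vector>

static std::vector<int> interleave(const std::vector<int> &Even,
                                   const std::vector<int> &Odd) {
  assert(Even.size() == Odd.size());
  std::vector<int> Result;
  for (std::size_t i = 0; i != Even.size(); ++i) {
    Result.push_back(Even[i]);
    Result.push_back(Odd[i]);
  }
  return Result;
}

int main() {
  // Encoding a0..a3 as 0..3 and b0..b3 as 10..13, the interleave shuffle
  // mask {0, 4, 1, 5, 2, 6, 3, 7} over (V1, V2) yields a0 b0 a1 b1 a2 b2 a3 b3.
  std::vector<int> R = interleave({0, 1, 2, 3}, {10, 11, 12, 13});
  assert((R == std::vector<int>{0, 10, 1, 11, 2, 12, 3, 13}));
  return 0;
}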
@@ -5670,6 +5751,18 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
       return convertFromScalableVector(VT, Res, DAG, Subtarget);
     }
 
+    if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
+      SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+      SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+      return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
+                       Subtarget);
+    }
+    if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
+      SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+      SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+      return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+    }
+
     // Build the mask. Note that vslideup unconditionally preserves elements
     // below the slide amount in the destination, and thus those elements are
     // undefined in the mask. If the mask ends up all true (or undef), it
@@ -6733,7 +6826,7 @@ static bool hasPassthruOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6757,12 +6850,13 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
     return true;
-  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
+  if (Opcode >= RISCVISD::VRGATHER_VX_VL &&
+      Opcode <= RISCVISD::LAST_VL_VECTOR_OP)
     return true;
   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
@@ -21807,6 +21901,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VCPOP_VL)
   NODE_NAME_CASE(VFIRST_VL)
+  NODE_NAME_CASE(RI_VZIPEVEN_VL)
+  NODE_NAME_CASE(RI_VZIPODD_VL)
+  NODE_NAME_CASE(RI_VZIP2A_VL)
   NODE_NAME_CASE(READ_CSR)
   NODE_NAME_CASE(WRITE_CSR)
   NODE_NAME_CASE(SWAP_CSR)