@@ -10312,6 +10312,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
10312
10312
return true;
10313
10313
}
10314
10314
10315
// Forward declaration of combineConcatVectorOps, placed ahead of the shuffle
// lowering helpers so that lowerShuffleAsVTRUNC can call it. That caller
// treats an empty SDValue result as "no combined concatenation available".
// Depth defaults to 0 when the caller does not supply it.
+ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
10316
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG,
10317
+ const X86Subtarget &Subtarget,
10318
+ unsigned Depth = 0);
10319
+
10315
10320
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
10316
10321
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
10317
10322
ArrayRef<int> Mask, SDValue V1,
@@ -10692,7 +10697,8 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
10692
10697
SelectionDAG &DAG) {
10693
10698
assert((VT.is128BitVector() || VT.is256BitVector()) &&
10694
10699
"Unexpected VTRUNC type");
10695
- if (!Subtarget.hasAVX512())
10700
+ if (!Subtarget.hasAVX512() ||
10701
+ (VT.is256BitVector() && !Subtarget.useAVX512Regs()))
10696
10702
return SDValue();
10697
10703
10698
10704
unsigned NumElts = VT.getVectorNumElements();
@@ -10721,30 +10727,19 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
10721
10727
bool UndefUppers =
10722
10728
UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
10723
10729
10730
+ // As we're using both sources, we need to concat them together
10731
+ // and truncate from the double-sized src.
10732
+ MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
10733
+
10724
10734
// For offset truncations, ensure that the concat is cheap.
10725
- if (Offset) {
10726
- auto IsCheapConcat = [&](SDValue Lo, SDValue Hi) {
10727
- if (Lo.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
10728
- Hi.getOpcode() == ISD::EXTRACT_SUBVECTOR)
10729
- return Lo.getOperand(0) == Hi.getOperand(0);
10730
- if (ISD::isNormalLoad(Lo.getNode()) &&
10731
- ISD::isNormalLoad(Hi.getNode())) {
10732
- auto *LDLo = cast<LoadSDNode>(Lo);
10733
- auto *LDHi = cast<LoadSDNode>(Hi);
10734
- return DAG.areNonVolatileConsecutiveLoads(
10735
- LDHi, LDLo, Lo.getValueType().getStoreSize(), 1);
10736
- }
10737
- return false;
10738
- };
10739
- if (!IsCheapConcat(peekThroughBitcasts(V1), peekThroughBitcasts(V2)))
10735
+ SDValue Src =
10736
+ combineConcatVectorOps(DL, ConcatVT, {V1, V2}, DAG, Subtarget);
10737
+ if (!Src) {
10738
+ if (Offset)
10740
10739
continue;
10740
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
10741
10741
}
10742
10742
10743
- // As we're using both sources then we need to concat them together
10744
- // and truncate from the double-sized src.
10745
- MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
10746
- SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
10747
-
10748
10743
MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
10749
10744
MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
10750
10745
Src = DAG.getBitcast(SrcVT, Src);
@@ -42183,11 +42178,6 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
42183
42178
return SDValue();
42184
42179
}
42185
42180
42186
- static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
42187
- ArrayRef<SDValue> Ops, SelectionDAG &DAG,
42188
- const X86Subtarget &Subtarget,
42189
- unsigned Depth = 0);
42190
-
42191
42181
/// Try to combine x86 target specific shuffles.
42192
42182
static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42193
42183
SelectionDAG &DAG,
0 commit comments