Skip to content

Commit fe4b403

Browse files
authored
[X86] lowerShuffleAsVTRUNC - use combineConcatVectorOps to catch more "cheap" concats (#145876)
1 parent cc1eae6 commit fe4b403

File tree

3 files changed

+3687
-4265
lines changed

3 files changed

+3687
-4265
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10312,6 +10312,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
1031210312
return true;
1031310313
}
1031410314

10315+
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
10316+
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
10317+
const X86Subtarget &Subtarget,
10318+
unsigned Depth = 0);
10319+
1031510320
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
1031610321
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
1031710322
ArrayRef<int> Mask, SDValue V1,
@@ -10692,7 +10697,8 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
1069210697
SelectionDAG &DAG) {
1069310698
assert((VT.is128BitVector() || VT.is256BitVector()) &&
1069410699
"Unexpected VTRUNC type");
10695-
if (!Subtarget.hasAVX512())
10700+
if (!Subtarget.hasAVX512() ||
10701+
(VT.is256BitVector() && !Subtarget.useAVX512Regs()))
1069610702
return SDValue();
1069710703

1069810704
unsigned NumElts = VT.getVectorNumElements();
@@ -10721,30 +10727,19 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
1072110727
bool UndefUppers =
1072210728
UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
1072310729

10730+
// As we're using both sources, we need to concat them together
10731+
// and truncate from the double-sized src.
10732+
MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
10733+
1072410734
// For offset truncations, ensure that the concat is cheap.
10725-
if (Offset) {
10726-
auto IsCheapConcat = [&](SDValue Lo, SDValue Hi) {
10727-
if (Lo.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
10728-
Hi.getOpcode() == ISD::EXTRACT_SUBVECTOR)
10729-
return Lo.getOperand(0) == Hi.getOperand(0);
10730-
if (ISD::isNormalLoad(Lo.getNode()) &&
10731-
ISD::isNormalLoad(Hi.getNode())) {
10732-
auto *LDLo = cast<LoadSDNode>(Lo);
10733-
auto *LDHi = cast<LoadSDNode>(Hi);
10734-
return DAG.areNonVolatileConsecutiveLoads(
10735-
LDHi, LDLo, Lo.getValueType().getStoreSize(), 1);
10736-
}
10737-
return false;
10738-
};
10739-
if (!IsCheapConcat(peekThroughBitcasts(V1), peekThroughBitcasts(V2)))
10735+
SDValue Src =
10736+
combineConcatVectorOps(DL, ConcatVT, {V1, V2}, DAG, Subtarget);
10737+
if (!Src) {
10738+
if (Offset)
1074010739
continue;
10740+
Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
1074110741
}
1074210742

10743-
// As we're using both sources, we need to concat them together
10744-
// and truncate from the double-sized src.
10745-
MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
10746-
SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
10747-
1074810743
MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
1074910744
MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
1075010745
Src = DAG.getBitcast(SrcVT, Src);
@@ -42183,11 +42178,6 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
4218342178
return SDValue();
4218442179
}
4218542180

42186-
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
42187-
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
42188-
const X86Subtarget &Subtarget,
42189-
unsigned Depth = 0);
42190-
4219142181
/// Try to combine x86 target specific shuffles.
4219242182
static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4219342183
SelectionDAG &DAG,

0 commit comments

Comments
 (0)