Skip to content

Commit 2cca53c

Browse files
committed
[DAG]Introduce llvm::processShuffleMasks and use it for shuffles in DAG Type Legalizer.
We can process long shuffles (working across several actual vector registers) in the best way if we take the actual register representation into account. We can build a more correct representation of register shuffles and improve the number of recognised buildvector sequences. Also, the same function can be used to improve the cost model for the shuffles in future patches. Part of D100486. Differential Revision: https://reviews.llvm.org/D115653
1 parent 37a47aa commit 2cca53c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+12001
-10423
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,24 @@ void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
398398
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
399399
SmallVectorImpl<int> &ScaledMask);
400400

401+
/// Splits and processes shuffle mask depending on the number of input and
402+
/// output registers. The function does 2 main things: 1) splits the
403+
/// source/destination vectors into real registers; 2) does the mask analysis to
404+
/// identify which real registers are permuted. Then the function processes
405+
/// resulting registers mask using provided action items. If no input register
406+
/// is defined, \p NoInputAction action is used. If only 1 input register is
407+
/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
408+
/// process >= 2 input registers and masks.
409+
/// \param Mask Original shuffle mask.
410+
/// \param NumOfSrcRegs Number of source registers.
411+
/// \param NumOfDestRegs Number of destination registers.
412+
/// \param NumOfUsedRegs Number of actually used destination registers.
413+
void processShuffleMasks(
414+
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
415+
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
416+
function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
417+
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
418+
401419
/// Compute a map of integer instructions to their minimum legal type
402420
/// size.
403421
///

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,116 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
496496
return true;
497497
}
498498

499+
/// Splits a wide shuffle mask into per-register sub-masks and reports, for
/// each used destination register, how it is built from source registers.
///
/// The mask is cut into \p NumOfDestRegs equal slices (one per destination
/// register). Within a slice every defined element is attributed to the source
/// register it reads from (element index divided by the per-source-register
/// width) and rewritten as an index local to that source register. Elements
/// that are UndefMaskElem, or whose index is >= Mask.size(), are skipped.
///
/// For each of the first \p NumOfUsedRegs destination registers exactly one of
/// the following happens:
///  * no source register contributes: \p NoInputAction is called;
///  * one source register contributes: \p SingleInputAction is called with the
///    local mask and the source register index;
///  * two or more contribute: \p ManyInputsAction is called repeatedly, each
///    time with a combined two-input mask (elements of the second input are
///    offset by the mask width) and the indices of the two inputs being
///    merged, until a single merged input remains.
///
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers; must be non-zero.
/// \param NumOfDestRegs Number of destination registers; must be non-zero.
/// \param NumOfUsedRegs Number of actually used destination registers; must
/// not exceed \p NumOfDestRegs.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
  // Both counts are used as divisors below.
  assert(NumOfSrcRegs > 0 && NumOfDestRegs > 0 &&
         "Expected at least one source and one destination register.");
  // Res holds one entry per destination register and is indexed by the
  // "used" register number in the processing loop.
  assert(NumOfUsedRegs <= NumOfDestRegs &&
         "Cannot use more destination registers than are available.");

  // Res[DestReg][SrcReg] is the sub-mask describing which elements of SrcReg
  // land in DestReg; it stays empty if SrcReg does not contribute.
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
  // 1. Split the source/destination vectors into real registers.
  // 2. Do the mask analysis to identify which real registers are permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    // Attribute every defined element of this destination register to the
    // source register it comes from.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      if (Idx == Sz)
        break;
      if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
        continue;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      // First element taken from this source register: materialize its
      // sub-mask as all-undef so that a non-empty sub-mask marks "used".
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
    }
  }
  // Process split mask.
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used!
      NoInputAction();
      break;
    case 1: {
      // Find the only non-empty mask, i.e. the single contributing source
      // register.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg);
      break;
    }
    default: {
      // The first mask is a permutation of a single register. Since we have
      // >= 2 input registers to shuffle, we merge the masks for 2 first
      // registers and generate a shuffle of 2 registers rather than the
      // reordering of the first register and then shuffle with the second
      // register. Next, generate the shuffles of the resulting register + the
      // remaining registers from the list.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] != UndefMaskElem) {
            assert(FirstMask[Idx] == UndefMaskElem &&
                   "Expected undefined mask element.");
            // Elements of the second input are addressed past the first one.
            FirstMask[Idx] = SecondMask[Idx] + VF;
          }
        }
      };
      // After a pair has been reported, the defined elements of the merged
      // mask are rewritten to the identity position for further merging.
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != UndefMaskElem)
            Mask[Idx] = Idx;
        }
      };
      int SecondIdx;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        // Dest has one sub-mask per *source* register, so iterate over its
        // actual size rather than the number of destination registers.
        for (unsigned SrcReg = 0, E = Dest.size(); SrcReg < E; ++SrcReg) {
          SmallVectorImpl<int> &RegMask = Dest[SrcReg];
          if (RegMask.empty())
            continue;

          // FirstIdx == SecondIdx means no pending unpaired input: remember
          // this one and look for its partner.
          if (FirstIdx == SecondIdx) {
            FirstIdx = SrcReg;
            FirstMask = RegMask;
            continue;
          }
          SecondIdx = SrcReg;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
          NormalizeMask(FirstMask);
          // The second input has been folded into the first one.
          RegMask.clear();
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        // An odd input is left over after pairing: fold it into the last
        // merged result.
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}
608+
499609
MapVector<Instruction *, uint64_t>
500610
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
501611
const TargetTransformInfo *TTI) {

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20413,18 +20413,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2041320413
int Left = 2 * In;
2041420414
int Right = 2 * In + 1;
2041520415
SmallVector<int, 8> Mask(NumElems, -1);
20416-
for (unsigned i = 0; i != NumElems; ++i) {
20417-
if (VectorMask[i] == Left) {
20418-
Mask[i] = i;
20419-
VectorMask[i] = In;
20420-
} else if (VectorMask[i] == Right) {
20421-
Mask[i] = i + NumElems;
20422-
VectorMask[i] = In;
20416+
SDValue L = Shuffles[Left];
20417+
ArrayRef<int> LMask;
20418+
bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
20419+
L.use_empty() && L.getOperand(1).isUndef() &&
20420+
L.getOperand(0).getValueType() == L.getValueType();
20421+
if (IsLeftShuffle) {
20422+
LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
20423+
L = L.getOperand(0);
20424+
}
20425+
SDValue R = Shuffles[Right];
20426+
ArrayRef<int> RMask;
20427+
bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
20428+
R.use_empty() && R.getOperand(1).isUndef() &&
20429+
R.getOperand(0).getValueType() == R.getValueType();
20430+
if (IsRightShuffle) {
20431+
RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
20432+
R = R.getOperand(0);
20433+
}
20434+
for (unsigned I = 0; I != NumElems; ++I) {
20435+
if (VectorMask[I] == Left) {
20436+
Mask[I] = I;
20437+
if (IsLeftShuffle)
20438+
Mask[I] = LMask[I];
20439+
VectorMask[I] = In;
20440+
} else if (VectorMask[I] == Right) {
20441+
Mask[I] = I + NumElems;
20442+
if (IsRightShuffle)
20443+
Mask[I] = RMask[I] + NumElems;
20444+
VectorMask[I] = In;
2042320445
}
2042420446
}
2042520447

20426-
Shuffles[In] =
20427-
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20448+
Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
2042820449
}
2042920450
}
2043020451
return Shuffles[0];

0 commit comments

Comments
 (0)