Skip to content

[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
Expand Down Expand Up @@ -125,6 +126,7 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
Expand Down Expand Up @@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}

/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
/// which extract vector elements and pack them in the same relative positions.
///
/// A "slot" below is one element-sized field of the packed integer; slot N
/// covers bits [N * EltBits, (N + 1) * EltBits).
///
/// \param V         Scalar integer value to inspect.
/// \param Vec       [out] Vector the packed elements are read from.
/// \param VecOffset [out] Index of the source element corresponding to slot 0,
///                  i.e. slot N holds source element VecOffset + N.
/// \param Mask      [in,out] Resized to the number of slots in \p V; the bit of
///                  every slot populated by \p V is set. Bits that are already
///                  set are not cleared here, so a caller can accumulate
///                  several matches of the same width into one mask.
/// \returns true if \p V matched one of the patterns. On failure the output
///          parameters are left in an unspecified state.
///
/// NOTE: the extractelement form identifies slot N with bitcast lane N, which
/// only holds for little-endian layouts. Endianness is not checked here.
static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
                                          uint64_t &VecOffset,
                                          SmallBitVector &Mask) {
  // Both patterns produce a scalar integer; anything else cannot match and
  // would trip getIntegerBitWidth() below.
  if (!V->getType()->isIntegerTy())
    return false;

  static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
    // A missing shl is treated as a shift by zero.
    ShlAmt = 0;
    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
  };

  // First try to match extractelement -> zext -> shl
  uint64_t VecIdx = 0, ShlAmt = 0;
  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
                                    m_Value(Vec), m_ConstantInt(VecIdx))),
                                ShlAmt))) {
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    if (!VecTy)
      return false;
    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
    if (!EltTy)
      return false;

    // An out-of-range extract index is valid IR (it yields poison) but must
    // not be turned into a shuffle mask index.
    if (VecIdx >= VecTy->getNumElements())
      return false;

    const unsigned EltBitWidth = EltTy->getBitWidth();
    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
    // The element must land exactly on a slot boundary.
    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
      return false;
    const unsigned NumSlots = TargetBitWidth / EltBitWidth;
    // Keep the full 64-bit quotient; the shift amount is an arbitrary constant.
    const uint64_t ShlEltAmt = ShlAmt / EltBitWidth;

    // A shift by >= the bit width is valid IR (poison) and would otherwise
    // index past the end of the mask.
    if (ShlEltAmt >= NumSlots)
      return false;
    // Slot 0 would correspond to a negative source index.
    if (ShlEltAmt > VecIdx)
      return false;

    VecOffset = VecIdx - ShlEltAmt;
    Mask.resize(NumSlots);
    Mask.set(static_cast<unsigned>(ShlEltAmt));
    return true;
  }

  // Now try to match shufflevector -> bitcast
  Value *Lhs, *Rhs;
  ArrayRef<int> ShuffleMask;
  if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
                                    m_Mask(ShuffleMask)))))
    return false;
  Mask.resize(ShuffleMask.size());

  // Canonicalise so that the constant operand, if any, is on the right.
  if (isa<Constant>(Lhs))
    std::swap(Lhs, Rhs);

  auto *RhsConst = dyn_cast<Constant>(Rhs);
  if (!RhsConst)
    return false;

  auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
  if (!LhsTy)
    return false;

  Vec = Lhs;
  const unsigned NumLhsElts = LhsTy->getNumElements();
  bool FoundVecOffset = false;
  for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
    if (ShuffleMask[Idx] == PoisonMaskElem)
      return false;
    const unsigned ShuffleIdx = ShuffleMask[Idx];
    if (ShuffleIdx >= NumLhsElts) {
      // Lanes taken from the constant operand must be zero so that they do
      // not contribute any bits to the packed integer.
      const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
      // getAggregateElement() may return null (e.g. for constant
      // expressions), and the element may be wider than 64 bits, so use the
      // null-tolerant cast and isZero() rather than getZExtValue().
      auto *RhsElt = dyn_cast_if_present<ConstantInt>(
          RhsConst->getAggregateElement(RhsIdx));
      if (!RhsElt || !RhsElt->isZero())
        return false;
      continue;
    }

    if (FoundVecOffset) {
      // Every lane taken from the source must use the same offset.
      if (VecOffset + Idx != ShuffleIdx)
        return false;
    } else {
      // Slot 0 would correspond to a negative source index.
      if (ShuffleIdx < Idx)
        return false;
      VecOffset = ShuffleIdx - Idx;
      FoundVecOffset = true;
    }
    Mask.set(Idx);
  }
  // A shuffle that only selects zero lanes does not identify an offset.
  return FoundVecOffset;
}
/// Try to fold the or of two scalar integers whose contents are packed elements
/// of the same vector.
///
/// Both operands of the `or` must be recognised by
/// matchSubIntegerPackFromVector, read from the same vector and agree on the
/// source index that maps to the lowest packed slot. The `or` is then replaced
/// by
///   bitcast (shufflevector Vec, zeroinitializer, ShuffleMask) to IntTy
/// where slots populated by neither operand select a lane of the zero vector.
///
/// \param I The `or` instruction to fold.
/// \returns true if \p I was replaced.
///
/// NOTE(review): no cost-model comparison is made before rewriting.
bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
  assert(I.getOpcode() == Instruction::Or);

  // The rewrite equates "shifted left by N elements" with "bitcast lane N".
  // That correspondence only holds when lane 0 occupies the least significant
  // bits, i.e. on little-endian targets.
  if (I.getDataLayout().isBigEndian())
    return false;

  Value *LhsVec = nullptr, *RhsVec = nullptr;
  uint64_t LhsVecOffset = 0, RhsVecOffset = 0;
  // Shared between both matches so that the populated slots accumulate.
  SmallBitVector Mask;
  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
                                     Mask))
    return false;
  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
                                     Mask))
    return false;
  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
    return false;

  // Convert into shufflevector -> bitcast
  SmallVector<int> ShuffleMask;
  ShuffleMask.reserve(Mask.size());
  // Index NumSrcElts is lane 0 of the second (all-zero) shuffle operand.
  const unsigned NumSrcElts =
      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
  for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
    if (!Mask.test(Idx)) {
      ShuffleMask.push_back(NumSrcElts);
      continue;
    }
    // Never emit an index that would read past the source vector; such an
    // index would silently select from the zero operand or be invalid.
    const uint64_t SrcIdx = LhsVecOffset + Idx;
    if (SrcIdx >= NumSrcElts)
      return false;
    ShuffleMask.push_back(static_cast<int>(SrcIdx));
  }

  Value *MaskedVec = Builder.CreateShuffleVector(
      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
      LhsVec->getName() + ".extract");
  Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
  replaceValue(I, *CastedVec);
  return true;
}

/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
Expand Down Expand Up @@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);

if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
MadeChange |= foldIntegerPackFromVector(I);

// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
Expand Down
108 changes: 108 additions & 0 deletions llvm/test/Transforms/VectorCombine/packed-integers.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=vector-combine %s | FileCheck %s

; All four i8 lanes of %v are extracted, zero-extended and or'ed into an i32
; through a linear chain of ors, each lane shifted by 8 * its index. The CHECK
; lines expect the whole sequence to become a single bitcast of %v.
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
%v.0 = extractelement <4 x i8> %v, i32 0
%z.0 = zext i8 %v.0 to i32

%v.1 = extractelement <4 x i8> %v, i32 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1

%v.2 = extractelement <4 x i8> %v, i32 2
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
%x.2 = or i32 %x.1, %s.2

%v.3 = extractelement <4 x i8> %v, i32 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.2, %s.3

ret i32 %x.3
}

; Same packing as @bitcast.v2i, but the ors form a tree: lanes 0|1 and lanes
; 2|3 are combined separately and then or'ed together. The CHECK lines expect
; the same single bitcast of %v.
define i32 @bitcast.v2i.tree(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i.tree(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
; CHECK-NEXT: ret i32 [[X]]
;
%v.0 = extractelement <4 x i8> %v, i32 0
%z.0 = zext i8 %v.0 to i32

%v.1 = extractelement <4 x i8> %v, i32 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1

%v.2 = extractelement <4 x i8> %v, i32 2
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16

%v.3 = extractelement <4 x i8> %v, i32 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %s.2, %s.3

%x = or i32 %x.1, %x.3

ret i32 %x
}

; Lanes 3..6 of an <8 x i8> are packed into an i32 in ascending order, so the
; source index is offset by 3 from the packed position. The CHECK lines expect
; a shufflevector selecting lanes 3, 4, 5, 6 followed by a bitcast to i32.
define i32 @extract.i32(<8 x i8> %v) {
; CHECK-LABEL: define i32 @extract.i32(
; CHECK-SAME: <8 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
%v.0 = extractelement <8 x i8> %v, i32 3
%z.0 = zext i8 %v.0 to i32

%v.1 = extractelement <8 x i8> %v, i32 4
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1

%v.2 = extractelement <8 x i8> %v, i32 5
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
%x.2 = or i32 %x.1, %s.2

%v.3 = extractelement <8 x i8> %v, i32 6
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.2, %s.3

ret i32 %x.3
}

; Only lanes 0, 1 and 3 of %v are packed; the byte at bit offset 16 is never
; written and therefore stays zero. The CHECK lines expect a shufflevector
; whose third mask entry (index 4) selects a lane of the zeroinitializer
; operand, followed by a bitcast to i32.
define i32 @partial(<4 x i8> %v) {
; CHECK-LABEL: define i32 @partial(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
%v.0 = extractelement <4 x i8> %v, i32 0
%z.0 = zext i8 %v.0 to i32

%v.1 = extractelement <4 x i8> %v, i32 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1

%v.3 = extractelement <4 x i8> %v, i32 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.1, %s.3

ret i32 %x.3
}
Loading