From d98ec0176f0425173c9ad4769513ccb72d313d0f Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Mon, 7 Jul 2025 17:09:29 -0500 Subject: [PATCH] Added pattern for folding packed integer constructions. --- .../Transforms/Vectorize/VectorCombine.cpp | 125 ++++++++++++++++++ .../VectorCombine/packed-integers.ll | 108 +++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/packed-integers.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index fe8d74c43dfdc..ce73a383d2555 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -125,6 +126,7 @@ class VectorCombine { bool scalarizeLoadExtract(Instruction &I); bool scalarizeExtExtract(Instruction &I); bool foldConcatOfBoolMasks(Instruction &I); + bool foldIntegerPackFromVector(Instruction &I); bool foldPermuteOfBinops(Instruction &I); bool foldShuffleOfBinops(Instruction &I); bool foldShuffleOfSelects(Instruction &I); @@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) { return true; } +/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns +/// which extract vector elements and pack them in the same relative positions. 
+///
+/// \param V         Scalar integer operand of the `or` being folded.
+/// \param Vec       Set to the vector whose elements \p V packs.
+/// \param VecOffset Set to the index of the source element that lands in
+///                  packed element 0; packed element I always comes from
+///                  source element VecOffset + I.
+/// \param Mask      Resized to the number of packed elements. The bit of every
+///                  packed element populated by \p V is set; bits that were
+///                  already set by an earlier call are kept.
+/// \returns true if \p V matched. The out-parameters may have been written
+///          even when false is returned, so callers must ignore them then.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+                                          uint64_t &VecOffset,
+                                          SmallBitVector &Mask) {
+  // Matches `shl Base, <constant>` or plain `Base`. ShlAmt is reset to 0 up
+  // front so that the unshifted form reports a shift of zero.
+  const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+    ShlAmt = 0;
+    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+  };
+
+  // First try to match extractelement -> zext -> shl
+  uint64_t VecIdx = 0, ShlAmt = 0;
+  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+                                    m_Value(Vec), m_ConstantInt(VecIdx))),
+                                ShlAmt))) {
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+    if (!EltTy)
+      return false;
+
+    // An out-of-range extract index cannot be turned into a valid shuffle
+    // mask entry for Vec, so leave such code alone.
+    if (VecIdx >= VecTy->getNumElements())
+      return false;
+
+    const unsigned EltBitWidth = EltTy->getBitWidth();
+    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+      return false;
+    // A shift by the full width (or more) would index past the end of Mask.
+    if (ShlAmt >= TargetBitWidth)
+      return false;
+    const unsigned NumPackedElts = TargetBitWidth / EltBitWidth;
+    const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+    // The element must not be shifted further up than its own index, or the
+    // implied offset into Vec would be negative.
+    if (ShlEltAmt > VecIdx)
+      return false;
+    VecOffset = VecIdx - ShlEltAmt;
+    Mask.resize(NumPackedElts);
+    Mask.set(ShlEltAmt);
+    return true;
+  }
+
+  // Now try to match shufflevector -> bitcast
+  Value *Lhs = nullptr, *Rhs = nullptr;
+  ArrayRef<int> ShuffleMask;
+  if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
+                                    m_Mask(ShuffleMask)))))
+    return false;
+
+  // Canonicalize so that the constant operand is Rhs. Shuffle mask entries are
+  // numbered across (operand 0, operand 1), so remember whether the operands
+  // were exchanged and renumber every entry to match below.
+  const bool Swapped = isa<Constant>(Lhs);
+  if (Swapped)
+    std::swap(Lhs, Rhs);
+
+  auto *RhsConst = dyn_cast<Constant>(Rhs);
+  if (!RhsConst)
+    return false;
+
+  auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
+  if (!LhsTy)
+    return false;
+
+  Vec = Lhs;
+  Mask.resize(ShuffleMask.size());
+  const unsigned NumLhsElts = LhsTy->getNumElements();
+  bool FoundVecOffset = false;
+  for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+    if (ShuffleMask[Idx] == PoisonMaskElem)
+      return false;
+    unsigned ShuffleIdx = ShuffleMask[Idx];
+    // Both shuffle operands have the same type, so exchanging them moves every
+    // entry by exactly NumLhsElts in one direction or the other.
+    if (Swapped)
+      ShuffleIdx = ShuffleIdx < NumLhsElts ? ShuffleIdx + NumLhsElts
+                                           : ShuffleIdx - NumLhsElts;
+
+    if (ShuffleIdx >= NumLhsElts) {
+      // Lanes taken from the constant operand must be known integer zeros so
+      // that they do not contribute any bits to the `or`.
+      const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
+      auto *RhsElt = dyn_cast_if_present<ConstantInt>(
+          RhsConst->getAggregateElement(RhsIdx));
+      if (!RhsElt || !RhsElt->isZero())
+        return false;
+      continue;
+    }
+
+    // Every lane taken from Vec must sit at the same distance from its packed
+    // position; the first such lane fixes that distance.
+    if (FoundVecOffset) {
+      if (VecOffset + Idx != ShuffleIdx)
+        return false;
+    } else {
+      if (ShuffleIdx < Idx)
+        return false;
+      VecOffset = ShuffleIdx - Idx;
+      FoundVecOffset = true;
+    }
+    Mask.set(Idx);
+  }
+  return FoundVecOffset;
+}
+
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+///
+/// Both operands of \p I are matched with matchSubIntegerPackFromVector. If
+/// they pack elements of the same vector at the same offset, \p I is replaced
+/// by a shufflevector (unpopulated lanes select from a zero vector) followed
+/// by a bitcast to the type of \p I.
+///
+/// \returns true if \p I was replaced.
+bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
+  assert(I.getOpcode() == Instruction::Or);
+
+  // The shl-based pattern places vector element 0 in the least significant
+  // bits, which only agrees with a vector -> integer bitcast on little-endian
+  // targets.
+  if (I.getDataLayout().isBigEndian())
+    return false;
+
+  Value *LhsVec = nullptr, *RhsVec = nullptr;
+  uint64_t LhsVecOffset = 0, RhsVecOffset = 0;
+  // Shared by both matches so that it accumulates the lanes of both operands.
+  SmallBitVector Mask;
+  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+                                     Mask))
+    return false;
+  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+                                     Mask))
+    return false;
+  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+    return false;
+
+  // NOTE(review): no cost-model query gates this rewrite; confirm that is
+  // acceptable for all targets.
+
+  // Convert into shufflevector -> bitcast
+  const unsigned NumVecElts =
+      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+  // First index of the second (all-zero) shuffle operand.
+  const unsigned ZeroVecIdx = NumVecElts;
+  SmallVector<int> ShuffleMask;
+  ShuffleMask.reserve(Mask.size());
+  for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
+    if (Mask.test(Idx)) {
+      assert(LhsVecOffset + Idx < NumVecElts && "lane outside source vector");
+      ShuffleMask.push_back(LhsVecOffset + Idx);
+    } else {
+      ShuffleMask.push_back(ZeroVecIdx);
+    }
+  }
+
+  Value *MaskedVec = Builder.CreateShuffleVector(
+      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+      LhsVec->getName() + ".extract");
+  Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
+  replaceValue(I, *CastedVec);
+  return true;
+}
+
 /// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
 ///           --> "binop (shuffle), (shuffle)".
 bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
     if (Opcode == Instruction::Store)
       MadeChange |= foldSingleElementStore(I);
 
+    if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
+      MadeChange |= foldIntegerPackFromVector(I);
+
     // If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly) return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll
new file mode 100644
index 0000000000000..f01179bbde13c
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+
+define i32 @bitcast.v2i(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <4 x i8> %v, i32 2
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+  %x.2 = or i32 %x.1, %s.2
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.2, %s.3
+
+  ret i32 %x.3
+}
+
+define i32 @bitcast.v2i.tree(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.tree(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <4 x i8> %v, i32 2
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %s.2, %s.3
+
+  %x = or i32 %x.1, %x.3
+
+  ret i32 %x
+}
+
+define i32 @extract.i32(<8 x i8> %v) {
+; CHECK-LABEL: define i32 @extract.i32(
+; CHECK-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <8 x i8> %v, i32 3
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <8 x i8> %v, i32 4
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <8 x i8> %v, i32 5
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+  %x.2 = or i32 %x.1, %s.2
+
+  %v.3 = extractelement <8 x i8> %v, i32 6
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.2, %s.3
+
+  ret i32 %x.3
+}
+
+define i32 @partial(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @partial(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.1, %s.3
+
+  ret i32 %x.3
+}