Skip to content

Commit 777d6b5

Browse files
authored
[VectorCombine] Use InstSimplifyFolder to simplify instrs on creation. (#146350)
Update VectorCombine to use InstSimplifyFolder to simplify redundant instructions on creation. PR: #146350
1 parent 6b3d2b6 commit 777d6b5

13 files changed

+53
-81
lines changed

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1575,10 +1575,14 @@ class IRBuilderBase {
15751575
return Accum;
15761576
}
15771577

1578-
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
1578+
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "",
1579+
bool IsDisjoint = false) {
15791580
if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS))
15801581
return V;
1581-
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
1582+
return Insert(
1583+
IsDisjoint ? BinaryOperator::CreateDisjoint(Instruction::Or, LHS, RHS)
1584+
: BinaryOperator::CreateOr(LHS, RHS),
1585+
Name);
15821586
}
15831587

15841588
Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/BasicAliasAnalysis.h"
2222
#include "llvm/Analysis/ConstantFolding.h"
2323
#include "llvm/Analysis/GlobalsModRef.h"
24+
#include "llvm/Analysis/InstSimplifyFolder.h"
2425
#include "llvm/Analysis/Loads.h"
2526
#include "llvm/Analysis/TargetTransformInfo.h"
2627
#include "llvm/Analysis/ValueTracking.h"
@@ -72,14 +73,15 @@ class VectorCombine {
7273
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
7374
const DataLayout *DL, TTI::TargetCostKind CostKind,
7475
bool TryEarlyFoldsOnly)
75-
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
76-
CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
76+
: F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
77+
DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
78+
TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
7779

7880
bool run();
7981

8082
private:
8183
Function &F;
82-
IRBuilder<> Builder;
84+
IRBuilder<InstSimplifyFolder> Builder;
8385
const TargetTransformInfo &TTI;
8486
const DominatorTree &DT;
8587
AAResults &AA;
@@ -529,7 +531,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
529531
/// Create a shuffle that translates (shifts) 1 element from the input vector
530532
/// to a new element location.
531533
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
532-
unsigned NewIndex, IRBuilder<> &Builder) {
534+
unsigned NewIndex, IRBuilderBase &Builder) {
533535
// The shuffle mask is poison except for 1 lane that is being translated
534536
// to the new element index. Example for OldIndex == 2 and NewIndex == 0:
535537
// ShufMask = { 2, poison, poison, poison }
@@ -545,7 +547,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
545547
/// unnecessary instructions.
546548
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
547549
unsigned NewIndex,
548-
IRBuilder<> &Builder) {
550+
IRBuilderBase &Builder) {
549551
// Shufflevectors can only be created for fixed-width vectors.
550552
Value *X = ExtElt->getVectorOperand();
551553
if (!isa<FixedVectorType>(X->getType()))
@@ -1459,10 +1461,12 @@ bool VectorCombine::foldBinopOfReductions(Instruction &I) {
14591461
LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I
14601462
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
14611463
<< "\n");
1462-
Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
1463-
if (auto *PDInst = dyn_cast<PossiblyDisjointInst>(&I))
1464-
if (auto *PDVectorBO = dyn_cast<PossiblyDisjointInst>(VectorBO))
1465-
PDVectorBO->setIsDisjoint(PDInst->isDisjoint());
1464+
Value *VectorBO;
1465+
if (BinOpOpc == Instruction::Or)
1466+
VectorBO = Builder.CreateOr(V0, V1, "",
1467+
cast<PossiblyDisjointInst>(I).isDisjoint());
1468+
else
1469+
VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
14661470

14671471
Instruction *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO});
14681472
replaceValue(I, *Rdx);
@@ -1519,7 +1523,7 @@ class ScalarizationResult {
15191523
}
15201524

15211525
/// Freeze the ToFreeze and update the use in \p User to use it.
1522-
void freeze(IRBuilder<> &Builder, Instruction &UserI) {
1526+
void freeze(IRBuilderBase &Builder, Instruction &UserI) {
15231527
assert(isSafeWithFreeze() &&
15241528
"should only be used when freezing is required");
15251529
assert(is_contained(ToFreeze->users(), &UserI) &&
@@ -2617,7 +2621,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
26172621
const SmallPtrSet<Use *, 4> &IdentityLeafs,
26182622
const SmallPtrSet<Use *, 4> &SplatLeafs,
26192623
const SmallPtrSet<Use *, 4> &ConcatLeafs,
2620-
IRBuilder<> &Builder,
2624+
IRBuilderBase &Builder,
26212625
const TargetTransformInfo *TTI) {
26222626
auto [FrontU, FrontLane] = Item.front();
26232627

llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,12 @@ target triple = "arm64-apple-darwin"
66
define void @load_extract_insert_store_const_idx(ptr %A) {
77
; CHECK-LABEL: @load_extract_insert_store_const_idx(
88
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0
10-
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8
9+
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0:%.*]], align 8
1110
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
12-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i32 0, i64 1
1312
; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8
1413
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
15-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1
14+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i64 0, i64 1
1615
; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8
1716
; CHECK-NEXT: ret void
1817
;

llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,7 @@ define i32 @load_extract_idx_0(ptr %x) {
1515

1616
define i32 @vscale_load_extract_idx_0(ptr %x) {
1717
; CHECK-LABEL: @vscale_load_extract_idx_0(
18-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
19-
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16
18+
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1:%.*]], align 16
2019
; CHECK-NEXT: ret i32 [[R]]
2120
;
2221
%lv = load <vscale x 4 x i32>, ptr %x

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -997,10 +997,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
997997
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
998998
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
999999
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
1000-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1001-
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
1002-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1003-
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
1000+
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
1001+
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
10041002
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
10051003
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
10061004
; CHECK-NEXT: ret <4 x i64> [[RES]]

llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,7 @@ define i32 @reducebase_v4i32(<4 x i32> %a, <4 x i32> %b) {
1616

1717
define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
1818
; CHECK-LABEL: @reduceshuffle_onein_v4i32(
19-
; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
19+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
2120
; CHECK-NEXT: ret i32 [[R]]
2221
;
2322
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -27,8 +26,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
2726

2827
define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) {
2928
; CHECK-LABEL: @reduceshuffle_onein_const_v4i32(
30-
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
31-
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
29+
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S:%.*]], splat (i32 -1)
3230
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
3331
; CHECK-NEXT: ret i32 [[R]]
3432
;
@@ -222,8 +220,7 @@ define i32 @reducebase_v16i32(<16 x i32> %a, <16 x i32> %b) {
222220

223221
define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
224222
; CHECK-LABEL: @reduceshuffle_onein_v16i32(
225-
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
226-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
223+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X:%.*]])
227224
; CHECK-NEXT: ret i32 [[R]]
228225
;
229226
%x = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -233,8 +230,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
233230

234231
define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
235232
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32(
236-
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
237-
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
233+
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S:%.*]], splat (i32 -1)
238234
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
239235
; CHECK-NEXT: ret i32 [[R]]
240236
;
@@ -353,8 +349,7 @@ define i16 @reducebase_v16i16(<16 x i16> %a, <16 x i16> %b) {
353349

354350
define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
355351
; CHECK-LABEL: @reduceshuffle_onein_v16i16(
356-
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
357-
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
352+
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X:%.*]])
358353
; CHECK-NEXT: ret i16 [[R]]
359354
;
360355
%x = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -364,8 +359,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
364359

365360
define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
366361
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16(
367-
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
368-
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
362+
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S:%.*]], splat (i16 -1)
369363
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
370364
; CHECK-NEXT: ret i16 [[R]]
371365
;

llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -486,10 +486,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
486486

487487
define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
488488
; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
489-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[X:%.*]], [[X]]
490-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
491-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
492-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
489+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
493490
;
494491
; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
495492
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -530,10 +527,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
530527

531528
define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
532529
; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
533-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[X:%.*]], [[X]]
534-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
535-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
536-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
530+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
537531
;
538532
; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
539533
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -574,10 +568,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
574568

575569
define <vscale x 1 x i64> @srem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
576570
; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
577-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[X:%.*]], [[X]]
578-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
579-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
580-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
571+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
581572
;
582573
; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
583574
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -618,10 +609,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
618609

619610
define <vscale x 1 x i64> @urem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
620611
; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
621-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[X:%.*]], [[X]]
622-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
623-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
624-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
612+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
625613
;
626614
; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
627615
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -1572,8 +1560,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
15721560
; VEC-COMBINE-64-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
15731561
; VEC-COMBINE-64-NEXT: [[TMP1:%.*]] = add i64 [[Y:%.*]], 42
15741562
; VEC-COMBINE-64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP1]], i64 0
1575-
; VEC-COMBINE-64-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[DOTSPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
1576-
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP2]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
1563+
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[DOTSPLATINSERT]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
15771564
; VEC-COMBINE-64-NEXT: ret <1 x i64> [[TMP3]]
15781565
;
15791566
; NO-VEC-COMBINE-LABEL: @add_v1i64_allonesmask(

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,13 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
4848
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
4949
; CHECK-LABEL: @ext2_v2f32v4f32(
5050
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
51-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
52-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
51+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
52+
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
5353
; CHECK-NEXT: ret <4 x float> [[R]]
5454
;
55-
%e = extractelement <2 x float> %x, i32 2
55+
%e = extractelement <2 x float> %x, i32 1
5656
%n = fneg float %e
57-
%r = insertelement <4 x float> %y, float %n, i32 2
57+
%r = insertelement <4 x float> %y, float %n, i32 1
5858
ret <4 x float> %r
5959
}
6060

llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,7 @@
66
define void @multiple_extract(ptr %p) {
77
; CHECK-LABEL: @multiple_extract(
88
; CHECK-NEXT: [[VP:%.*]] = load ptr, ptr [[P:%.*]], align 8
9-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 0
10-
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[TMP1]], align 16
9+
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[VP]], align 16
1110
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 1
1211
; CHECK-NEXT: [[E1:%.*]] = load i32, ptr [[TMP2]], align 4
1312
; CHECK-NEXT: store i32 [[E0]], ptr [[P]], align 4

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -657,11 +657,10 @@ define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16)
657657
; PR30986 - split vector loads for scalarized operations
658658
define <2 x i64> @PR30986(ptr %0) {
659659
; CHECK-LABEL: @PR30986(
660-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
661-
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
660+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16
662661
; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
663662
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0
664-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
663+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1
665664
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
666665
; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
667666
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1

0 commit comments

Comments
 (0)