Skip to content

Commit f2f02b2

Browse files
committed
[VectorCombine] foldShuffleOfBinops - only accept exact matching cmp predicates
m_SpecificCmp allowed equivalent predicate+flags, which don't necessarily work after being folded from "shuffle (cmpop), (cmpop)" into "cmpop (shuffle), (shuffle)". Fixes #121110.
1 parent f68dbbb commit f2f02b2

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,16 +1669,18 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
16691669

16701670
Value *X, *Y, *Z, *W;
16711671
bool IsCommutative = false;
1672-
CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1672+
CmpPredicate PredLHS = CmpInst::BAD_ICMP_PREDICATE;
1673+
CmpPredicate PredRHS = CmpInst::BAD_ICMP_PREDICATE;
16731674
if (match(LHS, m_BinOp(m_Value(X), m_Value(Y))) &&
16741675
match(RHS, m_BinOp(m_Value(Z), m_Value(W)))) {
16751676
auto *BO = cast<BinaryOperator>(LHS);
16761677
// Don't introduce poison into div/rem.
16771678
if (llvm::is_contained(OldMask, PoisonMaskElem) && BO->isIntDivRem())
16781679
return false;
16791680
IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1680-
} else if (match(LHS, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
1681-
match(RHS, m_SpecificCmp(Pred, m_Value(Z), m_Value(W)))) {
1681+
} else if (match(LHS, m_Cmp(PredLHS, m_Value(X), m_Value(Y))) &&
1682+
match(RHS, m_Cmp(PredRHS, m_Value(Z), m_Value(W))) &&
1683+
(CmpInst::Predicate)PredLHS == (CmpInst::Predicate)PredRHS) {
16821684
IsCommutative = cast<CmpInst>(LHS)->isCommutative();
16831685
} else
16841686
return false;
@@ -1727,14 +1729,14 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
17271729
TTI.getShuffleCost(SK0, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z}) +
17281730
TTI.getShuffleCost(SK1, BinOpTy, NewMask1, CostKind, 0, nullptr, {Y, W});
17291731

1730-
if (Pred == CmpInst::BAD_ICMP_PREDICATE) {
1732+
if (PredLHS == CmpInst::BAD_ICMP_PREDICATE) {
17311733
NewCost +=
17321734
TTI.getArithmeticInstrCost(LHS->getOpcode(), ShuffleDstTy, CostKind);
17331735
} else {
17341736
auto *ShuffleCmpTy =
17351737
FixedVectorType::get(BinOpTy->getElementType(), ShuffleDstTy);
17361738
NewCost += TTI.getCmpSelInstrCost(LHS->getOpcode(), ShuffleCmpTy,
1737-
ShuffleDstTy, Pred, CostKind);
1739+
ShuffleDstTy, PredLHS, CostKind);
17381740
}
17391741

17401742
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
@@ -1750,10 +1752,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
17501752

17511753
Value *Shuf0 = Builder.CreateShuffleVector(X, Z, NewMask0);
17521754
Value *Shuf1 = Builder.CreateShuffleVector(Y, W, NewMask1);
1753-
Value *NewBO = Pred == CmpInst::BAD_ICMP_PREDICATE
1755+
Value *NewBO = PredLHS == CmpInst::BAD_ICMP_PREDICATE
17541756
? Builder.CreateBinOp(
17551757
cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1756-
: Builder.CreateCmp(Pred, Shuf0, Shuf1);
1758+
: Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
17571759

17581760
// Intersect flags from the old binops.
17591761
if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {

llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,11 +276,15 @@ define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32>
276276
ret <4 x i32> %r
277277
}
278278

279-
; TODO: PR121110 - don't merge equivalent (but not matching) predicates
279+
; PR121110 - don't merge equivalent (but not matching) predicates
280+
280281
define <2 x i1> @PR121110() {
281282
; CHECK-LABEL: define <2 x i1> @PR121110(
282283
; CHECK-SAME: ) #[[ATTR0]] {
283-
; CHECK-NEXT: ret <2 x i1> zeroinitializer
284+
; CHECK-NEXT: [[UGT:%.*]] = icmp samesign ugt <2 x i32> zeroinitializer, zeroinitializer
285+
; CHECK-NEXT: [[SGT:%.*]] = icmp sgt <2 x i32> zeroinitializer, <i32 6, i32 -4>
286+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i1> [[UGT]], <2 x i1> [[SGT]], <2 x i32> <i32 0, i32 3>
287+
; CHECK-NEXT: ret <2 x i1> [[RES]]
284288
;
285289
%ugt = icmp samesign ugt <2 x i32> < i32 0, i32 0 >, < i32 0, i32 0 >
286290
%sgt = icmp sgt <2 x i32> < i32 0, i32 0 >, < i32 6, i32 4294967292 >
@@ -291,7 +295,10 @@ define <2 x i1> @PR121110() {
291295
define <2 x i1> @PR121110_commute() {
292296
; CHECK-LABEL: define <2 x i1> @PR121110_commute(
293297
; CHECK-SAME: ) #[[ATTR0]] {
294-
; CHECK-NEXT: ret <2 x i1> zeroinitializer
298+
; CHECK-NEXT: [[SGT:%.*]] = icmp sgt <2 x i32> zeroinitializer, <i32 6, i32 -4>
299+
; CHECK-NEXT: [[UGT:%.*]] = icmp samesign ugt <2 x i32> zeroinitializer, zeroinitializer
300+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i1> [[SGT]], <2 x i1> [[UGT]], <2 x i32> <i32 0, i32 3>
301+
; CHECK-NEXT: ret <2 x i1> [[RES]]
295302
;
296303
%sgt = icmp sgt <2 x i32> < i32 0, i32 0 >, < i32 6, i32 4294967292 >
297304
%ugt = icmp samesign ugt <2 x i32> < i32 0, i32 0 >, < i32 0, i32 0 >

0 commit comments

Comments
 (0)