Skip to content

Commit eb14d2a

Browse files
committed
[SLP]Fix check for matched gather node, if it is a subvector node
If the gather node is a subvector node, it may match an existing vector/gather node in the graph but still require reordering. In this case, its dependencies need to be fully checked to prevent a compiler crash. Fixes llvm#128401
1 parent 22a5bb3 commit eb14d2a

File tree

2 files changed

+130
-11
lines changed

2 files changed

+130
-11
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13320,9 +13320,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1332013320
Entries.clear();
1332113321
// TODO: currently checking only for Scalars in the tree entry, need to count
1332213322
// reused elements too for better cost estimation.
13323-
const EdgeInfo &TEUseEI = TE == VectorizableTree.front().get()
13324-
? EdgeInfo(const_cast<TreeEntry *>(TE), 0)
13325-
: TE->UserTreeIndex;
13323+
auto GetUserEntry = [&](const TreeEntry *TE) {
13324+
while (TE->UserTreeIndex && TE->UserTreeIndex.EdgeIdx == UINT_MAX)
13325+
TE = TE->UserTreeIndex.UserTE;
13326+
if (TE == VectorizableTree.front().get())
13327+
return EdgeInfo(const_cast<TreeEntry *>(TE), 0);
13328+
return TE->UserTreeIndex;
13329+
};
13330+
const EdgeInfo TEUseEI = GetUserEntry(TE);
13331+
if (!TEUseEI)
13332+
return std::nullopt;
1332613333
const Instruction *TEInsertPt = &getLastInstructionInBundle(TEUseEI.UserTE);
1332713334
const BasicBlock *TEInsertBlock = nullptr;
1332813335
// Main node of PHI entries keeps the correct order of operands/incoming
@@ -13874,15 +13881,13 @@ BoUpSLP::isGatherShuffledEntry(
1387413881
assert(VL.size() % NumParts == 0 &&
1387513882
"Number of scalars must be divisible by NumParts.");
1387613883
if (TE->UserTreeIndex && TE->UserTreeIndex.UserTE->isGather() &&
13877-
TE->UserTreeIndex.EdgeIdx == UINT_MAX) {
13878-
assert(
13879-
(TE->Idx == 0 ||
13880-
(TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
13881-
isSplat(TE->Scalars) ||
13882-
getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars)) &&
13883-
"Expected splat or extractelements only node.");
13884+
TE->UserTreeIndex.EdgeIdx == UINT_MAX &&
13885+
(TE->Idx == 0 ||
13886+
(TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
13887+
isSplat(TE->Scalars) ||
13888+
(TE->hasState() &&
13889+
getSameValuesTreeEntry(TE->getMainOp(), TE->Scalars))))
1388413890
return {};
13885-
}
1388613891
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
1388713892
SmallVector<std::optional<TTI::ShuffleKind>> Res;
1388813893
for (unsigned Part : seq<unsigned>(NumParts)) {
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
@g = global [8 x i8] zeroinitializer
5+
6+
define i32 @main() {
7+
; CHECK-LABEL: define i32 @main() {
8+
; CHECK-NEXT: [[ENTRY:.*]]:
9+
; CHECK-NEXT: [[L2_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
10+
; CHECK-NEXT: [[LI_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
11+
; CHECK-NEXT: [[L1_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
12+
; CHECK-NEXT: [[L6_I_I_I_I:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
13+
; CHECK-NEXT: br i1 false, label %[[IF_END151_1_I_I_1_I_I_I:.*]], label %[[END:.*]]
14+
; CHECK: [[PRE:.*]]:
15+
; CHECK-NEXT: br label %[[END]]
16+
; CHECK: [[END]]:
17+
; CHECK-NEXT: [[TMP0:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
18+
; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
19+
; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
20+
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
21+
; CHECK-NEXT: [[TMP4:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L1_I_I_I_I]], %[[ENTRY]] ]
22+
; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L6_I_I_I_I]], %[[ENTRY]] ]
23+
; CHECK-NEXT: [[TMP6:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[L2_I_I_I_I]], %[[ENTRY]] ]
24+
; CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ 0, %[[PRE]] ], [ [[LI_I_I_I]], %[[ENTRY]] ]
25+
; CHECK-NEXT: br label %[[IF_END151_1_I_I_1_I_I_I]]
26+
; CHECK: [[IF_END151_1_I_I_1_I_I_I]]:
27+
; CHECK-NEXT: [[TMP8:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
28+
; CHECK-NEXT: [[TMP9:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
29+
; CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
30+
; CHECK-NEXT: [[TMP11:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
31+
; CHECK-NEXT: [[TMP12:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP3]], %[[END]] ]
32+
; CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP2]], %[[END]] ]
33+
; CHECK-NEXT: [[TMP14:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP1]], %[[END]] ]
34+
; CHECK-NEXT: [[TMP15:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP0]], %[[END]] ]
35+
; CHECK-NEXT: [[TMP16:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
36+
; CHECK-NEXT: [[TMP17:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
37+
; CHECK-NEXT: [[TMP18:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
38+
; CHECK-NEXT: [[TMP19:%.*]] = phi i8 [ [[L1_I_I_I_I]], %[[ENTRY]] ], [ [[TMP4]], %[[END]] ]
39+
; CHECK-NEXT: [[TMP20:%.*]] = phi i8 [ [[L6_I_I_I_I]], %[[ENTRY]] ], [ [[TMP5]], %[[END]] ]
40+
; CHECK-NEXT: [[TMP21:%.*]] = phi i8 [ [[L2_I_I_I_I]], %[[ENTRY]] ], [ [[TMP6]], %[[END]] ]
41+
; CHECK-NEXT: [[TMP22:%.*]] = phi i8 [ [[LI_I_I_I]], %[[ENTRY]] ], [ [[TMP7]], %[[END]] ]
42+
; CHECK-NEXT: [[TOBOOL_NOT_I_1121_I_1_I_I_I:%.*]] = icmp ne i8 [[TMP18]], 0
43+
; CHECK-NEXT: [[C1_1_1117_I_1_I9_I_I:%.*]] = icmp eq i8 [[TMP17]], 0
44+
; CHECK-NEXT: [[C1_I_1_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
45+
; CHECK-NEXT: [[C1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
46+
; CHECK-NEXT: [[CMP258_I_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP13]], 0
47+
; CHECK-NEXT: [[C1_I_I_I_I:%.*]] = icmp eq i8 [[TMP14]], 0
48+
; CHECK-NEXT: [[CMP258_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
49+
; CHECK-NEXT: [[C1_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 0
50+
; CHECK-NEXT: [[CMP258_I_1_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
51+
; CHECK-NEXT: [[C1_187_I_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
52+
; CHECK-NEXT: [[CMP258_I_185_I_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
53+
; CHECK-NEXT: [[C1_1_1117_I_I_I_I:%.*]] = icmp eq i8 [[TMP19]], 0
54+
; CHECK-NEXT: [[CMP258_I_1_1115_I_I_I_I:%.*]] = icmp eq i8 [[TMP20]], 0
55+
; CHECK-NEXT: [[C1_1113_I_I_I_I:%.*]] = icmp eq i8 [[TMP21]], 0
56+
; CHECK-NEXT: [[CMP258_I_1111_I_I_I_I:%.*]] = icmp eq i8 [[TMP22]], 0
57+
; CHECK-NEXT: [[C1_187_1_I_I_I_I:%.*]] = icmp eq i8 [[TMP16]], 0
58+
; CHECK-NEXT: ret i32 0
59+
;
60+
entry:
61+
%l2.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 5), align 1
62+
%li.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 1
63+
%l1.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 7), align 1
64+
%l6.i.i.i.i = load i8, ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), align 1
65+
br i1 false, label %if.end151.1.i.i.1.i.i.i, label %end
66+
67+
pre:
68+
br label %end
69+
70+
end:
71+
%0 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
72+
%1 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
73+
%2 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
74+
%3 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
75+
%4 = phi i8 [ 0, %pre ], [ %l1.i.i.i.i, %entry ]
76+
%5 = phi i8 [ 0, %pre ], [ %l6.i.i.i.i, %entry ]
77+
%6 = phi i8 [ 0, %pre ], [ %l2.i.i.i.i, %entry ]
78+
%7 = phi i8 [ 0, %pre ], [ %li.i.i.i, %entry ]
79+
br label %if.end151.1.i.i.1.i.i.i
80+
81+
if.end151.1.i.i.1.i.i.i:
82+
%8 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
83+
%9 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
84+
%10 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
85+
%11 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
86+
%12 = phi i8 [ %l1.i.i.i.i, %entry ], [ %3, %end ]
87+
%13 = phi i8 [ 0, %entry ], [ %2, %end ]
88+
%14 = phi i8 [ %l2.i.i.i.i, %entry ], [ %1, %end ]
89+
%15 = phi i8 [ %li.i.i.i, %entry ], [ %0, %end ]
90+
%16 = phi i8 [ 0, %entry ], [ %6, %end ]
91+
%17 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
92+
%18 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
93+
%19 = phi i8 [ %l1.i.i.i.i, %entry ], [ %4, %end ]
94+
%20 = phi i8 [ %l6.i.i.i.i, %entry ], [ %5, %end ]
95+
%21 = phi i8 [ %l2.i.i.i.i, %entry ], [ %6, %end ]
96+
%22 = phi i8 [ %li.i.i.i, %entry ], [ %7, %end ]
97+
%tobool.not.i.1121.i.1.i.i.i = icmp ne i8 %18, 0
98+
%c1.1.1117.i.1.i9.i.i = icmp eq i8 %17, 0
99+
%c1.i.1.i.i.i = icmp eq i8 %16, 0
100+
%c1.1.i.i.i.i = icmp eq i8 %12, 0
101+
%cmp258.i.1.i.i.i.i = icmp eq i8 %13, 0
102+
%c1.i.i.i.i = icmp eq i8 %14, 0
103+
%cmp258.i.i.i.i.i = icmp eq i8 %15, 0
104+
%c1.1.1.i.i.i.i = icmp eq i8 %11, 0
105+
%cmp258.i.1.1.i.i.i.i = icmp eq i8 %10, 0
106+
%c1.187.i.i.i.i = icmp eq i8 %9, 0
107+
%cmp258.i.185.i.i.i.i = icmp eq i8 %8, 0
108+
%c1.1.1117.i.i.i.i = icmp eq i8 %19, 0
109+
%cmp258.i.1.1115.i.i.i.i = icmp eq i8 %20, 0
110+
%c1.1113.i.i.i.i = icmp eq i8 %21, 0
111+
%cmp258.i.1111.i.i.i.i = icmp eq i8 %22, 0
112+
%c1.187.1.i.i.i.i = icmp eq i8 %16, 0
113+
ret i32 0
114+
}

0 commit comments

Comments
 (0)