Commit ad9c0b3

[SLP] Check if the gathered loads form a full vector before attempting to build it
Need to check that the number of gathered loads in the slice forms a full build vector, to avoid a compiler crash. Fixes #116691
1 parent a4e1a3d commit ad9c0b3
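For context, the crash happened when a slice of gathered loads was handed to the build-vector path even though its element count could not form a full vector. The snippet below is a minimal, self-contained C++ model of that precondition, not LLVM's hasFullVectorsOrPowerOf2 itself; the function name and the LanesPerReg parameter are illustrative assumptions standing in for what TTI would report for the element type.

#include <cassert>
#include <cstdio>

// Sketch (assumption): a slice of N gathered loads is only considered for a
// build vector if N is a power of two, or a whole multiple of the number of
// lanes one register holds, so a partially filled vector is never requested.
static bool formsFullVectorsOrPowerOf2(unsigned NumLoads, unsigned LanesPerReg) {
  assert(LanesPerReg > 0 && "lane count must be positive");
  bool IsPowerOf2 = NumLoads != 0 && (NumLoads & (NumLoads - 1)) == 0;
  bool FullVectors = NumLoads >= LanesPerReg && NumLoads % LanesPerReg == 0;
  return IsPowerOf2 || FullVectors;
}

int main() {
  // With 4 doubles per register: 8 loads pass the check, 6 loads do not.
  std::printf("8 loads -> %d\n", formsFullVectorsOrPowerOf2(8, 4)); // 1
  std::printf("6 loads -> %d\n", formsFullVectorsOrPowerOf2(6, 4)); // 0
  return 0;
}

In the patch itself this gate is the hasFullVectorsOrPowerOf2(*TTI, Slice.front()->getType(), Slice.size()) helper already present in SLPVectorizer.cpp, now applied to the non-2-element branch of the AllowToVectorize computation (see the diff below).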


2 files changed (+156, -11 lines)


llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 11 deletions
@@ -6815,16 +6815,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
       // Check if it is profitable to try vectorizing gathered loads. It is
       // profitable if we have more than 3 consecutive loads or if we have
       // less but all users are vectorized or deleted.
-      bool AllowToVectorize =
-          NumElts >= 3 ||
-          any_of(ValueToGatherNodes.at(Slice.front()),
-                 [=](const TreeEntry *TE) {
-                   return TE->Scalars.size() == 2 &&
-                          ((TE->Scalars.front() == Slice.front() &&
-                            TE->Scalars.back() == Slice.back()) ||
-                           (TE->Scalars.front() == Slice.back() &&
-                            TE->Scalars.back() == Slice.front()));
-                 });
+      bool AllowToVectorize = false;
       // Check if it is profitable to vectorize 2-elements loads.
       if (NumElts == 2) {
         bool IsLegalBroadcastLoad = TTI->isLegalBroadcastLoad(
@@ -6861,6 +6852,19 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
           return true;
         };
         AllowToVectorize = CheckIfAllowed(Slice);
+      } else {
+        AllowToVectorize =
+            (NumElts >= 3 ||
+             any_of(ValueToGatherNodes.at(Slice.front()),
+                    [=](const TreeEntry *TE) {
+                      return TE->Scalars.size() == 2 &&
+                             ((TE->Scalars.front() == Slice.front() &&
+                               TE->Scalars.back() == Slice.back()) ||
+                              (TE->Scalars.front() == Slice.back() &&
+                               TE->Scalars.back() == Slice.front()));
+                    })) &&
+            hasFullVectorsOrPowerOf2(*TTI, Slice.front()->getType(),
+                                     Slice.size());
       }
       if (AllowToVectorize) {
         SmallVector<Value *> PointerOps;
@@ -6903,7 +6907,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
       }
       // Mark masked gathers candidates as vectorized, if any.
       for (unsigned Cnt : MaskedGatherVectorized) {
-        ArrayRef<LoadInst *> Slice = ArrayRef(Loads).slice(Cnt, NumElts);
+        ArrayRef<LoadInst *> Slice = ArrayRef(Loads).slice(
+            Cnt, std::min<unsigned>(NumElts, Loads.size() - Cnt));
         ArrayRef<Value *> Values(
             reinterpret_cast<Value *const *>(Slice.begin()), Slice.size());
         Results.emplace_back(Values, LoadsState::ScatterVectorize);
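The last hunk also clamps the masked-gather slice length: ArrayRef::slice(N, M) asserts that N + M does not exceed the array size, so the final window over Loads must not request more elements than remain past Cnt. Below is a minimal, self-contained sketch of the same clamping pattern (C++20, using std::span in place of llvm::ArrayRef; the values and window size are made up).

#include <algorithm>
#include <cstdio>
#include <span>
#include <vector>

int main() {
  std::vector<int> Loads = {1, 2, 3, 4, 5, 6, 7}; // 7 pseudo-loads
  const unsigned NumElts = 4;                     // window size
  for (unsigned Cnt = 0; Cnt < Loads.size(); Cnt += NumElts) {
    // Clamp the window so the last slice never runs past the end.
    unsigned Len = std::min<unsigned>(NumElts, Loads.size() - Cnt);
    std::span<const int> Slice(Loads.data() + Cnt, Len);
    std::printf("slice at %u has %zu elements\n", Cnt, Slice.size());
  }
  return 0;
}

Without the clamp, the second window here would ask for 4 elements starting at index 4 of a 7-element array, which is exactly the out-of-bounds slice the original ArrayRef(Loads).slice(Cnt, NumElts) could produce.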
(new regression test file)
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux -mcpu=cascadelake < %s | FileCheck %s
+
+@solid_ = external global [608 x i8]
+
+define void @test(ptr noalias %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noalias [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[_LR_PH1019:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 128
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 200
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 208
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i64 232
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i64 320
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i64 304
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i64 424
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 480
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i64 504
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i64 632
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 720
+; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP17:%.*]] = fadd double [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP22:%.*]] = fadd double [[TMP21]], [[TMP20]]
+; CHECK-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP29:%.*]] = fadd double [[TMP28]], [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = fmul double [[TMP22]], [[TMP18]]
+; CHECK-NEXT: [[TMP31:%.*]] = fmul double [[TMP30]], 0.000000e+00
+; CHECK-NEXT: [[TMP32:%.*]] = fsub double 0.000000e+00, [[TMP25]]
+; CHECK-NEXT: [[TMP33:%.*]] = fmul double [[TMP32]], 0.000000e+00
+; CHECK-NEXT: [[TMP34:%.*]] = fadd double [[TMP33]], 0.000000e+00
+; CHECK-NEXT: [[TMP35:%.*]] = fmul double [[TMP34]], 0.000000e+00
+; CHECK-NEXT: [[TMP36:%.*]] = fmul double [[TMP29]], [[TMP26]]
+; CHECK-NEXT: [[TMP37:%.*]] = fmul double [[TMP36]], 0.000000e+00
+; CHECK-NEXT: [[TMP38:%.*]] = fadd double [[TMP37]], 0.000000e+00
+; CHECK-NEXT: [[TMP39:%.*]] = fsub double [[TMP17]], [[TMP19]]
+; CHECK-NEXT: [[TMP40:%.*]] = fmul double [[TMP39]], [[TMP23]]
+; CHECK-NEXT: [[TMP41:%.*]] = fmul double [[TMP40]], 0.000000e+00
+; CHECK-NEXT: [[TMP42:%.*]] = load double, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP43:%.*]] = load double, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[TMP44:%.*]] = fmul double [[TMP43]], [[TMP31]]
+; CHECK-NEXT: [[TMP45:%.*]] = load double, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[TMP46:%.*]] = fmul double [[TMP35]], 0.000000e+00
+; CHECK-NEXT: [[TMP47:%.*]] = fadd double [[TMP44]], 0.000000e+00
+; CHECK-NEXT: [[TMP48:%.*]] = fmul double [[TMP45]], [[TMP38]]
+; CHECK-NEXT: [[TMP49:%.*]] = fmul double [[TMP45]], [[TMP41]]
+; CHECK-NEXT: store double [[TMP46]], ptr getelementptr inbounds (i8, ptr @solid_, i64 384), align 8
+; CHECK-NEXT: store double [[TMP47]], ptr getelementptr inbounds (i8, ptr @solid_, i64 408), align 8
+; CHECK-NEXT: store double [[TMP48]], ptr getelementptr inbounds (i8, ptr @solid_, i64 392), align 8
+; CHECK-NEXT: store double [[TMP49]], ptr getelementptr inbounds (i8, ptr @solid_, i64 400), align 8
+; CHECK-NEXT: [[DOTNEG965:%.*]] = fmul double [[TMP48]], [[TMP24]]
+; CHECK-NEXT: [[REASS_ADD993:%.*]] = fadd double [[DOTNEG965]], 0.000000e+00
+; CHECK-NEXT: [[TMP50:%.*]] = fadd double [[TMP42]], [[REASS_ADD993]]
+; CHECK-NEXT: [[TMP51:%.*]] = fsub double 0.000000e+00, [[TMP50]]
+; CHECK-NEXT: store double [[TMP51]], ptr getelementptr inbounds (i8, ptr @solid_, i64 296), align 8
+; CHECK-NEXT: [[DOTNEG969:%.*]] = fmul double [[TMP49]], 0.000000e+00
+; CHECK-NEXT: [[REASS_ADD996:%.*]] = fadd double [[DOTNEG969]], 0.000000e+00
+; CHECK-NEXT: [[TMP52:%.*]] = fadd double [[TMP45]], [[REASS_ADD996]]
+; CHECK-NEXT: [[TMP53:%.*]] = fsub double 0.000000e+00, [[TMP52]]
+; CHECK-NEXT: store double [[TMP53]], ptr getelementptr inbounds (i8, ptr @solid_, i64 304), align 8
+; CHECK-NEXT: ret void
+;
+.lr.ph1019:
+  %1 = getelementptr i8, ptr %0, i64 8
+  %2 = getelementptr i8, ptr %0, i64 32
+  %3 = getelementptr i8, ptr %0, i64 128
+  %4 = getelementptr i8, ptr %0, i64 200
+  %5 = getelementptr i8, ptr %0, i64 208
+  %6 = getelementptr i8, ptr %0, i64 232
+  %7 = getelementptr i8, ptr %0, i64 288
+  %8 = getelementptr i8, ptr %0, i64 320
+  %9 = getelementptr i8, ptr %0, i64 304
+  %10 = getelementptr i8, ptr %0, i64 424
+  %11 = getelementptr i8, ptr %0, i64 480
+  %12 = getelementptr i8, ptr %0, i64 504
+  %13 = getelementptr i8, ptr %0, i64 632
+  %14 = getelementptr i8, ptr %0, i64 720
+  %15 = load double, ptr %1, align 8
+  %16 = load double, ptr %2, align 8
+  %17 = fadd double %16, %15
+  %18 = load double, ptr %3, align 8
+  %19 = load double, ptr %4, align 8
+  %20 = load double, ptr %5, align 8
+  %21 = load double, ptr %6, align 8
+  %22 = fadd double %21, %20
+  %23 = load double, ptr %7, align 8
+  %24 = load double, ptr %8, align 8
+  %25 = load double, ptr %9, align 8
+  %26 = load double, ptr %10, align 8
+  %27 = load double, ptr %11, align 8
+  %28 = load double, ptr %12, align 8
+  %29 = fadd double %28, %27
+  %30 = fmul double %22, %18
+  %31 = fmul double %30, 0.000000e+00
+  %32 = fsub double 0.000000e+00, %25
+  %33 = fmul double %32, 0.000000e+00
+  %34 = fadd double %33, 0.000000e+00
+  %35 = fmul double %34, 0.000000e+00
+  %36 = fmul double %29, %26
+  %37 = fmul double %36, 0.000000e+00
+  %38 = fadd double %37, 0.000000e+00
+  %39 = fsub double %17, %19
+  %40 = fmul double %39, %23
+  %41 = fmul double %40, 0.000000e+00
+  %42 = load double, ptr %0, align 8
+  %43 = load double, ptr %13, align 8
+  %44 = fmul double %43, %31
+  %45 = load double, ptr %14, align 8
+  %46 = fmul double %35, 0.000000e+00
+  %47 = fadd double %44, 0.000000e+00
+  %48 = fmul double %45, %38
+  %49 = fmul double %45, %41
+  store double %46, ptr getelementptr inbounds (i8, ptr @solid_, i64 384), align 8
+  store double %47, ptr getelementptr inbounds (i8, ptr @solid_, i64 408), align 8
+  store double %48, ptr getelementptr inbounds (i8, ptr @solid_, i64 392), align 8
+  store double %49, ptr getelementptr inbounds (i8, ptr @solid_, i64 400), align 8
+  %.neg965 = fmul double %48, %24
+  %reass.add993 = fadd double %.neg965, 0.000000e+00
+  %50 = fadd double %42, %reass.add993
+  %51 = fsub double 0.000000e+00, %50
+  store double %51, ptr getelementptr inbounds (i8, ptr @solid_, i64 296), align 8
+  %.neg969 = fmul double %49, 0.000000e+00
+  %reass.add996 = fadd double %.neg969, 0.000000e+00
+  %52 = fadd double %45, %reass.add996
+  %53 = fsub double 0.000000e+00, %52
+  store double %53, ptr getelementptr inbounds (i8, ptr @solid_, i64 304), align 8
+  ret void
+}
