Skip to content

Commit 50facad

Browse files
authored
[SLP][REVEC] Fix insertelement legality checks (#146921)
The current code assumes that every value in VL is an instruction, but VL can also contain poison values. When a poison value is found, fall back to gathering instead of casting it to an Instruction.
1 parent 7edf6bf commit 50facad

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9060,6 +9060,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
90609060
// different vectors.
90619061
ValueSet SourceVectors;
90629062
for (Value *V : VL) {
9063+
if (isa<PoisonValue>(V)) {
9064+
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement/poison vector.\n");
9065+
return TreeEntry::NeedToGather;
9066+
}
90639067
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
90649068
assert(getElementIndex(V) != std::nullopt &&
90659069
"Non-constant or undef index?");
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-1000 %s | FileCheck %s
3+
4+
; The 4 stores can be re-vectorised; make sure the poison sources
5+
; are safely handled when trying to vectorise [ %0, poison, poison, %1 ].
; Expected output: the two middle poison stores become a single
; <8 x i32> poison store, while the stores of %0 and %1 stay <4 x i32>.
6+
define void @test_missing_lanes_1_2(ptr %ptr, i32 %val0, i32 %val1) {
7+
; CHECK-LABEL: @test_missing_lanes_1_2(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
10+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
11+
; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
12+
; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
13+
; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
14+
; CHECK-NEXT: store <8 x i32> poison, ptr [[GETELEMENTPTR1]], align 4
15+
; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
16+
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR3]], align 4
17+
; CHECK-NEXT: ret void
18+
;
19+
entry:
20+
%0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
21+
%1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
22+
; Four <4 x i32> stores at i32 offsets 0, 4, 8 and 12 from %ptr; the stores at offsets 4 and 8 write poison.
23+
%getelementptr0 = getelementptr i32, ptr %ptr, i64 0
24+
store <4 x i32> %0, ptr %getelementptr0, align 4
25+
%getelementptr1 = getelementptr i32, ptr %ptr, i64 4
26+
store <4 x i32> poison, ptr %getelementptr1, align 4
27+
%getelementptr2 = getelementptr i32, ptr %ptr, i64 8
28+
store <4 x i32> poison, ptr %getelementptr2, align 4
29+
%getelementptr3 = getelementptr i32, ptr %ptr, i64 12
30+
store <4 x i32> %1, ptr %getelementptr3, align 4
31+
32+
ret void
33+
}
34+
35+
; The 4 stores can be re-vectorised; make sure the poison sources
36+
; are safely handled when trying to vectorise [ %0, poison, %1, poison ].
; Expected output: the stores at offsets 4 and 8 (poison and %1) become one
; <8 x i32> store with %1 inserted at element 4; the store of %0 and the
; trailing poison store stay <4 x i32>.
37+
define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) {
38+
; CHECK-LABEL: @test_missing_lanes_1_3(
39+
; CHECK-NEXT: entry:
40+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
41+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
42+
; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
43+
; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
44+
; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
45+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4)
46+
; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4
47+
; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
48+
; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4
49+
; CHECK-NEXT: ret void
50+
;
51+
entry:
52+
%0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
53+
%1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
54+
; Four <4 x i32> stores at i32 offsets 0, 4, 8 and 12 from %ptr; the stores at offsets 4 and 12 write poison.
55+
%getelementptr0 = getelementptr i32, ptr %ptr, i64 0
56+
store <4 x i32> %0, ptr %getelementptr0, align 4
57+
%getelementptr1 = getelementptr i32, ptr %ptr, i64 4
58+
store <4 x i32> poison, ptr %getelementptr1, align 4
59+
%getelementptr2 = getelementptr i32, ptr %ptr, i64 8
60+
store <4 x i32> %1, ptr %getelementptr2, align 4
61+
%getelementptr3 = getelementptr i32, ptr %ptr, i64 12
62+
store <4 x i32> poison, ptr %getelementptr3, align 4
63+
64+
ret void
65+
}
66+
67+
; This could be re-vectorised to use a store <8 x i32> instruction.
; The expected output currently keeps the two <4 x i32> stores unchanged.
68+
define void @test_valid_value_operands(ptr %ptr, i32 %val0, i32 %val1) {
69+
; CHECK-LABEL: @test_valid_value_operands(
70+
; CHECK-NEXT: entry:
71+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
72+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
73+
; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
74+
; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
75+
; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
76+
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR1]], align 4
77+
; CHECK-NEXT: ret void
78+
;
79+
entry:
80+
%0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
81+
%1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
82+
; Two adjacent <4 x i32> stores at i32 offsets 0 and 4 from %ptr; neither stores poison.
83+
%getelementptr0 = getelementptr i32, ptr %ptr, i64 0
84+
store <4 x i32> %0, ptr %getelementptr0, align 4
85+
%getelementptr1 = getelementptr i32, ptr %ptr, i64 4
86+
store <4 x i32> %1, ptr %getelementptr1, align 4
87+
ret void
88+
}

0 commit comments

Comments
 (0)