Skip to content

Commit 708eb1b

Browse files
committed
[SLP] Add shuffling of extractelements to avoid extra costs/data movement.
If a scalar must be extracted from a vector and then used in a gather node, we can instead emit a shuffle instruction, avoiding the extra extractelement instructions and the vector-to-scalar-and-back data movement. Part of D110978. Differential Revision: https://reviews.llvm.org/D141940
1 parent c6c6723 commit 708eb1b

File tree

8 files changed

+382
-257
lines changed

8 files changed

+382
-257
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 298 additions & 37 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -110,18 +110,15 @@ define i8 @j(<4 x i8> %x, <4 x i8> %y) {
110110

111111
define i8 @k(<4 x i8> %x) {
112112
; CHECK-LABEL: @k(
113-
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i64 0
114-
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i64 3
115-
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i64 1
116-
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i64 2
117-
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
118-
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
119-
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
120-
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
121-
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
122-
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
123-
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
124-
; CHECK-NEXT: ret i8 [[TMP3]]
113+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
114+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
115+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
116+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 3, i32 2>
117+
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
118+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i64 0
119+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i64 1
120+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
121+
; CHECK-NEXT: ret i8 [[TMP8]]
125122
;
126123
%x0 = extractelement <4 x i8> %x, i32 0
127124
%x3 = extractelement <4 x i8> %x, i32 3
@@ -141,18 +138,15 @@ define i8 @k_bb(<4 x i8> %x) {
141138
; CHECK-LABEL: @k_bb(
142139
; CHECK-NEXT: br label [[BB1:%.*]]
143140
; CHECK: bb1:
144-
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i64 0
145-
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i64 3
146-
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i64 1
147-
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i64 2
148-
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
149-
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
150-
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
151-
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
152-
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
153-
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
154-
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
155-
; CHECK-NEXT: ret i8 [[TMP3]]
141+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
142+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
143+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
144+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 3, i32 2>
145+
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
146+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i64 0
147+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i64 1
148+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
149+
; CHECK-NEXT: ret i8 [[TMP8]]
156150
;
157151
%x0 = extractelement <4 x i8> %x, i32 0
158152
br label %bb1

llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -110,18 +110,15 @@ define i8 @j(<4 x i8> %x, <4 x i8> %y) {
110110

111111
define i8 @k(<4 x i8> %x) {
112112
; CHECK-LABEL: @k(
113-
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i64 0
114-
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i64 3
115-
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i64 1
116-
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i64 2
117-
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
118-
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
119-
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
120-
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
121-
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
122-
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
123-
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
124-
; CHECK-NEXT: ret i8 [[TMP3]]
113+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
114+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
115+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
116+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 3, i32 2>
117+
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
118+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i64 0
119+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i64 1
120+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
121+
; CHECK-NEXT: ret i8 [[TMP8]]
125122
;
126123
%x0 = extractelement <4 x i8> %x, i32 0
127124
%x3 = extractelement <4 x i8> %x, i32 3
@@ -141,18 +138,15 @@ define i8 @k_bb(<4 x i8> %x) {
141138
; CHECK-LABEL: @k_bb(
142139
; CHECK-NEXT: br label [[BB1:%.*]]
143140
; CHECK: bb1:
144-
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i64 0
145-
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i64 3
146-
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i64 1
147-
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i64 2
148-
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
149-
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
150-
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
151-
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
152-
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
153-
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
154-
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
155-
; CHECK-NEXT: ret i8 [[TMP3]]
141+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
142+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 0, i32 1>
143+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
144+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 3, i32 2>
145+
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
146+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i64 0
147+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i64 1
148+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
149+
; CHECK-NEXT: ret i8 [[TMP8]]
156150
;
157151
%x0 = extractelement <4 x i8> %x, i32 0
158152
br label %bb1

llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
1818
; SSE-LABEL: @splat(
1919
; SSE-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
2020
; SSE-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1
21-
; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22-
; SSE-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
23-
; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
24-
; SSE-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]]
25-
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr @cle, align 16
21+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22+
; SSE-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
23+
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> zeroinitializer
24+
; SSE-NEXT: [[TMP6:%.*]] = xor <16 x i8> [[TMP3]], [[TMP5]]
25+
; SSE-NEXT: store <16 x i8> [[TMP6]], ptr @cle, align 16
2626
; SSE-NEXT: ret void
2727
;
2828
; AVX-LABEL: @splat(
2929
; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
3030
; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1
31-
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
32-
; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
33-
; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
34-
; AVX-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]]
35-
; AVX-NEXT: store <16 x i8> [[TMP4]], ptr @cle, align 16
31+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
32+
; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
33+
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> zeroinitializer
34+
; AVX-NEXT: [[TMP6:%.*]] = xor <16 x i8> [[TMP3]], [[TMP5]]
35+
; AVX-NEXT: store <16 x i8> [[TMP6]], ptr @cle, align 16
3636
; AVX-NEXT: ret void
3737
;
3838
%1 = xor i8 %c, %a
@@ -91,15 +91,15 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
9191
;
9292
; AVX-LABEL: @same_opcode_on_one_side(
9393
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
94-
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
95-
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
96-
; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
97-
; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
98-
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1
99-
; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2
100-
; AVX-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
101-
; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]]
102-
; AVX-NEXT: store <4 x i32> [[TMP6]], ptr @cle32, align 16
94+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
95+
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
96+
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
97+
; AVX-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
98+
; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[B:%.*]], i32 1
99+
; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C]], i32 2
100+
; AVX-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
101+
; AVX-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP5]], [[TMP8]]
102+
; AVX-NEXT: store <4 x i32> [[TMP9]], ptr @cle32, align 16
103103
; AVX-NEXT: ret void
104104
;
105105
%add1 = add i32 %c, %a

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,14 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
146146
; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
147147
; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
148148
; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
149-
; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0
150-
; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q1]], i32 1
149+
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 0, i32 1>
151150
; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
152151
; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
153-
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0
154-
; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q3]], i32 1
152+
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 2, i32 3>
155153
; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
156154
; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
157-
; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
158-
; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[Q5]], i32 1
155+
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
156+
; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q5]], i32 1
159157
; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
160158
; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
161159
; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]])

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,16 +180,14 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
180180
; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
181181
; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
182182
; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
183-
; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0
184-
; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q1]], i32 1
183+
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 0, i32 1>
185184
; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
186185
; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
187-
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0
188-
; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q3]], i32 1
186+
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 2, i32 3>
189187
; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
190188
; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
191-
; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
192-
; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[Q5]], i32 1
189+
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
190+
; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q5]], i32 1
193191
; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
194192
; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
195193
; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]])

0 commit comments

Comments
 (0)