@@ -15,37 +15,31 @@ define i64 @load_bswap(%v8i8* %p) {
15
15
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5
16
16
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6
17
17
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7
18
- ; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]]
19
- ; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]]
20
- ; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]]
21
- ; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]]
18
+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <4 x i8>*
19
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
22
20
; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]]
23
21
; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]]
24
22
; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]]
25
23
; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]]
26
- ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64
27
- ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64
28
- ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64
29
- ; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64
24
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
30
25
; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64
31
26
; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64
32
27
; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64
33
28
; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64
34
- ; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56
35
- ; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48
36
- ; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40
37
- ; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32
29
+ ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <4 x i64> [[TMP3]], <i64 56, i64 48, i64 40, i64 32>
38
30
; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
39
31
; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
40
32
; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
41
- ; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]]
42
- ; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]]
43
- ; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]]
44
- ; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
45
- ; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
46
- ; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
47
- ; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
48
- ; CHECK-NEXT: ret i64 [[OR01234567]]
33
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
34
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
35
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
36
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
37
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
38
+ ; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[SH4]]
39
+ ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
40
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
41
+ ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
42
+ ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
49
43
;
50
44
%g0 = getelementptr inbounds %v8i8 , %v8i8* %p , i64 0 , i32 0
51
45
%g1 = getelementptr inbounds %v8i8 , %v8i8* %p , i64 0 , i32 1
@@ -103,38 +97,18 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) {
103
97
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5
104
98
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6
105
99
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7
106
- ; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]]
107
- ; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]]
108
- ; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]]
109
- ; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]]
110
- ; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]]
111
- ; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]]
112
- ; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]]
113
- ; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]]
114
- ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64
115
- ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64
116
- ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64
117
- ; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64
118
- ; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64
119
- ; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64
120
- ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64
121
- ; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64
122
- ; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56
123
- ; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48
124
- ; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40
125
- ; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32
126
- ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
127
- ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
128
- ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
129
- ; CHECK-NEXT: [[SH7:%.*]] = shl nuw nsw i64 [[Z7]], 0
130
- ; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]]
131
- ; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]]
132
- ; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]]
133
- ; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
134
- ; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
135
- ; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
136
- ; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[SH7]]
137
- ; CHECK-NEXT: ret i64 [[OR01234567]]
100
+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <8 x i8>*
101
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
102
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
103
+ ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 56, i64 48, i64 40, i64 32, i64 24, i64 16, i64 8, i64 0>
104
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
105
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
106
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
107
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
108
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
109
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
110
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
111
+ ; CHECK-NEXT: ret i64 [[TMP5]]
138
112
;
139
113
%g0 = getelementptr inbounds %v8i8 , %v8i8* %p , i64 0 , i32 0
140
114
%g1 = getelementptr inbounds %v8i8 , %v8i8* %p , i64 0 , i32 1
@@ -194,36 +168,30 @@ define i64 @load64le(i8* %arg) {
194
168
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6
195
169
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7
196
170
; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1
197
- ; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1
198
- ; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1
199
- ; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1
200
- ; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1
171
+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G1]] to <4 x i8>*
172
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
201
173
; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1
202
174
; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1
203
175
; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1
204
176
; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64
205
- ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64
206
- ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64
207
- ; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64
208
- ; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64
177
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
209
178
; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64
210
179
; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64
211
180
; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64
212
- ; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8
213
- ; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16
214
- ; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24
215
- ; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32
181
+ ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw <4 x i64> [[TMP3]], <i64 8, i64 16, i64 24, i64 32>
216
182
; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
217
183
; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
218
184
; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56
219
- ; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[Z0]]
220
- ; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]]
221
- ; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]]
222
- ; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]]
223
- ; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]]
224
- ; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]]
225
- ; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]]
226
- ; CHECK-NEXT: ret i64 [[O7]]
185
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
186
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
187
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
188
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
189
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
190
+ ; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[S5]]
191
+ ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
192
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
193
+ ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
194
+ ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
227
195
;
228
196
%g1 = getelementptr inbounds i8 , i8* %arg , i64 1
229
197
%g2 = getelementptr inbounds i8 , i8* %arg , i64 2
@@ -279,38 +247,18 @@ define i64 @load64le_nop_shift(i8* %arg) {
279
247
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 5
280
248
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6
281
249
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7
282
- ; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1
283
- ; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1
284
- ; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1
285
- ; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1
286
- ; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1
287
- ; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1
288
- ; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1
289
- ; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1
290
- ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64
291
- ; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64
292
- ; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64
293
- ; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64
294
- ; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64
295
- ; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64
296
- ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64
297
- ; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64
298
- ; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 0
299
- ; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8
300
- ; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16
301
- ; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24
302
- ; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32
303
- ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
304
- ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
305
- ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56
306
- ; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S0]]
307
- ; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]]
308
- ; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]]
309
- ; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]]
310
- ; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]]
311
- ; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]]
312
- ; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]]
313
- ; CHECK-NEXT: ret i64 [[O7]]
250
+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG]] to <8 x i8>*
251
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
252
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
253
+ ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
254
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
255
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
256
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
257
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
258
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
259
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
260
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
261
+ ; CHECK-NEXT: ret i64 [[TMP5]]
314
262
;
315
263
%g1 = getelementptr inbounds i8 , i8* %arg , i64 1
316
264
%g2 = getelementptr inbounds i8 , i8* %arg , i64 2
0 commit comments