@@ -1177,7 +1177,51 @@ bb:
1177
1177
%tmp28 = extractelement <16 x i32 > %tmp27 , i64 0
1178
1178
ret i32 %tmp28
1179
1179
}
1180
+ declare <16 x i32 > @llvm.abs.v16i32 (<16 x i32 >, i1 immarg)
1181
+
1182
; Codegen test: sum of absolute differences over two <8 x i8> vectors.
; Loads eight bytes from each of %p0 and %p1, forms the per-lane unsigned
; absolute difference as umax - umin, adds the eight lanes together at i8
; width (so the sum wraps modulo 256) and returns it zero-extended to i32.
; The autogenerated assertions below expect the lane sum to be lowered to one
; psadbw / vpsadbw against a zeroed register, and expect only the low byte of
; that result to be returned (movzbl %al on SSE2, vpextrb $0 on AVX).
; NOTE(review): the name suggests a regression test for issue 143456 - confirm.
+ define i32 @PR143456 (ptr %p0 , ptr %p1 ) {
1183
+ ; SSE2-LABEL: PR143456:
1184
+ ; SSE2: # %bb.0:
1185
+ ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1186
+ ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1187
+ ; SSE2-NEXT: movdqa %xmm0, %xmm2
1188
+ ; SSE2-NEXT: pminub %xmm1, %xmm2
1189
+ ; SSE2-NEXT: pmaxub %xmm1, %xmm0
1190
+ ; SSE2-NEXT: psubb %xmm2, %xmm0
1191
+ ; SSE2-NEXT: pxor %xmm1, %xmm1
1192
+ ; SSE2-NEXT: psadbw %xmm0, %xmm1
1193
+ ; SSE2-NEXT: movd %xmm1, %eax
1194
+ ; SSE2-NEXT: movzbl %al, %eax
1195
+ ; SSE2-NEXT: retq
1196
+ ;
1197
+ ; AVX-LABEL: PR143456:
1198
+ ; AVX: # %bb.0:
1199
+ ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1200
+ ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1201
+ ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm2
1202
+ ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
1203
+ ; AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0
1204
+ ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1205
+ ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
1206
+ ; AVX-NEXT: vpextrb $0, %xmm0, %eax
1207
+ ; AVX-NEXT: retq
1208
; Both operands are 8-byte vector loads with only byte alignment (align 1).
+ %v0 = load <8 x i8 >, ptr %p0 , align 1
1209
+ %v1 = load <8 x i8 >, ptr %p1 , align 1
1210
; Unsigned max minus unsigned min is the per-lane absolute difference; the
; subtraction cannot wrap because max >= min in every lane, hence nuw.
+ %max = tail call <8 x i8 > @llvm.umax.v8i8 (<8 x i8 > %v0 , <8 x i8 > %v1 )
1211
+ %min = tail call <8 x i8 > @llvm.umin.v8i8 (<8 x i8 > %v0 , <8 x i8 > %v1 )
1212
+ %abd = sub nuw <8 x i8 > %max , %min
1213
; Three shuffle+add halving steps: lanes 4-7 are added onto lanes 0-3, then
; lanes 2-3 onto lanes 0-1, then lane 1 onto lane 0. The remaining shuffle
; lanes are poison, so only lane 0 of the final vector is meaningful.
+ %rdx.shuf = shufflevector <8 x i8 > %abd , <8 x i8 > poison, <8 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 , i32 poison, i32 poison, i32 poison, i32 poison>
1214
+ %bin.rdx = add <8 x i8 > %abd , %rdx.shuf
1215
+ %rdx.shuf15 = shufflevector <8 x i8 > %bin.rdx , <8 x i8 > poison, <8 x i32 > <i32 2 , i32 3 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1216
+ %bin.rdx16 = add <8 x i8 > %bin.rdx , %rdx.shuf15
1217
+ %rdx.shuf17 = shufflevector <8 x i8 > %bin.rdx16 , <8 x i8 > poison, <8 x i32 > <i32 1 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1218
+ %bin.rdx18 = add <8 x i8 > %bin.rdx16 , %rdx.shuf17
1219
; Lane 0 now holds the i8 sum of all eight differences; widen it unsigned.
+ %elt = extractelement <8 x i8 > %bin.rdx18 , i32 0
1220
+ %res = zext i8 %elt to i32
1221
+ ret i32 %res
1222
+ }
1223
; Intrinsic declarations following @PR143456, which calls the umax and umin
; variants declared here.
; NOTE(review): llvm.vector.reduce.add.v8i8 is not called in the visible part
; of this file - confirm whether this declaration is still needed.
+ declare <8 x i8 > @llvm.umax.v8i8 (<8 x i8 >, <8 x i8 >)
1224
+ declare <8 x i8 > @llvm.umin.v8i8 (<8 x i8 >, <8 x i8 >)
1225
+ declare i8 @llvm.vector.reduce.add.v8i8 (<8 x i8 >)
1180
1226
1181
- ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
1182
- declare <16 x i32 > @llvm.abs.v16i32 (<16 x i32 >, i1 immarg) #0
1183
1227
attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
0 commit comments