Skip to content

Commit 35c767a

Browse files
committed
[x86] add tests for variants of usubsat; NFC
1 parent d34cad3 commit 35c767a

File tree

1 file changed

+101
-0
lines changed

1 file changed

+101
-0
lines changed

llvm/test/CodeGen/X86/psubus.ll

Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
4747
ret <8 x i16> %res
4848
}
4949

50+
; v8i16 variant of the sign-splat/flip-sign/and pattern in which the sign bit
; is flipped with 'add' rather than 'xor': adding 32768 (0x8000) to an i16 lane
; toggles only its top bit, so both forms produce the same lane value.
; Lane 0 of the add constant is undef, so the constant is not a uniform splat.
; The assertions below record the current output: an arithmetic shift by 15,
; a constant-pool add, and an 'and' of the two results.
; NOTE(review): presumably a candidate for the same usubsat fold as the xor
; form - confirm against the follow-up combine before relying on that.
define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
51+
; SSE-LABEL: ashr_add_and:
52+
; SSE: # %bb.0:
53+
; SSE-NEXT: movdqa %xmm0, %xmm1
54+
; SSE-NEXT: psraw $15, %xmm1
55+
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
56+
; SSE-NEXT: pand %xmm1, %xmm0
57+
; SSE-NEXT: retq
58+
;
59+
; AVX-LABEL: ashr_add_and:
60+
; AVX: # %bb.0:
61+
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
62+
; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
63+
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
64+
; AVX-NEXT: retq
65+
%signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
66+
%flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
67+
%res = and <8 x i16> %signsplat, %flipsign
68+
ret <8 x i16> %res
69+
}
70+
5071
; negative test - extra uses may lead to extra instructions when custom-lowered
5172

5273
define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
@@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
118139
ret <4 x i32> %res
119140
}
120141

142+
; v4i32 variant of the sign-splat/flip-sign/and pattern using 'add' to flip the
; sign bit: adding 2147483648 (0x80000000) to an i32 lane toggles only its top
; bit. Here the undef element is in lane 0 of the shift amount (the add
; constant is a full splat), and the 'and' operands are commuted relative to
; the v8i16 test (%flipsign first, %signsplat second).
; The assertions below record the current output: an arithmetic shift by 31,
; an add of the sign-bit constant (loaded from the constant pool or broadcast,
; depending on the run configuration), and an 'and'.
; NOTE(review): the '_custom' suffix presumably refers to this type's usubsat
; being custom-lowered - confirm against the X86 lowering code.
define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
143+
; SSE-LABEL: ashr_add_and_custom:
144+
; SSE: # %bb.0:
145+
; SSE-NEXT: movdqa %xmm0, %xmm1
146+
; SSE-NEXT: psrad $31, %xmm1
147+
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
148+
; SSE-NEXT: pand %xmm1, %xmm0
149+
; SSE-NEXT: retq
150+
;
151+
; AVX1-LABEL: ashr_add_and_custom:
152+
; AVX1: # %bb.0:
153+
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
154+
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
155+
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
156+
; AVX1-NEXT: retq
157+
;
158+
; AVX2-LABEL: ashr_add_and_custom:
159+
; AVX2: # %bb.0:
160+
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
161+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
162+
; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
163+
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
164+
; AVX2-NEXT: retq
165+
;
166+
; AVX512-LABEL: ashr_add_and_custom:
167+
; AVX512: # %bb.0:
168+
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
169+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
170+
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
171+
; AVX512-NEXT: retq
172+
%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
173+
%flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
174+
%res = and <4 x i32> %flipsign, %signsplat
175+
ret <4 x i32> %res
176+
}
177+
121178
; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))
122179

123180
define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
@@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
368425
ret <16 x i16> %res
369426
}
370427

428+
; 256-bit (v16i16) variant of the sign-splat/flip-sign/and pattern using 'add'
; to flip the sign bit: adding 32768 (0x8000) to an i16 lane toggles only its
; top bit. Lane 0 of the add constant is undef; the shift amount is a full
; splat of 15.
; The assertions below record the current output. Where only 128-bit integer
; operations are used, the work is done as two xmm halves (with an
; extract/insert pair in one configuration); otherwise a single ymm
; shift + add + and sequence is expected.
; NOTE(review): presumably a candidate for the same usubsat fold as the xor
; form - confirm against the follow-up combine before relying on that.
define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
429+
; SSE-LABEL: ashr_add_and_v16i16:
430+
; SSE: # %bb.0:
431+
; SSE-NEXT: movdqa %xmm1, %xmm2
432+
; SSE-NEXT: psraw $15, %xmm2
433+
; SSE-NEXT: movdqa %xmm0, %xmm3
434+
; SSE-NEXT: psraw $15, %xmm3
435+
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
436+
; SSE-NEXT: pand %xmm2, %xmm1
437+
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
438+
; SSE-NEXT: pand %xmm3, %xmm0
439+
; SSE-NEXT: retq
440+
;
441+
; AVX1-LABEL: ashr_add_and_v16i16:
442+
; AVX1: # %bb.0:
443+
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
444+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
445+
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
446+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
447+
; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
448+
; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
449+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
450+
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
451+
; AVX1-NEXT: retq
452+
;
453+
; AVX2-LABEL: ashr_add_and_v16i16:
454+
; AVX2: # %bb.0:
455+
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
456+
; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
457+
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
458+
; AVX2-NEXT: retq
459+
;
460+
; AVX512-LABEL: ashr_add_and_v16i16:
461+
; AVX512: # %bb.0:
462+
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
463+
; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
464+
; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
465+
; AVX512-NEXT: retq
466+
%signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
467+
%flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
468+
%res = and <16 x i16> %signsplat, %flipsign
469+
ret <16 x i16> %res
470+
}
471+
371472
define <16 x i16> @test8(<16 x i16> %x) nounwind {
372473
; SSE-LABEL: test8:
373474
; SSE: # %bb.0: # %vector.ph

0 commit comments

Comments
 (0)