@@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
ret <8 x i16> %res
}

+ define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
+ ; SSE-LABEL: ashr_add_and:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa %xmm0, %xmm1
+ ; SSE-NEXT: psraw $15, %xmm1
+ ; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; SSE-NEXT: pand %xmm1, %xmm0
+ ; SSE-NEXT: retq
+ ;
+ ; AVX-LABEL: ashr_add_and:
+ ; AVX: # %bb.0:
+ ; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
+ ; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
+ ; AVX-NEXT: retq
+ %signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <8 x i16> %signsplat, %flipsign
+ ret <8 x i16> %res
+ }
+
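; Illustrative note: adding 32768 (0x8000) to an i16 can only flip the sign bit, since any carry
; out of bit 15 is discarded, so the add above behaves like the xor in ashr_xor_and
; (e.g. 0xC000 + 0x8000 = 0x4000 = 0xC000 ^ 0x8000).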
; negative test - extra uses may lead to extra instructions when custom-lowered

define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
@@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
ret <4 x i32> %res
}

+ define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
+ ; SSE-LABEL: ashr_add_and_custom:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa %xmm0, %xmm1
+ ; SSE-NEXT: psrad $31, %xmm1
+ ; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; SSE-NEXT: pand %xmm1, %xmm0
+ ; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: ashr_add_and_custom:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+ ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: ashr_add_and_custom:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
+ ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+ ; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+ ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: ashr_add_and_custom:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
+ ; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+ ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+ ; AVX512-NEXT: retq
+ %signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
+ %flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ %res = and <4 x i32> %flipsign, %signsplat
+ ret <4 x i32> %res
+ }
+

; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))
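; Illustrative worked instance of the identity above, for i16 elements (1 << (BW-1) = 32768 = 0x8000):
;   x = 0xC000: usubsat(x, 0x8000) = 0x4000, and (x ^ 0x8000) & (ashr x, 15) = 0x4000 & 0xFFFF = 0x4000
;   x = 0x4000: usubsat(x, 0x8000) = 0,      and (x ^ 0x8000) & (ashr x, 15) = 0xC000 & 0x0000 = 0
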
define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
@@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
ret <16 x i16> %res
}

+ define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
+ ; SSE-LABEL: ashr_add_and_v16i16:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa %xmm1, %xmm2
+ ; SSE-NEXT: psraw $15, %xmm2
+ ; SSE-NEXT: movdqa %xmm0, %xmm3
+ ; SSE-NEXT: psraw $15, %xmm3
+ ; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+ ; SSE-NEXT: pand %xmm2, %xmm1
+ ; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; SSE-NEXT: pand %xmm3, %xmm0
+ ; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: ashr_add_and_v16i16:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+ ; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
+ ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+ ; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+ ; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+ ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+ ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: ashr_add_and_v16i16:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
+ ; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: ashr_add_and_v16i16:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
+ ; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
+ ; AVX512-NEXT: retq
+ %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+ %res = and <16 x i16> %signsplat, %flipsign
+ ret <16 x i16> %res
+ }
+
define <16 x i16> @test8(<16 x i16> %x) nounwind {
; SSE-LABEL: test8:
; SSE: # %bb.0: # %vector.ph