@@ -82,13 +82,11 @@ define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; NOVL-NEXT:    vcvttph2dq %ymm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -105,13 +103,11 @@ define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; NOVL-NEXT:    vcvttph2udq %ymm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2usi %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2usi %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -128,13 +124,12 @@ define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; NOVL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -151,13 +146,12 @@ define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2uw %zmm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; NOVL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -175,13 +169,13 @@ define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; NOVL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
                                                             metadata !"fpexcept.strict") #0
@@ -199,13 +193,13 @@ define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2uw %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm0
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; NOVL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
                                                             metadata !"fpexcept.strict") #0
@@ -335,12 +329,18 @@ define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinserti32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm1
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; NOVL-NEXT:    vcvttsh2si %xmm2, %eax
+; NOVL-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vpsrlq $48, %xmm0, %xmm0
+; NOVL-NEXT:    vcvttsh2si %xmm0, %eax
+; NOVL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; NOVL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -356,12 +356,18 @@ define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinserti32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2uw %zmm0, %zmm0
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm1
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; NOVL-NEXT:    vcvttsh2si %xmm2, %eax
+; NOVL-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vpsrlq $48, %xmm0, %xmm0
+; NOVL-NEXT:    vcvttsh2si %xmm0, %eax
+; NOVL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; NOVL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
                                                               metadata !"fpexcept.strict") #0
@@ -378,12 +384,19 @@ define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinserti32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm1
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; NOVL-NEXT:    vcvttsh2si %xmm2, %eax
+; NOVL-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vpsrlq $48, %xmm0, %xmm0
+; NOVL-NEXT:    vcvttsh2si %xmm0, %eax
+; NOVL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; NOVL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
                                                             metadata !"fpexcept.strict") #0
@@ -400,12 +413,19 @@ define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
 ;
 ; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
 ; NOVL:       # %bb.0:
-; NOVL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT:    vinserti32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2uw %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; NOVL-NEXT:    vzeroupper
+; NOVL-NEXT:    vpsrld $16, %xmm0, %xmm1
+; NOVL-NEXT:    vcvttsh2si %xmm1, %eax
+; NOVL-NEXT:    vcvttsh2si %xmm0, %ecx
+; NOVL-NEXT:    vmovd %ecx, %xmm1
+; NOVL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; NOVL-NEXT:    vcvttsh2si %xmm2, %eax
+; NOVL-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; NOVL-NEXT:    vpsrlq $48, %xmm0, %xmm0
+; NOVL-NEXT:    vcvttsh2si %xmm0, %eax
+; NOVL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; NOVL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    retq
   %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
                                                             metadata !"fpexcept.strict") #0
@@ -554,7 +574,7 @@ define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
 ; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
 ; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; NOVL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    vzeroupper
 ; NOVL-NEXT:    retq
   %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
@@ -573,8 +593,8 @@ define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
 ; NOVL:       # %bb.0:
 ; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
-; NOVL-NEXT:    vcvttph2uw %zmm0, %zmm0
-; NOVL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; NOVL-NEXT:    vcvttph2w %zmm0, %zmm0
+; NOVL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
 ; NOVL-NEXT:    vzeroupper
 ; NOVL-NEXT:    retq
   %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,