Skip to content

Commit a438c60

Browse files
authored
[X86][FP16] Do not customize WidenLowerNode for half if VLX not enabled (#146994)
#142763 tried to reuse the ISD node to work around the non-VLX lowering problem, but it caused a new problem: https://godbolt.org/z/1hEGnddhY
1 parent e854321 commit a438c60

File tree

3 files changed

+110
-70
lines changed

3 files changed

+110
-70
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34150,7 +34150,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3415034150
return;
3415134151
}
3415234152

34153-
if (VT.isVector() && Subtarget.hasFP16() &&
34153+
if (VT.isVector() && Subtarget.hasFP16() && Subtarget.hasVLX() &&
3415434154
SrcVT.getVectorElementType() == MVT::f16) {
3415534155
EVT EleVT = VT.getVectorElementType();
3415634156
EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
@@ -34164,10 +34164,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3416434164
}
3416534165

3416634166
if (IsStrict) {
34167+
Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
3416734168
Res =
3416834169
DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
3416934170
Chain = Res.getValue(1);
3417034171
} else {
34172+
Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
3417134173
Res = DAG.getNode(Opc, dl, ResVT, Src);
3417234174
}
3417334175

llvm/test/CodeGen/X86/avx512fp16-cvt.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,4 +1054,22 @@ define <8 x half> @u64tof16(<8 x i64> %a) #0 {
10541054
ret <8 x half> %1
10551055
}
10561056

1057+
define void @f16tou32(ptr %ptr) {
1058+
; X64-LABEL: f16tou32:
1059+
; X64: # %bb.0:
1060+
; X64-NEXT: vcvttph2udq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0
1061+
; X64-NEXT: vmovlps %xmm0, (%rdi)
1062+
; X64-NEXT: retq
1063+
;
1064+
; X86-LABEL: f16tou32:
1065+
; X86: # %bb.0:
1066+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1067+
; X86-NEXT: vcvttph2udq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0
1068+
; X86-NEXT: vmovlps %xmm0, (%eax)
1069+
; X86-NEXT: retl
1070+
%1 = fptoui <2 x half> splat (half 0xH7E00) to <2 x i32>
1071+
store <2 x i32> %1, ptr %ptr, align 8
1072+
ret void
1073+
}
1074+
10571075
attributes #0 = { "min-legal-vector-width"="256" "prefer-vector-width"="256" }

llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll

Lines changed: 89 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,11 @@ define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
8282
;
8383
; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
8484
; NOVL: # %bb.0:
85-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
86-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
87-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
88-
; NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
89-
; NOVL-NEXT: vcvttph2dq %ymm0, %zmm0
90-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
91-
; NOVL-NEXT: vzeroupper
85+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
86+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
87+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
88+
; NOVL-NEXT: vmovd %ecx, %xmm0
89+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
9290
; NOVL-NEXT: retq
9391
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
9492
metadata !"fpexcept.strict") #0
@@ -105,13 +103,11 @@ define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
105103
;
106104
; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
107105
; NOVL: # %bb.0:
108-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
109-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
110-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
111-
; NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
112-
; NOVL-NEXT: vcvttph2udq %ymm0, %zmm0
113-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
114-
; NOVL-NEXT: vzeroupper
106+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
107+
; NOVL-NEXT: vcvttsh2usi %xmm1, %eax
108+
; NOVL-NEXT: vcvttsh2usi %xmm0, %ecx
109+
; NOVL-NEXT: vmovd %ecx, %xmm0
110+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
115111
; NOVL-NEXT: retq
116112
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
117113
metadata !"fpexcept.strict") #0
@@ -128,13 +124,12 @@ define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
128124
;
129125
; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
130126
; NOVL: # %bb.0:
131-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
132-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
133-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
134-
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
135-
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
136-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
137-
; NOVL-NEXT: vzeroupper
127+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
128+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
129+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
130+
; NOVL-NEXT: vmovd %ecx, %xmm0
131+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
132+
; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
138133
; NOVL-NEXT: retq
139134
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
140135
metadata !"fpexcept.strict") #0
@@ -151,13 +146,12 @@ define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
151146
;
152147
; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
153148
; NOVL: # %bb.0:
154-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
155-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
156-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
157-
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
158-
; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
159-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
160-
; NOVL-NEXT: vzeroupper
149+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
150+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
151+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
152+
; NOVL-NEXT: vmovd %ecx, %xmm0
153+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
154+
; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
161155
; NOVL-NEXT: retq
162156
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
163157
metadata !"fpexcept.strict") #0
@@ -175,13 +169,13 @@ define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
175169
;
176170
; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
177171
; NOVL: # %bb.0:
178-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
179-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
180-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
181-
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
182-
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
183-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
184-
; NOVL-NEXT: vzeroupper
172+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
173+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
174+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
175+
; NOVL-NEXT: vmovd %ecx, %xmm0
176+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
177+
; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
178+
; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
185179
; NOVL-NEXT: retq
186180
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
187181
metadata !"fpexcept.strict") #0
@@ -199,13 +193,13 @@ define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
199193
;
200194
; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
201195
; NOVL: # %bb.0:
202-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
203-
; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
204-
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
205-
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
206-
; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
207-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
208-
; NOVL-NEXT: vzeroupper
196+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
197+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
198+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
199+
; NOVL-NEXT: vmovd %ecx, %xmm0
200+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
201+
; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
202+
; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
209203
; NOVL-NEXT: retq
210204
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
211205
metadata !"fpexcept.strict") #0
@@ -335,12 +329,18 @@ define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
335329
;
336330
; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
337331
; NOVL: # %bb.0:
338-
; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
339-
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
340-
; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
341-
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
342-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
343-
; NOVL-NEXT: vzeroupper
332+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
333+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
334+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
335+
; NOVL-NEXT: vmovd %ecx, %xmm1
336+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
337+
; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
338+
; NOVL-NEXT: vcvttsh2si %xmm2, %eax
339+
; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
340+
; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
341+
; NOVL-NEXT: vcvttsh2si %xmm0, %eax
342+
; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
343+
; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
344344
; NOVL-NEXT: retq
345345
%ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
346346
metadata !"fpexcept.strict") #0
@@ -356,12 +356,18 @@ define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
356356
;
357357
; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
358358
; NOVL: # %bb.0:
359-
; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
360-
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
361-
; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
362-
; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
363-
; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
364-
; NOVL-NEXT: vzeroupper
359+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
360+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
361+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
362+
; NOVL-NEXT: vmovd %ecx, %xmm1
363+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
364+
; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
365+
; NOVL-NEXT: vcvttsh2si %xmm2, %eax
366+
; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
367+
; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
368+
; NOVL-NEXT: vcvttsh2si %xmm0, %eax
369+
; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
370+
; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
365371
; NOVL-NEXT: retq
366372
%ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
367373
metadata !"fpexcept.strict") #0
@@ -378,12 +384,19 @@ define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
378384
;
379385
; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
380386
; NOVL: # %bb.0:
381-
; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
382-
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
383-
; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
384-
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
385-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
386-
; NOVL-NEXT: vzeroupper
387+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
388+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
389+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
390+
; NOVL-NEXT: vmovd %ecx, %xmm1
391+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
392+
; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
393+
; NOVL-NEXT: vcvttsh2si %xmm2, %eax
394+
; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
395+
; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
396+
; NOVL-NEXT: vcvttsh2si %xmm0, %eax
397+
; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
398+
; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
399+
; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
387400
; NOVL-NEXT: retq
388401
%ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
389402
metadata !"fpexcept.strict") #0
@@ -400,12 +413,19 @@ define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
400413
;
401414
; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
402415
; NOVL: # %bb.0:
403-
; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
404-
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
405-
; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
406-
; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
407-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
408-
; NOVL-NEXT: vzeroupper
416+
; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
417+
; NOVL-NEXT: vcvttsh2si %xmm1, %eax
418+
; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
419+
; NOVL-NEXT: vmovd %ecx, %xmm1
420+
; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
421+
; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
422+
; NOVL-NEXT: vcvttsh2si %xmm2, %eax
423+
; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
424+
; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
425+
; NOVL-NEXT: vcvttsh2si %xmm0, %eax
426+
; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
427+
; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
428+
; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
409429
; NOVL-NEXT: retq
410430
%ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
411431
metadata !"fpexcept.strict") #0
@@ -554,7 +574,7 @@ define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
554574
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
555575
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
556576
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
557-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
577+
; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
558578
; NOVL-NEXT: vzeroupper
559579
; NOVL-NEXT: retq
560580
%ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
@@ -573,8 +593,8 @@ define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
573593
; NOVL: # %bb.0:
574594
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
575595
; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
576-
; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
577-
; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
596+
; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
597+
; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
578598
; NOVL-NEXT: vzeroupper
579599
; NOVL-NEXT: retq
580600
%ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,

0 commit comments

Comments
 (0)