Skip to content

Commit da4cbec

Browse files
[LLVM][SVE] Implement isel for fptoi half/float/double to i1. (#129269)
Also adds asserts documenting that SVE lowering for strict_fp fp<->int operations is not yet implemented. The added cost-table entries for the i1 result types maintain the existing values expected by Analysis/CostModel/AArch64/sve-cast.ll.
1 parent c711c65 commit da4cbec

File tree

3 files changed

+250
-0
lines changed

3 files changed

+250
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
15691569

15701570
// There are no legal MVT::nxv16f## based types.
15711571
if (VT != MVT::nxv16i1) {
1572+
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1573+
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
15721574
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
15731575
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
15741576
}
@@ -4728,7 +4730,18 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
47284730
EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
47294731
EVT VT = Op.getValueType();
47304732

4733+
assert(!(IsStrict && VT.isScalableVector()) &&
4734+
"Unimplemented SVE support for STRICT_FP_to_INT!");
4735+
47314736
if (VT.isScalableVector()) {
4737+
if (VT.getVectorElementType() == MVT::i1) {
4738+
SDLoc DL(Op);
4739+
EVT CvtVT = getPromotedVTForPredicate(VT);
4740+
SDValue Cvt = DAG.getNode(Op.getOpcode(), DL, CvtVT, Op.getOperand(0));
4741+
SDValue Zero = DAG.getConstant(0, DL, CvtVT);
4742+
return DAG.getSetCC(DL, VT, Cvt, Zero, ISD::SETNE);
4743+
}
4744+
47324745
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
47334746
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
47344747
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
@@ -5034,6 +5047,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
50345047
unsigned Opc = Op.getOpcode();
50355048
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
50365049

5050+
assert(!(IsStrict && VT.isScalableVector()) &&
5051+
"Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5052+
50375053
if (VT.isScalableVector()) {
50385054
if (InVT.getVectorElementType() == MVT::i1) {
50395055
SDValue FalseVal = DAG.getConstantFP(0.0, dl, VT);

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3017,20 +3017,24 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30173017
{ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 1},
30183018
{ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 1},
30193019
{ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 1},
3020+
{ISD::FP_TO_SINT, MVT::nxv2i1, MVT::nxv2f64, 1},
30203021
{ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1},
30213022
{ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 1},
30223023
{ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 1},
30233024
{ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 1},
3025+
{ISD::FP_TO_UINT, MVT::nxv2i1, MVT::nxv2f64, 1},
30243026

30253027
// Complex, from nxv4f32.
30263028
{ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f32, 4},
30273029
{ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1},
30283030
{ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 1},
30293031
{ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 1},
3032+
{ISD::FP_TO_SINT, MVT::nxv4i1, MVT::nxv4f32, 1},
30303033
{ISD::FP_TO_UINT, MVT::nxv4i64, MVT::nxv4f32, 4},
30313034
{ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1},
30323035
{ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 1},
30333036
{ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 1},
3037+
{ISD::FP_TO_UINT, MVT::nxv4i1, MVT::nxv4f32, 1},
30343038

30353039
// Complex, from nxv8f64. Illegal -> illegal conversions not required.
30363040
{ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 7},
@@ -3057,10 +3061,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30573061
{ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f16, 4},
30583062
{ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f16, 1},
30593063
{ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f16, 1},
3064+
{ISD::FP_TO_SINT, MVT::nxv8i1, MVT::nxv8f16, 1},
30603065
{ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f16, 10},
30613066
{ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f16, 4},
30623067
{ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f16, 1},
30633068
{ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f16, 1},
3069+
{ISD::FP_TO_UINT, MVT::nxv8i1, MVT::nxv8f16, 1},
30643070

30653071
// Complex, from nxv4f16.
30663072
{ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f16, 4},

llvm/test/CodeGen/AArch64/sve-fcvt.ll

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,120 @@ define <vscale x 2 x float> @fcvts_nxv2f64(<vscale x 2 x double> %a) {
113113
; FP_TO_SINT
114114
;
115115

116+
define <vscale x 2 x i1> @fcvtzs_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
117+
; CHECK-LABEL: fcvtzs_nxv2f16_to_nxv2i1:
118+
; CHECK: // %bb.0:
119+
; CHECK-NEXT: ptrue p0.d
120+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
121+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
122+
; CHECK-NEXT: ret
123+
%res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i1>
124+
ret <vscale x 2 x i1> %res
125+
}
126+
127+
define <vscale x 2 x i1> @fcvtzs_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
128+
; CHECK-LABEL: fcvtzs_nxv2f32_to_nxv2i1:
129+
; CHECK: // %bb.0:
130+
; CHECK-NEXT: ptrue p0.d
131+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
132+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
133+
; CHECK-NEXT: ret
134+
%res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i1>
135+
ret <vscale x 2 x i1> %res
136+
}
137+
138+
define <vscale x 2 x i1> @fcvtzs_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
139+
; CHECK-LABEL: fcvtzs_nxv2f64_to_nxv2i1:
140+
; CHECK: // %bb.0:
141+
; CHECK-NEXT: ptrue p0.d
142+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
143+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
144+
; CHECK-NEXT: ret
145+
%res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i1>
146+
ret <vscale x 2 x i1> %res
147+
}
148+
149+
define <vscale x 4 x i1> @fcvtzs_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
150+
; CHECK-LABEL: fcvtzs_nxv4f16_to_nxv4i1:
151+
; CHECK: // %bb.0:
152+
; CHECK-NEXT: ptrue p0.s
153+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
154+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
155+
; CHECK-NEXT: ret
156+
%res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i1>
157+
ret <vscale x 4 x i1> %res
158+
}
159+
160+
define <vscale x 4 x i1> @fcvtzs_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
161+
; CHECK-LABEL: fcvtzs_nxv4f32_to_nxv4i1:
162+
; CHECK: // %bb.0:
163+
; CHECK-NEXT: ptrue p0.s
164+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
165+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
166+
; CHECK-NEXT: ret
167+
%res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i1>
168+
ret <vscale x 4 x i1> %res
169+
}
170+
171+
define <vscale x 4 x i1> @fcvtzs_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
172+
; CHECK-LABEL: fcvtzs_nxv4f64_to_nxv4i1:
173+
; CHECK: // %bb.0:
174+
; CHECK-NEXT: ptrue p0.d
175+
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
176+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
177+
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
178+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
179+
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
180+
; CHECK-NEXT: ret
181+
%res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i1>
182+
ret <vscale x 4 x i1> %res
183+
}
184+
185+
define <vscale x 8 x i1> @fcvtzs_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
186+
; CHECK-LABEL: fcvtzs_nxv8f16_to_nxv8i1:
187+
; CHECK: // %bb.0:
188+
; CHECK-NEXT: ptrue p0.h
189+
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
190+
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
191+
; CHECK-NEXT: ret
192+
%res = fptosi <vscale x 8 x half> %a to <vscale x 8 x i1>
193+
ret <vscale x 8 x i1> %res
194+
}
195+
196+
define <vscale x 8 x i1> @fcvtzs_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
197+
; CHECK-LABEL: fcvtzs_nxv8f32_to_nxv8i1:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: ptrue p0.s
200+
; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
201+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
202+
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
203+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
204+
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
205+
; CHECK-NEXT: ret
206+
%res = fptosi <vscale x 8 x float> %a to <vscale x 8 x i1>
207+
ret <vscale x 8 x i1> %res
208+
}
209+
210+
define <vscale x 8 x i1> @fcvtzs_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
211+
; CHECK-LABEL: fcvtzs_nxv8f64_to_nxv8i1:
212+
; CHECK: // %bb.0:
213+
; CHECK-NEXT: ptrue p0.d
214+
; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
215+
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
216+
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
217+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
218+
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
219+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
220+
; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
221+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
222+
; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
223+
; CHECK-NEXT: uzp1 p0.s, p0.s, p3.s
224+
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
225+
; CHECK-NEXT: ret
226+
%res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i1>
227+
ret <vscale x 8 x i1> %res
228+
}
229+
116230
define <vscale x 2 x i16> @fcvtzs_h_nxv2f16(<vscale x 2 x half> %a) {
117231
; CHECK-LABEL: fcvtzs_h_nxv2f16:
118232
; CHECK: // %bb.0:
@@ -277,6 +391,120 @@ define <vscale x 2 x i64> @fcvtzs_d_nxv2f64(<vscale x 2 x double> %a) {
277391
; FP_TO_UINT
278392
;
279393

394+
define <vscale x 2 x i1> @fcvtzu_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
395+
; CHECK-LABEL: fcvtzu_nxv2f16_to_nxv2i1:
396+
; CHECK: // %bb.0:
397+
; CHECK-NEXT: ptrue p0.d
398+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
399+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
400+
; CHECK-NEXT: ret
401+
%res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i1>
402+
ret <vscale x 2 x i1> %res
403+
}
404+
405+
define <vscale x 2 x i1> @fcvtzu_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
406+
; CHECK-LABEL: fcvtzu_nxv2f32_to_nxv2i1:
407+
; CHECK: // %bb.0:
408+
; CHECK-NEXT: ptrue p0.d
409+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
410+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
411+
; CHECK-NEXT: ret
412+
%res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i1>
413+
ret <vscale x 2 x i1> %res
414+
}
415+
416+
define <vscale x 2 x i1> @fcvtzu_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
417+
; CHECK-LABEL: fcvtzu_nxv2f64_to_nxv2i1:
418+
; CHECK: // %bb.0:
419+
; CHECK-NEXT: ptrue p0.d
420+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
421+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
422+
; CHECK-NEXT: ret
423+
%res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i1>
424+
ret <vscale x 2 x i1> %res
425+
}
426+
427+
define <vscale x 4 x i1> @fcvtzu_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
428+
; CHECK-LABEL: fcvtzu_nxv4f16_to_nxv4i1:
429+
; CHECK: // %bb.0:
430+
; CHECK-NEXT: ptrue p0.s
431+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
432+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
433+
; CHECK-NEXT: ret
434+
%res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i1>
435+
ret <vscale x 4 x i1> %res
436+
}
437+
438+
define <vscale x 4 x i1> @fcvtzu_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
439+
; CHECK-LABEL: fcvtzu_nxv4f32_to_nxv4i1:
440+
; CHECK: // %bb.0:
441+
; CHECK-NEXT: ptrue p0.s
442+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
443+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
444+
; CHECK-NEXT: ret
445+
%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i1>
446+
ret <vscale x 4 x i1> %res
447+
}
448+
449+
define <vscale x 4 x i1> @fcvtzu_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
450+
; CHECK-LABEL: fcvtzu_nxv4f64_to_nxv4i1:
451+
; CHECK: // %bb.0:
452+
; CHECK-NEXT: ptrue p0.d
453+
; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
454+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
455+
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
456+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
457+
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
458+
; CHECK-NEXT: ret
459+
%res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i1>
460+
ret <vscale x 4 x i1> %res
461+
}
462+
463+
define <vscale x 8 x i1> @fcvtzu_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
464+
; CHECK-LABEL: fcvtzu_nxv8f16_to_nxv8i1:
465+
; CHECK: // %bb.0:
466+
; CHECK-NEXT: ptrue p0.h
467+
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
468+
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
469+
; CHECK-NEXT: ret
470+
%res = fptoui <vscale x 8 x half> %a to <vscale x 8 x i1>
471+
ret <vscale x 8 x i1> %res
472+
}
473+
474+
define <vscale x 8 x i1> @fcvtzu_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
475+
; CHECK-LABEL: fcvtzu_nxv8f32_to_nxv8i1:
476+
; CHECK: // %bb.0:
477+
; CHECK-NEXT: ptrue p0.s
478+
; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
479+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
480+
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
481+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
482+
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
483+
; CHECK-NEXT: ret
484+
%res = fptoui <vscale x 8 x float> %a to <vscale x 8 x i1>
485+
ret <vscale x 8 x i1> %res
486+
}
487+
488+
define <vscale x 8 x i1> @fcvtzu_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
489+
; CHECK-LABEL: fcvtzu_nxv8f64_to_nxv8i1:
490+
; CHECK: // %bb.0:
491+
; CHECK-NEXT: ptrue p0.d
492+
; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d
493+
; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.d
494+
; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
495+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
496+
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
497+
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
498+
; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
499+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
500+
; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
501+
; CHECK-NEXT: uzp1 p0.s, p0.s, p3.s
502+
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
503+
; CHECK-NEXT: ret
504+
%res = fptoui <vscale x 8 x double> %a to <vscale x 8 x i1>
505+
ret <vscale x 8 x i1> %res
506+
}
507+
280508
; NOTE: Using fcvtzs is safe as fptoui overflow is considered poison and a
281509
; 64bit signed value encompasses the entire range of a 16bit unsigned value
282510
define <vscale x 2 x i16> @fcvtzu_h_nxv2f16(<vscale x 2 x half> %a) {

0 commit comments

Comments
 (0)