Skip to content

Commit 3ab95e4

Browse files
authored
[AArch64] Scalarize v2f16 vecreduce.fadd (#147783)
This adds a custom lowering for v2f16 vecreduce.fadd that scalarizes the reduction (extracting both lanes and adding them) instead of widening the vector by padding it with zeroes. This allows it to generate a single, more efficient faddp. Helps with #147583.
1 parent 6dc193a commit 3ab95e4

File tree

2 files changed

+14
-4
lines changed

2 files changed

+14
-4
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13521352
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
13531353
}
13541354
}
1355+
if (Subtarget->hasFullFP16())
1356+
setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
1357+
13551358
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
13561359
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
13571360
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@@ -16046,9 +16049,19 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
1604616049
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
1604716050
SelectionDAG &DAG) const {
1604816051
SDValue Src = Op.getOperand(0);
16052+
EVT SrcVT = Src.getValueType();
16053+
16054+
// Scalarize v2f16 to turn it into a faddp. This will be more efficient than
16055+
// widening by inserting zeroes.
16056+
if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD &&
16057+
SrcVT == MVT::v2f16) {
16058+
SDLoc DL(Op);
16059+
return DAG.getNode(ISD::FADD, DL, MVT::f16,
16060+
DAG.getExtractVectorElt(DL, MVT::f16, Src, 0),
16061+
DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
16062+
}
1604916063

1605016064
// Try to lower fixed length reductions to SVE.
16051-
EVT SrcVT = Src.getValueType();
1605216065
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
1605316066
Op.getOpcode() == ISD::VECREDUCE_AND ||
1605416067
Op.getOpcode() == ISD::VECREDUCE_OR ||

llvm/test/CodeGen/AArch64/vecreduce-fadd.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
2727
; CHECK-SD-FP16-LABEL: add_v2HalfH:
2828
; CHECK-SD-FP16: // %bb.0:
2929
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
30-
; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr
31-
; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
32-
; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
3330
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
3431
; CHECK-SD-FP16-NEXT: ret
3532
;

0 commit comments

Comments
 (0)