diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 01be10be433fd..20008fa90e094 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1352,6 +1352,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } } + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom); + for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); @@ -16046,9 +16049,19 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Scalarize v2f16 to turn it into a faddp. This will be more efficient than + // widening by inserting zeroes. + if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD && + SrcVT == MVT::v2f16) { + SDLoc DL(Op); + return DAG.getNode(ISD::FADD, DL, MVT::f16, + DAG.getExtractVectorElt(DL, MVT::f16, Src, 0), + DAG.getExtractVectorElt(DL, MVT::f16, Src, 1)); + } // Try to lower fixed length reductions to SVE. - EVT SrcVT = Src.getValueType(); bool OverrideNEON = !Subtarget->isNeonAvailable() || Op.getOpcode() == ISD::VECREDUCE_AND || Op.getOpcode() == ISD::VECREDUCE_OR || diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 2e993a85760c6..8a84d3ca2328c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -27,9 +27,6 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) { ; CHECK-SD-FP16-LABEL: add_v2HalfH: ; CHECK-SD-FP16: // %bb.0: ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr -; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr -; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h ; CHECK-SD-FP16-NEXT: ret ;