Skip to content

Commit d7354fa

Browse files
committed
[RISCV] Lower VP_SELECT constant false to use vmerge.vxm/vmerge.vim
Currently, when the false operand of a vp_select is a splat vector, it is lowered to a vmv_v_x/vmv_v_i. MachineLICM hoists the vmv out of the loop, but a whole register copy is left in the loop body. By inverting the mask register and swapping the true and false values in the vp_select, we can eliminate some instructions inside the loop. Current: https://godbolt.org/z/EnGMn3xeM Expected similar form: https://godbolt.org/z/nWhGM6Ej5
1 parent e14f327 commit d7354fa

File tree

4 files changed

+28
-7
lines changed

4 files changed

+28
-7
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8168,11 +8168,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
81688168
return lowerRESET_FPENV(Op, DAG);
81698169
case ISD::EH_DWARF_CFA:
81708170
return lowerEH_DWARF_CFA(Op, DAG);
8171+
case ISD::VP_SELECT:
8172+
if (SDValue Op2 = Op.getOperand(2);
8173+
Op2.hasOneUse() && (Op2.getOpcode() == ISD::SPLAT_VECTOR ||
8174+
Op2.getOpcode() == ISD::SPLAT_VECTOR_PARTS))
8175+
return lowerVPSelectConstantFalse(Op, DAG);
8176+
else
8177+
return lowerVPOp(Op, DAG);
81718178
case ISD::VP_MERGE:
81728179
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
81738180
return lowerVPMergeMask(Op, DAG);
81748181
[[fallthrough]];
8175-
case ISD::VP_SELECT:
81768182
case ISD::VP_ADD:
81778183
case ISD::VP_SUB:
81788184
case ISD::VP_MUL:
@@ -13175,6 +13181,20 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
1317513181
return convertFromScalableVector(VT, Result, DAG, Subtarget);
1317613182
}
1317713183

13184+
SDValue
13185+
RISCVTargetLowering::lowerVPSelectConstantFalse(SDValue Op,
13186+
SelectionDAG &DAG) const {
13187+
SDLoc DL(Op);
13188+
MVT VT = Op.getSimpleValueType();
13189+
SDValue TrueVal = Op.getOperand(1);
13190+
SDValue FalseVal = Op.getOperand(2);
13191+
SDValue VL = Op.getOperand(3);
13192+
SDValue Op0 = Op.getOperand(0);
13193+
SDValue Val = DAG.getLogicalNOT(DL, Op0, Op0.getValueType());
13194+
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Val, FalseVal, TrueVal,
13195+
DAG.getUNDEF(VT), VL);
13196+
}
13197+
1317813198
SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
1317913199
SelectionDAG &DAG) const {
1318013200
SDLoc DL(Op);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ class RISCVTargetLowering : public TargetLowering {
486486
SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
487487

488488
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
489+
SDValue lowerVPSelectConstantFalse(SDValue Op, SelectionDAG &DAG) const;
489490
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
490491
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
491492
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
3434
; ZVE32: # %bb.0:
3535
; ZVE32-NEXT: csrr a1, vlenb
3636
; ZVE32-NEXT: srli a1, a1, 3
37-
; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
38-
; ZVE32-NEXT: vmv.v.i v8, 0
39-
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
37+
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
4038
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
39+
; ZVE32-NEXT: vmnot.m v0, v0
40+
; ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
4141
; ZVE32-NEXT: ret
4242
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
4343
ret <vscale x 1 x i8> %load

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,10 +483,10 @@ define <vscale x 2 x i64> @select_nxv2i64_constant_true(<vscale x 2 x i1> %a, <v
483483
define <vscale x 2 x i64> @select_nxv2i64_constant_false(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
484484
; CHECK-LABEL: select_nxv2i64_constant_false:
485485
; CHECK: # %bb.0:
486-
; CHECK-NEXT: li a1, 100
487486
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
488-
; CHECK-NEXT: vmv.v.x v10, a1
489-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
487+
; CHECK-NEXT: vmnot.m v0, v0
488+
; CHECK-NEXT: li a0, 100
489+
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
490490
; CHECK-NEXT: ret
491491
%v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat (i64 100), i32 %evl)
492492
ret <vscale x 2 x i64> %v

0 commit comments

Comments
 (0)