Skip to content

Commit 3151db1

Browse files
committed
[RISCV] Combine VP_SELECT constant false to use vmerge.vxm/vmerge.vim
Currently, when the false operand of a vp_select is a splat vector, it is lowered to a vmv_v_x/vmv_v_i. MachineLICM then hoists the vmv out of the loop, leaving a whole-register copy in the loop body. By inverting the mask register and swapping the true and false values of the vp_select, the splat can be folded into vmerge.vxm/vmerge.vim, which eliminates some instructions inside the loop. Current: https://godbolt.org/z/EnGMn3xeM Expected similar form: https://godbolt.org/z/nWhGM6Ej5
1 parent e14f327 commit 3151db1

File tree

3 files changed

+27
-15
lines changed

3 files changed

+27
-15
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,15 +1628,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16281628
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
16291629
if (Subtarget.hasVInstructions())
16301630
setTargetDAGCombine(
1631-
{ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1632-
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1633-
ISD::SRL, ISD::SHL, ISD::STORE,
1634-
ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1635-
ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1636-
ISD::MUL, ISD::SDIV, ISD::UDIV,
1637-
ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
1638-
ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
1639-
ISD::VSELECT, ISD::VECREDUCE_ADD});
1631+
{ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1632+
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1633+
ISD::SRL, ISD::SHL, ISD::STORE,
1634+
ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1635+
ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1636+
ISD::MUL, ISD::SDIV, ISD::UDIV,
1637+
ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
1638+
ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
1639+
ISD::VSELECT, ISD::VECREDUCE_ADD, ISD::VP_SELECT});
16401640

16411641
if (Subtarget.hasVendorXTHeadMemPair())
16421642
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
@@ -19725,6 +19725,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1972519725
return performSELECTCombine(N, DAG, Subtarget);
1972619726
case ISD::VSELECT:
1972719727
return performVSELECTCombine(N, DAG);
19728+
case ISD::VP_SELECT: {
19729+
if (SDValue Op2 = N->getOperand(2);
19730+
Op2.hasOneUse() && (Op2.getOpcode() == ISD::SPLAT_VECTOR ||
19731+
Op2.getOpcode() == ISD::SPLAT_VECTOR_PARTS)) {
19732+
SDLoc DL(N);
19733+
SDValue Op0 = N->getOperand(0);
19734+
SDValue Val = DAG.getLogicalNOT(DL, Op0, Op0.getValueType());
19735+
return DAG.getNode(ISD::VP_SELECT, DL, N->getValueType(0), Val,
19736+
N->getOperand(2), N->getOperand(1), N->getOperand(3));
19737+
}
19738+
return SDValue();
19739+
}
1972819740
case RISCVISD::CZERO_EQZ:
1972919741
case RISCVISD::CZERO_NEZ: {
1973019742
SDValue Val = N->getOperand(0);

llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
3434
; ZVE32: # %bb.0:
3535
; ZVE32-NEXT: csrr a1, vlenb
3636
; ZVE32-NEXT: srli a1, a1, 3
37-
; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
38-
; ZVE32-NEXT: vmv.v.i v8, 0
39-
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
37+
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
4038
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
39+
; ZVE32-NEXT: vmnot.m v0, v0
40+
; ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
4141
; ZVE32-NEXT: ret
4242
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
4343
ret <vscale x 1 x i8> %load

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,10 +483,10 @@ define <vscale x 2 x i64> @select_nxv2i64_constant_true(<vscale x 2 x i1> %a, <v
483483
define <vscale x 2 x i64> @select_nxv2i64_constant_false(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
484484
; CHECK-LABEL: select_nxv2i64_constant_false:
485485
; CHECK: # %bb.0:
486-
; CHECK-NEXT: li a1, 100
487486
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
488-
; CHECK-NEXT: vmv.v.x v10, a1
489-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
487+
; CHECK-NEXT: vmnot.m v0, v0
488+
; CHECK-NEXT: li a0, 100
489+
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
490490
; CHECK-NEXT: ret
491491
%v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat (i64 100), i32 %evl)
492492
ret <vscale x 2 x i64> %v

0 commit comments

Comments
 (0)