Skip to content

Commit c7d1eae

Browse files
authored
[RISCV] Use masked segment LD/ST intrinsics in (de)interleaveN lowering [nfc] (#148966)
Follow-up to the work in e5bc7e7: extend it to the lowering used for interleave and deinterleave when we can't combine with a nearby memory operation.
1 parent 0f1b16d commit c7d1eae

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11968,7 +11968,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1196811968

1196911969
// Store with unit-stride store and load it back with segmented load.
1197011970
MVT XLenVT = Subtarget.getXLenVT();
11971-
SDValue VL = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget).second;
11971+
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
1197211972
SDValue Passthru = DAG.getUNDEF(ConcatVT);
1197311973

1197411974
// Allocate a stack slot.
@@ -11989,16 +11989,20 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1198911989
MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer());
1199011990

1199111991
static const Intrinsic::ID VlsegIntrinsicsIds[] = {
11992-
Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, Intrinsic::riscv_vlseg4,
11993-
Intrinsic::riscv_vlseg5, Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
11994-
Intrinsic::riscv_vlseg8};
11992+
Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11993+
Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11994+
Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11995+
Intrinsic::riscv_vlseg8_mask};
1199511996

1199611997
SDValue LoadOps[] = {
1199711998
Chain,
1199811999
DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
1199912000
Passthru,
1200012001
StackPtr,
12002+
Mask,
1200112003
VL,
12004+
DAG.getTargetConstant(
12005+
RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
1200212006
DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
1200312007

1200412008
unsigned Sz =
@@ -12050,7 +12054,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
1205012054
}
1205112055

1205212056
MVT XLenVT = Subtarget.getXLenVT();
12053-
SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
12057+
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
1205412058

1205512059
// If the VT is larger than LMUL=8, we need to split and reassemble.
1205612060
if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
@@ -12099,10 +12103,10 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
1209912103
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
1210012104

1210112105
static const Intrinsic::ID IntrIds[] = {
12102-
Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
12103-
Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
12104-
Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
12105-
Intrinsic::riscv_vsseg8,
12106+
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12107+
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12108+
Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12109+
Intrinsic::riscv_vsseg8_mask,
1210612110
};
1210712111

1210812112
unsigned Sz =
@@ -12118,6 +12122,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
1211812122
DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
1211912123
StoredVal,
1212012124
StackPtr,
12125+
Mask,
1212112126
VL,
1212212127
DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
1212312128
DL, XLenVT)};

0 commit comments

Comments
 (0)