Skip to content

[RISCV] Add optimization for memset inline #146673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2017,7 +2017,7 @@ class LLVM_ABI TargetLoweringBase {
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
getOptimalMemOpType(const MemOp &Op,
getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList & /*FuncAttributes*/) const {
return MVT::Other;
}
Comment on lines 2019 to 2023
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you precommit these signature changes?

Expand Down Expand Up @@ -4118,8 +4118,9 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
virtual bool
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS,
const AttributeList &FuncAttributes) const;

/// Check to see if the specified operand of the specified instruction is a
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8410,7 +8410,7 @@ static SDValue getMemcpyLoadsAndStores(
: MemOp::Copy(Size, DstAlignCanChange, Alignment,
*SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
C, MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();

Expand Down Expand Up @@ -8602,7 +8602,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
assert(SrcAlign && "SrcAlign must be set");
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit,
C, MemOps, Limit,
MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
Expand Down Expand Up @@ -8711,6 +8711,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
LLVMContext &C = *DAG.getContext();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
Expand All @@ -8721,7 +8722,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);

if (!TLI.findOptimalMemOpLowering(
MemOps, Limit,
C, MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,14 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
}

bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const {
if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() < Op.getDstAlign())
return false;

EVT VT = getOptimalMemOpType(Op, FuncAttributes);
EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17593,7 +17593,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
}

EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class AArch64TargetLowering : public TargetLowering {

bool shouldConsiderGEPOffsetSplit() const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

LLT getOptimalMemOpLLT(const MemOp &Op,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1983,7 +1983,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}

EVT SITargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.

// The default fallback uses the private pointer size as a guess for a type to
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *IsFast = nullptr) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19242,9 +19242,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
}


EVT ARMTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ class VectorType;
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1213,9 +1213,9 @@ int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
// loaded and stored. That's why we multiply the number of elements by 2 to
// get the cost for this memcpy.
std::vector<EVT> MemOps;
if (getTLI()->findOptimalMemOpLowering(
MemOps, Limit, MOp, DstAddrSpace,
SrcAddrSpace, F->getAttributes()))
LLVMContext &C = F->getContext();
if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
SrcAddrSpace, F->getAttributes()))
return MemOps.size() * Factor;

// If we can't find an optimal memop lowering, return the default cost
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/BPF/BPFISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class BPFTargetLowering : public TargetLowering {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override {
return Op.size() >= 8 ? MVT::i64 : MVT::i32;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3814,7 +3814,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
if (Op.size() >= 4 && Op.isAligned(Align(4)))
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/Hexagon/HexagonISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ class HexagonTargetLowering : public TargetLowering {
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Mips/MipsISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4519,7 +4519,8 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
}

EVT MipsTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
if (Subtarget.hasMips64())
return MVT::i64;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/Mips/MipsISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ class TargetRegisterClass;

bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

/// isFPImmLegal - Returns true if the target can instruction select the
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18239,7 +18239,8 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,7 @@ namespace llvm {

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

/// Is unaligned memory access allowed for the given type, and is it fast
Expand Down
31 changes: 25 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1664,7 +1664,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();

MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
MaxStoresPerMemset = Subtarget.hasVInstructions()
? (Subtarget.getRealMinVLen() / 8 *
Subtarget.getMaxLMULForFixedLengthVectors() /
(Subtarget.is64Bit() ? 8 : 4))
: Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);

MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
Expand Down Expand Up @@ -23773,9 +23777,9 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
return Subtarget.enableUnalignedVectorMem();
}


EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const {
EVT RISCVTargetLowering::getOptimalMemOpType(
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
if (!Subtarget.hasVInstructions())
return MVT::Other;

Expand Down Expand Up @@ -23808,8 +23812,23 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
// a large scalar constant and instead use vmv.v.x/i to do the
// broadcast. For everything else, prefer ELenVT to minimize VL and thus
// maximize the chance we can encode the size in the vsetvli.
MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
// If the Op size is greater than an LMUL8 memory operation, we don't support
// inlining the memset. Otherwise, return an EVT based on the Op size to avoid
// redundant splitting and merging operations.
if (Op.isMemset()) {
if (!Op.isZeroMemset())
return EVT::getVectorVT(Context, MVT::i8, Op.size());
if (Op.size() >
Subtarget.getMaxLMULForFixedLengthVectors() * MinVLenInBytes)
return MVT::Other;
if (Subtarget.hasVInstructionsI64() && Op.size() % 8 == 0)
return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check for Zve64x or V before using an i64 element type?

if (Op.size() % 4 == 0)
return EVT::getVectorVT(Context, MVT::i32, Op.size() / 4);
return EVT::getVectorVT(Context, MVT::i8, Op.size());
}

MVT PreferredVT = MVT::getIntegerVT(Subtarget.getELen());

// Do we have sufficient alignment for our preferred VT? If not, revert
// to largest size allowed by our alignment criteria.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ class RISCVTargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

bool splitValueIntoRegisterParts(
Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1423,8 +1423,9 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
}

bool SystemZTargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const {
const int MVCFastLen = 16;

if (Limit != ~unsigned(0)) {
Expand All @@ -1437,12 +1438,13 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
return false; // Memset zero: Use XC
}

return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
SrcAS, FuncAttributes);
return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
DstAS, SrcAS, FuncAttributes);
}

EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const {
EVT SystemZTargetLowering::getOptimalMemOpType(
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,11 @@ class SystemZTargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;
bool
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS,
const AttributeList &FuncAttributes) const override;
EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1095,7 +1095,7 @@ namespace llvm {
/// 4-byte boundaries.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

EVT getOptimalMemOpType(const MemOp &Op,
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const override;

/// Returns true if it's safe to use load / store of the
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86ISelLoweringCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@ Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
LLVMContext &Context, const MemOp &Op,
const AttributeList &FuncAttributes) const {
if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
Expand Down
Loading