From c72f8ba193796c77f4c145ab9d25dba3858c1686 Mon Sep 17 00:00:00 2001
From: wangboyao
Date: Wed, 9 Jul 2025 14:39:05 +0800
Subject: [PATCH 1/2] [TargetLowering] Change getOptimalMemOpType and
 findOptimalMemOpLowering to take LLVM Context

---
 llvm/include/llvm/CodeGen/TargetLowering.h       |  7 ++++---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp   |  7 ++++---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  7 ++++---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  |  3 ++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h    |  2 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp        |  3 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h          |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp          |  4 ++--
 llvm/lib/Target/ARM/ARMISelLowering.h            |  2 +-
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp   |  6 +++---
 llvm/lib/Target/BPF/BPFISelLowering.h            |  2 +-
 llvm/lib/Target/Hexagon/HexagonISelLowering.cpp  |  3 ++-
 llvm/lib/Target/Hexagon/HexagonISelLowering.h    |  2 +-
 llvm/lib/Target/Mips/MipsISelLowering.cpp        |  3 ++-
 llvm/lib/Target/Mips/MipsISelLowering.h          |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp      |  3 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h        |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      |  6 +++---
 llvm/lib/Target/RISCV/RISCVISelLowering.h        |  2 +-
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp  | 14 ++++++++------
 llvm/lib/Target/SystemZ/SystemZISelLowering.h    |  7 ++++---
 llvm/lib/Target/X86/X86ISelLowering.h            |  2 +-
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp      |  3 ++-
 23 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fee94cc167363..acc683baef7bd 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2017,7 +2017,7 @@ class LLVM_ABI TargetLoweringBase {
   /// It returns EVT::Other if the type should be determined using generic
   /// target-independent logic.
   virtual EVT
-  getOptimalMemOpType(const MemOp &Op,
+  getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                       const AttributeList & /*FuncAttributes*/) const {
     return MVT::Other;
   }
@@ -4118,8 +4118,9 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// It returns the types of the sequence of memory ops to perform
   /// memset / memcpy by reference.
   virtual bool
-  findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
-                           const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+  findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
+                           unsigned Limit, const MemOp &Op, unsigned DstAS,
+                           unsigned SrcAS,
                            const AttributeList &FuncAttributes) const;
 
   /// Check to see if the specified operand of the specified instruction is a
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1356239ad206..5a4cc466d2bce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8410,7 +8410,7 @@ static SDValue getMemcpyLoadsAndStores(
           : MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
                         isVol, CopyFromConstant);
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
+          C, MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
           SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
     return SDValue();
 
@@ -8602,7 +8602,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   assert(SrcAlign && "SrcAlign must be set");
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, Limit,
+          C, MemOps, Limit,
           MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
                       /*IsVolatile*/ true),
           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
@@ -8711,6 +8711,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
+  LLVMContext &C = *DAG.getContext();
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
@@ -8721,7 +8722,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
   unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
 
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, Limit,
+          C, MemOps, Limit,
           MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
           DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
     return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 45ab7526c3a32..dee5a3d1263bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -210,13 +210,14 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
 }
 
 bool TargetLowering::findOptimalMemOpLowering(
-    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
-    unsigned SrcAS, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
+    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+    const AttributeList &FuncAttributes) const {
   if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
       Op.getSrcAlign() < Op.getDstAlign())
     return false;
 
-  EVT VT = getOptimalMemOpType(Op, FuncAttributes);
+  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
 
   if (VT == MVT::Other) {
     // Use the largest integer type whose alignment constraints are satisfied.
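The reason Context is threaded through both hooks is that building non-simple
types requires an LLVMContext: EVT::getVectorVT(Context, ...) can construct
fixed-length vector EVTs, while the old signature limited overrides to
context-free MVTs such as MVT::i64. A minimal sketch of what an override can
now express, using a hypothetical HypoTargetLowering purely for illustration
(not part of this patch):

EVT HypoTargetLowering::getOptimalMemOpType(
    LLVMContext &Context, const MemOp &Op,
    const AttributeList &FuncAttributes) const {
  // With a context available, the hook can describe the whole operation as
  // one fixed-length vector instead of a sequence of scalar stores.
  if (Op.isMemset() && Op.size() % 8 == 0)
    return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
  return MVT::Other; // defer to generic target-independent logic
}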
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6b7e9357aab5a..de8f87fbde2be 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17593,7 +17593,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
 }
 
 EVT AArch64TargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
   bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
   bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..65fe08e92c235 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,7 +233,7 @@ class AArch64TargetLowering : public TargetLowering {
 
   bool shouldConsiderGEPOffsetSplit() const override;
 
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;
 
   LLT getOptimalMemOpLLT(const MemOp &Op,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0f70792cc03f4..e2a10be4c2c7b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1983,7 +1983,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
 }
 
 EVT SITargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   // FIXME: Should account for address space here.
 
   // The default fallback uses the private pointer size as a guess for a type to
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c66f300ec4cb1..acf6158572a4d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -357,7 +357,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       unsigned *IsFast = nullptr) const override;
 
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;
 
   bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b85e62d2eb8b..6e8935dc302d2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19242,9 +19242,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
   return false;
 }
 
-
 EVT ARMTargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
   if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 604910e04d4cc..5f4aef55b22c9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -472,7 +472,7 @@ class VectorType;
                                         MachineMemOperand::Flags Flags,
                                         unsigned *Fast) const override;
 
-    EVT getOptimalMemOpType(const MemOp &Op,
+    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;
 
     bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 203fb76d7be86..05d4069a686ab 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1213,9 +1213,9 @@ int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
   // loaded and stored. That's why we multiply the number of elements by 2 to
   // get the cost for this memcpy.
   std::vector<EVT> MemOps;
-  if (getTLI()->findOptimalMemOpLowering(
-          MemOps, Limit, MOp, DstAddrSpace,
-          SrcAddrSpace, F->getAttributes()))
+  LLVMContext &C = F->getContext();
+  if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
+                                         SrcAddrSpace, F->getAttributes()))
     return MemOps.size() * Factor;
 
   // If we can't find an optimal memop lowering, return the default cost
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 23cbce7094e6b..8f60261c10e9e 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -114,7 +114,7 @@ class BPFTargetLowering : public TargetLowering {
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override {
     return Op.size() >= 8 ? MVT::i64 : MVT::i32;
   }
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 5243f3bb1bf3c..acd5b58c48785 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3814,7 +3814,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
 /// does not need to be loaded. It returns EVT::Other if the type should be
 /// determined using generic target-independent logic.
 EVT HexagonTargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   if (Op.size() >= 8 && Op.isAligned(Align(8)))
     return MVT::i64;
   if (Op.size() >= 4 && Op.isAligned(Align(4)))
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 5e5f3ab78dcd7..f9e5478f457f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -336,7 +336,7 @@ class HexagonTargetLowering : public TargetLowering {
   /// the immediate into a register.
   bool isLegalICmpImmediate(int64_t Imm) const override;
 
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;
 
   bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 74a256e9729b3..0e581a7a16503 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4519,7 +4519,8 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }
 
 EVT MipsTargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   if (Subtarget.hasMips64())
     return MVT::i64;
 
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 241e9343ae384..31ac5d4c185bc 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -698,7 +698,7 @@ class TargetRegisterClass;
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-    EVT getOptimalMemOpType(const MemOp &Op,
+    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;
 
     /// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b96505816dee8..459525ed4ee9a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18239,7 +18239,8 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
 EVT PPCTargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
     // We should use Altivec/VSX loads and stores when available. For unaligned
     // addresses, unaligned VSX loads are only fast starting with the P8.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4c88bd372b106..124c7116dc3b5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1088,7 +1088,7 @@ namespace llvm {
 
     /// It returns EVT::Other if the type should be determined using generic
     /// target-independent logic.
-    EVT getOptimalMemOpType(const MemOp &Op,
+    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;
 
     /// Is unaligned memory access allowed for the given type, and is it fast
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98b613d9cc856..35692bda5388a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23773,9 +23773,9 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
   return Subtarget.enableUnalignedVectorMem();
 }
 
-
-EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
-                                             const AttributeList &FuncAttributes) const {
+EVT RISCVTargetLowering::getOptimalMemOpType(
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   if (!Subtarget.hasVInstructions())
     return MVT::Other;
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f67d7f155c9d0..bcbda30342b80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -331,7 +331,7 @@ class RISCVTargetLowering : public TargetLowering {
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       unsigned *Fast = nullptr) const override;
 
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;
 
   bool splitValueIntoRegisterParts(
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 262b607b953cc..8b868c3950a94 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1423,8 +1423,9 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 }
 
 bool SystemZTargetLowering::findOptimalMemOpLowering(
-    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
-    unsigned SrcAS, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
+    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+    const AttributeList &FuncAttributes) const {
   const int MVCFastLen = 16;
 
   if (Limit != ~unsigned(0)) {
@@ -1437,12 +1438,13 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
       return false; // Memset zero: Use XC
   }
 
-  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
-                                                  SrcAS, FuncAttributes);
+  return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
+                                                  DstAS, SrcAS, FuncAttributes);
 }
 
-EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
-                                               const AttributeList &FuncAttributes) const {
+EVT SystemZTargetLowering::getOptimalMemOpType(
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
 }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f2f0bf6d8b410..1866962e17587 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -510,10 +510,11 @@ class SystemZTargetLowering : public TargetLowering {
                                       MachineMemOperand::Flags Flags,
                                       unsigned *Fast) const override;
   bool
-  findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
-                           const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+  findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps,
+                           unsigned Limit, const MemOp &Op, unsigned DstAS,
+                           unsigned SrcAS,
                            const AttributeList &FuncAttributes) const override;
-  EVT getOptimalMemOpType(const MemOp &Op,
+  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;
   bool isTruncateFree(Type *, Type *) const override;
   bool isTruncateFree(EVT, EVT) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..3039b7eeb38ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1095,7 +1095,7 @@ namespace llvm {
     /// 4-byte boundaries.
     Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
 
-    EVT getOptimalMemOpType(const MemOp &Op,
+    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;
 
     /// Returns true if it's safe to use load / store of the
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index cb38a39ff991d..9ad355311527b 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -287,7 +287,8 @@ Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
 /// For vector ops we check that the overall size isn't larger than our
 /// preferred vector width.
 EVT X86TargetLowering::getOptimalMemOpType(
-    const MemOp &Op, const AttributeList &FuncAttributes) const {
+    LLVMContext &Context, const MemOp &Op,
+    const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
     if (Op.size() >= 16 &&
         (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {

From 1f7cbd91aa262f1ab1a1805d3d22a9e6bf328090 Mon Sep 17 00:00:00 2001
From: wangboyao
Date: Wed, 9 Jul 2025 20:29:22 +0800
Subject: [PATCH 2/2] [RISCV] Add optimization for memset inline

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp  |  25 +++-
 llvm/test/CodeGen/RISCV/pr135206.ll          |  39 ++----
 llvm/test/CodeGen/RISCV/rvv/memset-inline.ll | 137 +++++++------------
 llvm/test/CodeGen/RISCV/rvv/pr83017.ll       |  10 +-
 llvm/test/CodeGen/RISCV/rvv/pr90559.ll       |  10 +-
 5 files changed, 91 insertions(+), 130 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 35692bda5388a..e44c01a35dbac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1664,7 +1664,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
 
   MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
-  MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
+  MaxStoresPerMemset = Subtarget.hasVInstructions()
+                           ? (Subtarget.getRealMinVLen() / 8 *
+                              Subtarget.getMaxLMULForFixedLengthVectors() /
+                              (Subtarget.is64Bit() ? 8 : 4))
+                           : Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
 
   MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
   MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
@@ -23808,8 +23812,23 @@ EVT RISCVTargetLowering::getOptimalMemOpType(
   // a large scalar constant and instead use vmv.v.x/i to do the
   // broadcast. For everything else, prefer ELenVT to minimize VL and thus
   // maximize the chance we can encode the size in the vsetvli.
-  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
-  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
+  // If Op size is greater than LMUL8 memory operation, we don't support inline
+  // of memset. Return EVT based on Op size to avoid redundant splitting and
+  // merging operations if Op size is no greater than LMUL8 memory operation.
+  if (Op.isMemset()) {
+    if (!Op.isZeroMemset())
+      return EVT::getVectorVT(Context, MVT::i8, Op.size());
+    if (Op.size() >
+        Subtarget.getMaxLMULForFixedLengthVectors() * MinVLenInBytes)
+      return MVT::Other;
+    if (Subtarget.hasVInstructionsI64() && Op.size() % 8 == 0)
+      return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
+    if (Op.size() % 4 == 0)
+      return EVT::getVectorVT(Context, MVT::i32, Op.size() / 4);
+    return EVT::getVectorVT(Context, MVT::i8, Op.size());
+  }
+
+  MVT PreferredVT = MVT::getIntegerVT(Subtarget.getELen());
 
   // Do we have sufficient alignment for our preferred VT? If not, revert
   // to largest size allowed by our alignment criteria.
diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll
index 75b11c373895b..ab921cbc03011 100644
--- a/llvm/test/CodeGen/RISCV/pr135206.ll
+++ b/llvm/test/CodeGen/RISCV/pr135206.ll
@@ -13,8 +13,6 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
 ; CHECK-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 2000(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 1992(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    lui a0, 7
 ; CHECK-NEXT:    sub t1, sp, a0
 ; CHECK-NEXT:    lui t2, 1
@@ -24,8 +22,9 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
 ; CHECK-NEXT:    bne sp, t1, .LBB0_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    addi sp, sp, -2048
-; CHECK-NEXT:    addi sp, sp, -96
+; CHECK-NEXT:    addi sp, sp, -80
 ; CHECK-NEXT:    csrr t1, vlenb
+; CHECK-NEXT:    slli t1, t1, 2
 ; CHECK-NEXT:    lui t2, 1
 ; CHECK-NEXT:  .LBB0_3: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    sub sp, sp, t2
@@ -34,45 +33,35 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
 ; CHECK-NEXT:    bge t1, t2, .LBB0_3
 ; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    sub sp, sp, t1
-; CHECK-NEXT:    li a0, 86
-; CHECK-NEXT:    addi s0, sp, 48
-; CHECK-NEXT:    addi s1, sp, 32
-; CHECK-NEXT:    addi s2, sp, 16
-; CHECK-NEXT:    lui a1, 353637
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    li a1, 86
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    addi a0, a0, 32
 ; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-NEXT:    addi a0, a1, 1622
-; CHECK-NEXT:    vse8.v v8, (s0)
+; CHECK-NEXT:    vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; CHECK-NEXT:    li s0, 56
+; CHECK-NEXT:    addi s1, sp, 16
+; CHECK-NEXT:    vsetvli zero, s0, e8, m4, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (s1)
-; CHECK-NEXT:    vse8.v v8, (s2)
-; CHECK-NEXT:    slli a1, a0, 32
-; CHECK-NEXT:    add s3, a0, a1
-; CHECK-NEXT:    sd s3, 64(sp)
 ; CHECK-NEXT:    call bar
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    addi a0, a0, 32
 ; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vse8.v v8, (s0)
+; CHECK-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, s0, e8, m4, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (s1)
-; CHECK-NEXT:    vse8.v v8, (s2)
-; CHECK-NEXT:    sd s3, 64(sp)
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    add sp, sp, a1
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    addi a1, a1, -1952
+; CHECK-NEXT:    addi a1, a1, -1968
 ; CHECK-NEXT:    add sp, sp, a1
 ; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 1992(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 2032
 ; CHECK-NEXT:    ret
   %1 = alloca %"buff", align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
index 896394017b6f1..2e31e947cf195 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -169,19 +169,17 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
 define void @memset_32(ptr %a, i8 %value) nounwind {
 ; RV32-BOTH-LABEL: memset_32:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a2, a0, 16
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BOTH-NEXT:    li a2, 32
+; RV32-BOTH-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.x v8, a1
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV32-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: memset_32:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a2, a0, 16
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT:    li a2, 32
+; RV64-BOTH-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.x v8, a1
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV64-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
@@ -191,27 +189,17 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
 define void @memset_64(ptr %a, i8 %value) nounwind {
 ; RV32-BOTH-LABEL: memset_64:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a2, a0, 48
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BOTH-NEXT:    li a2, 64
+; RV32-BOTH-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.x v8, a1
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
-; RV32-BOTH-NEXT:    addi a2, a0, 16
-; RV32-BOTH-NEXT:    vse8.v v8, (a1)
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV32-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: memset_64:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a2, a0, 48
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT:    li a2, 64
+; RV64-BOTH-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.x v8, a1
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
-; RV64-BOTH-NEXT:    addi a2, a0, 16
-; RV64-BOTH-NEXT:    vse8.v v8, (a1)
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV64-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
@@ -309,19 +297,17 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
 define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 ; RV32-BOTH-LABEL: aligned_memset_32:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a2, a0, 16
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BOTH-NEXT:    li a2, 32
+; RV32-BOTH-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.x v8, a1
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV32-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_memset_32:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a2, a0, 16
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT:    li a2, 32
+; RV64-BOTH-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.x v8, a1
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV64-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
@@ -331,27 +317,17 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
 ; RV32-BOTH-LABEL: aligned_memset_64:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a2, a0, 48
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-BOTH-NEXT:    li a2, 64
+; RV32-BOTH-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.x v8, a1
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
-; RV32-BOTH-NEXT:    addi a2, a0, 16
-; RV32-BOTH-NEXT:    vse8.v v8, (a1)
-; RV32-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV32-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_memset_64:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a2, a0, 48
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-BOTH-NEXT:    li a2, 64
+; RV64-BOTH-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.x v8, a1
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
-; RV64-BOTH-NEXT:    addi a2, a0, 16
-; RV64-BOTH-NEXT:    vse8.v v8, (a1)
-; RV64-BOTH-NEXT:    vse8.v v8, (a2)
 ; RV64-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
@@ -472,15 +448,17 @@ define void @bzero_8(ptr %a) nounwind {
 define void @bzero_16(ptr %a) nounwind {
 ; RV32-LABEL: bzero_16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bzero_16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
 ;
@@ -504,38 +482,34 @@ define void @bzero_16(ptr %a) nounwind {
 define void @bzero_32(ptr %a) nounwind {
 ; RV32-LABEL: bzero_32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bzero_32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vse8.v v8, (a0)
-; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
 ;
 ; RV32-FAST-LABEL: bzero_32:
 ; RV32-FAST:       # %bb.0:
-; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-FAST-NEXT:    vmv.v.i v8, 0
 ; RV32-FAST-NEXT:    vse64.v v8, (a0)
-; RV32-FAST-NEXT:    addi a0, a0, 16
-; RV32-FAST-NEXT:    vse64.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: bzero_32:
 ; RV64-FAST:       # %bb.0:
-; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-FAST-NEXT:    vmv.v.i v8, 0
 ; RV64-FAST-NEXT:    vse64.v v8, (a0)
-; RV64-FAST-NEXT:    addi a0, a0, 16
-; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
   ret void
@@ -544,17 +518,19 @@ define void @bzero_32(ptr %a) nounwind {
 define void @bzero_64(ptr %a) nounwind {
 ; RV32-LABEL: bzero_64:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    li a1, 64
 ; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bzero_64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    li a1, 64
 ; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
 ;
@@ -642,20 +618,16 @@ define void @aligned_bzero_16(ptr %a) nounwind {
 define void @aligned_bzero_32(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_32:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
-; RV32-BOTH-NEXT:    addi a0, a0, 16
-; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_32:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
-; RV64-BOTH-NEXT:    addi a0, a0, 16
-; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
   ret void
@@ -682,18 +654,22 @@ define void @aligned_bzero_64(ptr %a) nounwind {
 define void @aligned_bzero_66(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_66:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    sh zero, 64(a0)
-; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT:    li a1, 128
+; RV32-BOTH-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a0)
+; RV32-BOTH-NEXT:    li a1, 66
+; RV32-BOTH-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV32-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_66:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    sh zero, 64(a0)
-; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT:    li a1, 128
+; RV64-BOTH-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    vse64.v v8, (a0)
+; RV64-BOTH-NEXT:    li a1, 66
+; RV64-BOTH-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV64-BOTH-NEXT:    vse8.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
   ret void
@@ -702,27 +678,15 @@ define void @aligned_bzero_66(ptr %a) nounwind {
 define void @aligned_bzero_96(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_96:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    addi a1, a0, 80
-; RV32-BOTH-NEXT:    vse64.v v8, (a0)
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    addi a0, a0, 64
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_96:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    addi a1, a0, 80
-; RV64-BOTH-NEXT:    vse64.v v8, (a0)
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    addi a0, a0, 64
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -750,11 +714,12 @@ define void @aligned_bzero_128(ptr %a) nounwind {
 define void @aligned_bzero_256(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_256:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT:    addi a1, a0, 128
+; RV32-BOTH-NEXT:    li a2, 32
+; RV32-BOTH-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a0)
-; RV32-BOTH-NEXT:    addi a0, a0, 128
-; RV32-BOTH-NEXT:    vse64.v v8, (a0)
+; RV32-BOTH-NEXT:    vse32.v v8, (a1)
+; RV32-BOTH-NEXT:    vse32.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_256:
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr83017.ll b/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
index beca480378a35..7a450397a30ee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr83017.ll
@@ -31,15 +31,9 @@ define void @aliasing(ptr %p) {
 ; CHECK-LABEL: aliasing:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lw a1, 84(a0)
-; CHECK-NEXT:    addi a2, a0, 80
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs1r.v v8, (a2)
-; CHECK-NEXT:    addi a2, a0, 64
-; CHECK-NEXT:    vs1r.v v8, (a2)
-; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    sw a1, 84(a0)
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i64 84
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
index 7e109f307c4a5..97a3e6f2f6f58 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
@@ -28,15 +28,9 @@ define void @f(ptr %p) vscale_range(2,2) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lw a1, 84(a0)
-; CHECK-NEXT:    addi a2, a0, 80
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs1r.v v8, (a2)
-; CHECK-NEXT:    addi a2, a0, 64
-; CHECK-NEXT:    vs1r.v v8, (a2)
-; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    sw a1, 84(a0)
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i64 84
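Restated outside diff form, the RISCV policy introduced above: when vector
instructions are available, an inline memset whose size fits in a single LMUL8
memory operation gets one fixed-length vector type covering the whole region,
so codegen emits a single vmv.v.x/vmv.v.i splat plus one vse store (visible in
the updated CHECK lines) rather than a chain of 16-byte pieces. A simplified
standalone sketch of that selection rule; chooseMemsetVT is a hypothetical
helper name, and MaxBytes stands in for
Subtarget.getMaxLMULForFixedLengthVectors() * MinVLenInBytes:

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Sketch of the type choice for an inline memset of Size bytes.
static EVT chooseMemsetVT(LLVMContext &Ctx, uint64_t Size, bool IsZeroMemset,
                          bool HasI64Vectors, uint64_t MaxBytes) {
  if (!IsZeroMemset) // non-zero value: splat the byte directly with vmv.v.x
    return EVT::getVectorVT(Ctx, MVT::i8, unsigned(Size));
  if (Size > MaxBytes)
    return MVT::Other; // larger than one LMUL8 op: fall back to generic logic
  // For zero memsets, wider elements minimize VL and vsetvli churn.
  if (HasI64Vectors && Size % 8 == 0)
    return EVT::getVectorVT(Ctx, MVT::i64, unsigned(Size / 8));
  if (Size % 4 == 0)
    return EVT::getVectorVT(Ctx, MVT::i32, unsigned(Size / 4));
  return EVT::getVectorVT(Ctx, MVT::i8, unsigned(Size));
}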