From 8664a890ef0567b92b82fd8eee8b69914fac678b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +0000
Subject: [PATCH 1/7] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI)

This patch refactors AArch64FrameLowering so that the sizes of the ZPR
and PPR areas can be calculated separately. This will be used by a
subsequent patch to support allocating ZPRs and PPRs to separate areas.
This patch should be NFC and is split out to make later functional
changes easier to spot.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 306 ++++++++++++------
 .../lib/Target/AArch64/AArch64FrameLowering.h |  12 +-
 .../AArch64/AArch64MachineFunctionInfo.h      |  61 ++--
 .../Target/AArch64/AArch64RegisterInfo.cpp    |   7 +-
 4 files changed, 257 insertions(+), 129 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 409a21a37810f..054347fef4622 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -330,10 +330,13 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getZPRStackSize(const MachineFunction &MF);
+static StackOffset getPPRStackSize(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
 static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                                  bool HasCall = false);
 static bool requiresSaveVG(const MachineFunction &MF);
+static bool hasSVEStackSize(const MachineFunction &MF);
 
 // Conservatively, returns true if the function is likely to have SVE vectors
 // on the stack. This function is safe to be called before callee-saves or
@@ -493,10 +496,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
   }
 }
 
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+static StackOffset getZPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+/// Returns the size of the entire PPR stackframe (calleesaves + spills).
+static StackOffset getPPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
 static StackOffset getSVEStackSize(const MachineFunction &MF) {
+  return getZPRStackSize(MF) + getPPRStackSize(MF);
+}
+
+static bool hasSVEStackSize(const MachineFunction &MF) {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
+  return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0;
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+  return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+             AArch64::PPRRegClass) == 16;
 }
 
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -524,7 +553,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
                             !Subtarget.hasSVE();
 
   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
-           getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+           hasSVEStackSize(MF) || LowerQRegCopyThroughMem);
 }
 
 /// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1224,7 +1253,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
   // When there is an SVE area on the stack, always allocate the
   // callee-saves and spills/locals separately.
-  if (getSVEStackSize(MF))
+  if (hasSVEStackSize(MF))
     return false;
 
   return true;
@@ -1668,25 +1697,19 @@ static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
 
-static unsigned getStackHazardSize(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
 // Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
   switch (I->getOpcode()) {
   default:
     return false;
-  case AArch64::PTRUE_C_B:
   case AArch64::LD1B_2Z_IMM:
   case AArch64::ST1B_2Z_IMM:
   case AArch64::STR_ZXI:
-  case AArch64::STR_PXI:
   case AArch64::LDR_ZXI:
-  case AArch64::LDR_PXI:
-  case AArch64::PTRUE_B:
   case AArch64::CPY_ZPzI_B:
   case AArch64::CMPNE_PPzZI_B:
+  case AArch64::PTRUE_C_B:
+  case AArch64::PTRUE_B:
     return I->getFlag(MachineInstr::FrameSetup) ||
            I->getFlag(MachineInstr::FrameDestroy);
   case AArch64::SEH_SavePReg:
@@ -1695,6 +1718,22 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
   }
 }
 
+// Convenience function to determine whether I is an SVE predicate callee save.
+static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
+  switch (I->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STR_PXI:
+  case AArch64::LDR_PXI:
+    return I->getFlag(MachineInstr::FrameSetup) ||
+           I->getFlag(MachineInstr::FrameDestroy);
+  }
+}
+
+static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+  return IsZPRCalleeSave(I) || IsPPRCalleeSave(I);
+}
+
 static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
                                         MachineFunction &MF,
                                         MachineBasicBlock &MBB,
@@ -1926,8 +1965,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   else
     AFI->setTaggedBasePointerOffset(MFI.getStackSize());
 
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
@@ -1938,7 +1975,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
-    assert(!SVEStackSize &&
+    assert(!hasSVEStackSize(MF) &&
            "unexpected function without stack frame but with SVE objects");
     // All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes); @@ -2019,7 +2056,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, &HasWinCFI, EmitAsyncCFI); NumBytes -= AFI->getCalleeSavedStackSize(); } else if (CombineSPBump) { - assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + assert(!hasSVEStackSize(MF) && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI, @@ -2278,36 +2315,62 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } } - StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; MachineBasicBlock::iterator CalleeSavesEnd = MBBI; + StackOffset PPRCalleeSavesSize = + StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); + StackOffset ZPRCalleeSavesSize = + StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); + StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize; + StackOffset PPRLocalsSize = getPPRStackSize(MF) - PPRCalleeSavesSize; + StackOffset ZPRLocalsSize = getZPRStackSize(MF) - ZPRCalleeSavesSize; + StackOffset CFAOffset = StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); + if (!FPAfterSVECalleeSaves) { + MachineBasicBlock::iterator ZPRCalleeSavesBegin = MBBI, + ZPRCalleeSavesEnd = MBBI; + MachineBasicBlock::iterator PPRCalleeSavesBegin = MBBI, + PPRCalleeSavesEnd = MBBI; - // Process the SVE callee-saves to determine what space needs to be - // allocated. - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize - << "\n"); - SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); - SVELocalsSize = SVEStackSize - SVECalleeSavesSize; - // Find callee save instructions in frame. - // Note: With FPAfterSVECalleeSaves the callee saves have already been + // Process the SVE callee-saves to determine what space needs to be // allocated. - if (!FPAfterSVECalleeSaves) { - MachineBasicBlock::iterator CalleeSavesBegin = MBBI; - assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); - while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) + + if (PPRCalleeSavesSize) { + LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " + << PPRCalleeSavesSize.getScalable() << "\n"); + + PPRCalleeSavesBegin = MBBI; + assert(IsPPRCalleeSave(PPRCalleeSavesBegin) && "Unexpected instruction"); + while (IsPPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) ++MBBI; - CalleeSavesEnd = MBBI; + PPRCalleeSavesEnd = MBBI; + } - StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); - // Allocate space for the callee saves (if any). - allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, - nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); + if (ZPRCalleeSavesSize) { + LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " + << ZPRCalleeSavesSize.getScalable() << "\n"); + ZPRCalleeSavesBegin = MBBI; + assert(IsZPRCalleeSave(ZPRCalleeSavesBegin) && "Unexpected instruction"); + while (IsZPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) + ++MBBI; + ZPRCalleeSavesEnd = MBBI; } + + // Allocate space for the callee saves (if any). + StackOffset LocalsSize = + PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes); + MachineBasicBlock::iterator CalleeSavesBegin = + AFI->getPPRCalleeSavedStackSize() ? 
PPRCalleeSavesBegin + : ZPRCalleeSavesBegin; + allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, + nullptr, EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects() || LocalsSize); + + CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd + : PPRCalleeSavesEnd; } + CFAOffset += SVECalleeSavesSize; if (EmitAsyncCFI) @@ -2321,6 +2384,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. + StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize; allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, SVELocalsSize + StackOffset::getFixed(NumBytes), NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, @@ -2370,7 +2434,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject); } else { StackOffset TotalSize = - SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); + getSVEStackSize(MF) + + StackOffset::getFixed((int64_t)MFI.getStackSize()); CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); CFIBuilder.insertCFIInst( createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, @@ -2571,7 +2636,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } } - const StackOffset &SVEStackSize = getSVEStackSize(MF); + StackOffset SVEStackSize = getSVEStackSize(MF); // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { @@ -2596,7 +2661,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // deallocated. StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI; - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { + int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize(); + int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize(); + int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize; + + if (SVECalleeSavedSize) { if (FPAfterSVECalleeSaves) RestoreEnd = MBB.getFirstTerminator(); @@ -2609,7 +2678,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); StackOffset CalleeSavedSizeAsOffset = - StackOffset::getScalable(CalleeSavedSize); + StackOffset::getScalable(SVECalleeSavedSize); DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } @@ -2795,7 +2864,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); - StackOffset SVEStackSize = getSVEStackSize(MF); + StackOffset ZPRStackSize = getZPRStackSize(MF); + StackOffset PPRStackSize = getPPRStackSize(MF); + StackOffset SVEStackSize = ZPRStackSize + PPRStackSize; // For VLA-area objects, just emit an offset at the end of the stack frame. // Whilst not quite correct, these objects do live at the end of the frame and @@ -2896,7 +2967,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); - const StackOffset &SVEStackSize = getSVEStackSize(MF); + const StackOffset SVEStackSize = getSVEStackSize(MF); // Use frame pointer to reference fixed objects. 
Use it for locals if
   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -3192,10 +3263,13 @@ static void computeCalleeSaveRegisterPairs(
     FirstReg = Count - 1;
   }
   bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
-  int ScalableByteOffset =
-      FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+  int ScalableByteOffset = FPAfterSVECalleeSaves
+                               ? 0
+                               : AFI->getZPRCalleeSavedStackSize() +
+                                     AFI->getPPRCalleeSavedStackSize();
   bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
   Register LastReg = 0;
+  bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
 
   // When iterating backwards, the loop condition relies on unsigned wraparound.
   for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -3225,7 +3299,7 @@ static void computeCalleeSaveRegisterPairs(
     }
 
     // Add the stack hazard size as we transition from GPR->FPR CSRs.
-    if (AFI->hasStackHazardSlotIndex() &&
+    if (HasCSHazardPadding &&
         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
         AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
       ByteOffset += StackFillDir * StackHazardSize;
@@ -3233,7 +3307,7 @@ static void computeCalleeSaveRegisterPairs(
     int Scale = TRI->getSpillSize(*RPI.RC);
 
     // Add the next reg to the pair if it is in the same register class.
-    if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+    if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
       MCRegister NextReg = CSI[i + RegInc].getReg();
       bool IsFirst = i == FirstReg;
       switch (RPI.Type) {
@@ -3862,10 +3936,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
 
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned UnspilledCSGPR = AArch64::NoRegister;
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -3986,15 +4061,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   // Calculates the callee saved stack size.
   unsigned CSStackSize = 0;
-  unsigned SVECSStackSize = 0;
+  unsigned ZPRCSStackSize = 0;
+  unsigned PPRCSStackSize = 0;
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   for (unsigned Reg : SavedRegs.set_bits()) {
     auto *RC = TRI->getMinimalPhysRegClass(Reg);
     assert(RC && "expected register class!");
     auto SpillSize = TRI->getSpillSize(*RC);
-    if (AArch64::PPRRegClass.contains(Reg) ||
-        AArch64::ZPRRegClass.contains(Reg))
-      SVECSStackSize += SpillSize;
+    bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+    bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+    if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+      ZPRCSStackSize += SpillSize;
+    else if (IsPPR)
+      PPRCSStackSize += SpillSize;
     else
       CSStackSize += SpillSize;
   }
@@ -4004,6 +4083,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // only 64-bit GPRs can be added to SavedRegs.
   unsigned NumSavedRegs = SavedRegs.count();
 
+  // Determine if a Hazard slot should be used, and increase the CSStackSize by
+  // StackHazardSize if so.
+  determineStackHazardSlot(MF, SavedRegs);
+  if (AFI->hasStackHazardSlotIndex())
+    CSStackSize += getStackHazardSize(MF);
+
   // Increase the callee-saved stack size if the function has streaming mode
   // changes, as we will need to spill the value of the VG register.
   // For locally streaming functions, we spill both the streaming and
@@ -4016,12 +4101,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       CSStackSize += 8;
   }
 
-  // Determine if a Hazard slot should be used, and increase the CSStackSize by
-  // StackHazardSize if so.
-  determineStackHazardSlot(MF, SavedRegs);
-  if (AFI->hasStackHazardSlotIndex())
-    CSStackSize += getStackHazardSize(MF);
-
   // If we must call __arm_get_current_vg in the prologue preserve the LR.
   if (requiresSaveVG(MF) && !Subtarget.hasSVE())
     SavedRegs.set(AArch64::LR);
@@ -4042,8 +4121,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   });
 
   // If any callee-saved registers are used, the frame cannot be eliminated.
+  auto [ZPRLocalStackSize, PPRLocalStackSize] =
+      estimateSVEStackObjectOffsets(MF);
+  int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
   int64_t SVEStackSize =
-      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+      alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
   bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
 
   // The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -4128,7 +4210,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // instructions.
   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
-  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+
+  AFI->setZPRCalleeSavedStackSize(ZPRCSStackSize);
+  AFI->setPPRCalleeSavedStackSize(alignTo(PPRCSStackSize, 16));
 }
 
 bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -4283,7 +4367,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
     assert((Max == std::numeric_limits<int>::min() ||
             Max + 1 == CS.getFrameIdx()) &&
            "SVE CalleeSaves are not consecutive");
-
     Min = std::min(Min, CS.getFrameIdx());
     Max = std::max(Max, CS.getFrameIdx());
   }
@@ -4296,10 +4379,20 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
 // Process all the SVE stack objects and determine offsets for each
 // object. If AssignOffsets is true, the offsets get assigned.
 // Fills in the first and last callee-saved frame indices into
 // Min/MaxCSFrameIndex, respectively.
 // Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
-                                              int &MinCSFrameIndex,
-                                              int &MaxCSFrameIndex,
-                                              bool AssignOffsets) {
+static SVEStackSizes
+determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
+                               bool SplitSVEObjects = false) {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  int64_t ZPRStack = 0;
+  int64_t PPRStack = 0;
+
+  auto [ZPROffset, PPROffset] = [&] {
+    if (SplitSVEObjects)
+      return std::tie(ZPRStack, PPRStack);
+    return std::tie(ZPRStack, ZPRStack);
+  }();
+
 #ifndef NDEBUG
   // First process all fixed stack objects.
   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
@@ -4308,26 +4401,34 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
            "reference.");
 #endif
 
+  auto OffsetForObject = [&](int FI, int64_t &ZPROffset,
+                             int64_t &PPROffset) -> int64_t & {
+    return MFI.getStackID(FI) == TargetStackID::ScalableVector ? ZPROffset
+                                                               : PPROffset;
+  };
+
   auto Assign = [&MFI](int FI, int64_t Offset) {
     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
     MFI.setObjectOffset(FI, Offset);
   };
 
-  int64_t Offset = 0;
-
   // Then process all callee saved slots.
+  int MinCSFrameIndex, MaxCSFrameIndex;
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
-    // Assign offsets to the callee save slots.
-    for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
-      Offset += MFI.getObjectSize(I);
-      Offset = alignTo(Offset, MFI.getObjectAlign(I));
-      if (AssignOffsets)
-        Assign(I, -Offset);
+    for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) {
+      int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
+      Offset += MFI.getObjectSize(FI);
+      Offset = alignTo(Offset, MFI.getObjectAlign(FI));
+      if (AssignOffsets) {
+        LLVM_DEBUG(dbgs() << "FI: " << FI << ", Offset: " << -Offset << "\n");
+        Assign(FI, -Offset);
+      }
     }
   }
 
-  // Ensure that the Callee-save area is aligned to 16bytes.
-  Offset = alignTo(Offset, Align(16U));
+  // Ensure the CS area is 16-byte aligned.
+  PPROffset = alignTo(PPROffset, Align(16U));
+  ZPROffset = alignTo(ZPROffset, Align(16U));
 
   // Create a buffer of SVE objects to allocate and sort it.
   SmallVector<int, 8> ObjectsToAllocate;
@@ -4337,20 +4438,21 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
   int StackProtectorFI = -1;
   if (MFI.hasStackProtectorIndex()) {
     StackProtectorFI = MFI.getStackProtectorIndex();
-    if (MFI.isScalableStackID(StackProtectorFI))
+    if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
       ObjectsToAllocate.push_back(StackProtectorFI);
   }
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
-    if (!MFI.isScalableStackID(I))
-      continue;
-    if (I == StackProtectorFI)
+
+  for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+    if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
       continue;
-    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+    if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
       continue;
-    if (MFI.isDeadObjectIndex(I))
+
+    if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+        MFI.getStackID(FI) != TargetStackID::ScalablePredVector)
       continue;
-    ObjectsToAllocate.push_back(I);
+
+    ObjectsToAllocate.push_back(FI);
   }
 
   // Allocate all SVE locals and spills
@@ -4363,24 +4465,32 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
     report_fatal_error(
         "Alignment of scalable vectors > 16 bytes is not yet supported");
 
+    int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
     Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
     if (AssignOffsets)
       Assign(FI, -Offset);
   }
 
-  return Offset;
+  PPROffset = alignTo(PPROffset, Align(16U));
+  ZPROffset = alignTo(ZPROffset, Align(16U));
+
+  if (&ZPROffset != &PPROffset) {
+    // SplitSVEObjects (PPRs and ZPRs allocated to separate areas).
+    return SVEStackSizes{ZPROffset, PPROffset};
+  }
+  // When SplitSVEObjects is disabled just attribute all the stack to ZPRs.
+  // Determining the split is not necessary.
+  return SVEStackSizes{ZPROffset, 0};
 }
 
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
-    MachineFrameInfo &MFI) const {
-  int MinCSFrameIndex, MaxCSFrameIndex;
-  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
+SVEStackSizes
+AArch64FrameLowering::estimateSVEStackObjectOffsets(MachineFunction &MF) const {
+  return determineSVEStackObjectOffsets(MF, false);
 }
 
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
-    MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
-  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
-                                        true);
+SVEStackSizes
+AArch64FrameLowering::assignSVEStackObjectOffsets(MachineFunction &MF) const {
+  return determineSVEStackObjectOffsets(MF, true);
 }
 
 /// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -4694,12 +4804,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
          "Upwards growing stack unsupported");
 
-  int MinCSFrameIndex, MaxCSFrameIndex;
-  int64_t SVEStackSize =
-      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
-  AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
-  AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+  auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets(MF);
+  AFI->setStackSizeZPR(ZPRStackSize);
+  AFI->setStackSizePPR(PPRStackSize);
 
   // If this function isn't doing Win64-style C++ EH, we don't need to do
   // anything.
@@ -5248,8 +5355,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
   }
 
   // Go to common code if we cannot provide sp + offset.
-  if (MFI.hasVarSizedObjects() ||
-      MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+  if (MFI.hasVarSizedObjects() || hasSVEStackSize(MF) ||
       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
     return getFrameIndexReference(MF, FI, FrameReg);
 
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 6892be4d97b26..b58a4f655c2ef 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -19,6 +19,11 @@
 
 namespace llvm {
 
+struct SVEStackSizes {
+  int64_t ZPRStackSize{0};
+  int64_t PPRStackSize{0};
+};
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
@@ -147,10 +152,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
-  int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
-  int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
-                                      int &MinCSFrameIndex,
-                                      int &MaxCSFrameIndex) const;
+  SVEStackSizes estimateSVEStackObjectOffsets(MachineFunction &MF) const;
+  SVEStackSizes assignSVEStackObjectOffsets(MachineFunction &MF) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
   void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
@@ -166,6 +169,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
                           int64_t RealignmentPadding, StackOffset AllocSize,
                           bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
                           StackOffset InitialOffset, bool FollowupAllocs) const;
+
   /// Make a determination whether a Hazard slot is used and create it if
   /// needed.
void determineStackHazardSlot(MachineFunction &MF, diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 800787cc0b4f5..1ca004b27b601 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// Amount of stack frame size, not including callee-saved registers. uint64_t LocalStackSize = 0; - /// The start and end frame indices for the SVE callee saves. - int MinSVECSFrameIndex = 0; - int MaxSVECSFrameIndex = 0; - /// Amount of stack frame size used for saving callee-saved registers. unsigned CalleeSavedStackSize = 0; - unsigned SVECalleeSavedStackSize = 0; + unsigned ZPRCalleeSavedStackSize = 0; + unsigned PPRCalleeSavedStackSize = 0; bool HasCalleeSavedStackSize = false; bool HasSVECalleeSavedStackSize = false; @@ -137,9 +134,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// SVE stack size (for predicates and data vectors) are maintained here /// rather than in FrameInfo, as the placement and Stack IDs are target /// specific. - uint64_t StackSizeSVE = 0; + uint64_t StackSizeZPR = 0; + uint64_t StackSizePPR = 0; - /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. + /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid. bool HasCalculatedStackSizeSVE = false; /// Has a value when it is known whether or not the function uses a @@ -300,17 +298,26 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { TailCallReservedStack = bytes; } - bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } + void setStackSizeZPR(uint64_t S) { + HasCalculatedStackSizeSVE = true; + StackSizeZPR = S; + } - void setStackSizeSVE(uint64_t S) { + void setStackSizePPR(uint64_t S) { HasCalculatedStackSizeSVE = true; - StackSizeSVE = S; + StackSizePPR = S; } - uint64_t getStackSizeSVE() const { + uint64_t getStackSizeZPR() const { assert(hasCalculatedStackSizeSVE()); - return StackSizeSVE; + return StackSizeZPR; } + uint64_t getStackSizePPR() const { + assert(hasCalculatedStackSizeSVE()); + return StackSizePPR; + } + + bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -402,23 +409,33 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes' - void setSVECalleeSavedStackSize(unsigned Size) { - SVECalleeSavedStackSize = Size; + void setZPRCalleeSavedStackSize(unsigned Size) { + ZPRCalleeSavedStackSize = Size; HasSVECalleeSavedStackSize = true; } - unsigned getSVECalleeSavedStackSize() const { + unsigned getZPRCalleeSavedStackSize() const { assert(HasSVECalleeSavedStackSize && - "SVECalleeSavedStackSize has not been calculated"); - return SVECalleeSavedStackSize; + "ZPRCalleeSavedStackSize has not been calculated"); + return ZPRCalleeSavedStackSize; } - void setMinMaxSVECSFrameIndex(int Min, int Max) { - MinSVECSFrameIndex = Min; - MaxSVECSFrameIndex = Max; + // Saves the CalleeSavedStackSize for SVE predicate vectors in 'scalable + // bytes' + void setPPRCalleeSavedStackSize(unsigned Size) { + PPRCalleeSavedStackSize = Size; + HasSVECalleeSavedStackSize = true; + } + unsigned getPPRCalleeSavedStackSize() const { + assert(HasSVECalleeSavedStackSize && + "PPRCalleeSavedStackSize has not been 
calculated");
+    return PPRCalleeSavedStackSize;
+  }
 
-  void setMinMaxSVECSFrameIndex(int Min, int Max) {
-    MinSVECSFrameIndex = Min;
-    MaxSVECSFrameIndex = Max;
+  unsigned getSVECalleeSavedStackSize() const {
+    assert(HasSVECalleeSavedStackSize &&
+           "SVECalleeSavedStackSize has not been calculated");
+    return PPRCalleeSavedStackSize + ZPRCalleeSavedStackSize;
   }
 
-  int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
-  int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
-
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
   unsigned getNumLocalDynamicTLSAccesses() const {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index dd23bf51a98c4..72210e56f2f80 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -644,7 +644,8 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   if (ST.hasSVE() || ST.isStreaming()) {
     // Frames that have variable sized objects and scalable SVE objects,
     // should always use a basepointer.
-    if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+    if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeZPR() ||
+        AFI->getStackSizePPR())
       return true;
   }
 
@@ -784,8 +785,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
   assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
           AFI->hasCalculatedStackSizeSVE()) &&
          "Expected SVE area to be calculated by this point");
-  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
-         !AFI->hasStackHazardSlotIndex();
+  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeZPR() &&
+         !AFI->getStackSizePPR() && !AFI->hasStackHazardSlotIndex();
 }
 
 bool AArch64RegisterInfo::requiresFrameIndexScavenging(

From eba3bca2598bedb2d8de100f235d0644288afdb5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 26 Jun 2025 09:16:39 +0000
Subject: [PATCH 2/7] Rename StackID

Change-Id: Ibdf1ed80c9e5048bd0d96edb77f90b39c673b66e
---
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 054347fef4622..9458f17fc1ff5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4449,7 +4449,7 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
       continue;
 
     if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
-        MFI.getStackID(FI) != TargetStackID::ScalablePredVector)
+        MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector)
       continue;
 
     ObjectsToAllocate.push_back(FI);

From 4e231e7740565e0a2f49e1baf5458cffd81f893f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 8 Jul 2025 10:08:39 +0000
Subject: [PATCH 3/7] Fixups

Change-Id: Ifa920748f994e4160978da7d67ceb0c66093f729
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 136 ++++++++----------
 .../AArch64/AArch64MachineFunctionInfo.h      |  14 +-
 .../Target/AArch64/AArch64RegisterInfo.cpp    |   7 +-
 3 files changed, 72 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9458f17fc1ff5..af34921c7a7c4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -330,13 +330,36 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
-static StackOffset getZPRStackSize(const MachineFunction &MF);
-static StackOffset getPPRStackSize(const MachineFunction &MF);
-static StackOffset getSVEStackSize(const MachineFunction &MF);
 static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                                  bool HasCall = false);
 static bool requiresSaveVG(const MachineFunction &MF);
-static bool hasSVEStackSize(const MachineFunction &MF);
+
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+static StackOffset getZPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+/// Returns the size of the entire PPR stackframe (calleesaves + spills).
+static StackOffset getPPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
+static StackOffset getSVEStackSize(const MachineFunction &MF) {
+  return getZPRStackSize(MF) + getPPRStackSize(MF);
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+  return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+             AArch64::PPRRegClass) == 16;
+}
 
 // Conservatively, returns true if the function is likely to have SVE vectors
 // on the stack. This function is safe to be called before callee-saves or
@@ -496,38 +519,6 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
   }
 }
 
-static unsigned getStackHazardSize(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
-/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
-static StackOffset getZPRStackSize(const MachineFunction &MF) {
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return StackOffset::getScalable(AFI->getStackSizeZPR());
-}
-
-/// Returns the size of the entire PPR stackframe (calleesaves + spills).
-static StackOffset getPPRStackSize(const MachineFunction &MF) {
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return StackOffset::getScalable(AFI->getStackSizePPR());
-}
-
-/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
-static StackOffset getSVEStackSize(const MachineFunction &MF) {
-  return getZPRStackSize(MF) + getPPRStackSize(MF);
-}
-
-static bool hasSVEStackSize(const MachineFunction &MF) {
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0;
-}
-
-/// Returns true if PPRs are spilled as ZPRs.
-static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
-  return MF.getSubtarget().getRegisterInfo()->getSpillSize(
-             AArch64::PPRRegClass) == 16;
-}
-
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
   if (!EnableRedZone)
     return false;
@@ -553,7 +544,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
                             !Subtarget.hasSVE();
 
   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
-           hasSVEStackSize(MF) || LowerQRegCopyThroughMem);
+           AFI->hasSVEStackSize() || LowerQRegCopyThroughMem);
 }
 
 /// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1253,7 +1244,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
   // When there is an SVE area on the stack, always allocate the
   // callee-saves and spills/locals separately.
-  if (hasSVEStackSize(MF))
+  if (AFI->hasSVEStackSize())
     return false;
 
   return true;
@@ -1697,8 +1688,8 @@ static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
 
-// Convenience function to determine whether I is an SVE callee save.
-static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the ZPR callee saves.
+static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
   switch (I->getOpcode()) {
   default:
     return false;
@@ -1718,8 +1709,8 @@ static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
   }
 }
 
-// Convenience function to determine whether I is an SVE predicate callee save.
-static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the PPR callee saves.
+static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
   switch (I->getOpcode()) {
   default:
     return false;
@@ -1730,8 +1721,9 @@ static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
   }
 }
 
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
-  return IsZPRCalleeSave(I) || IsPPRCalleeSave(I);
+// Convenience function to determine whether I is part of the SVE callee saves.
+static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
+  return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
 }
 
 static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
@@ -1975,7 +1967,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
-    assert(!hasSVEStackSize(MF) &&
+    assert(!AFI->hasSVEStackSize() &&
            "unexpected function without stack frame but with SVE objects");
     // All of the stack allocation is for locals.
     AFI->setLocalStackSize(NumBytes);
@@ -2049,14 +2041,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     NumBytes -= FixedObject;
 
     // Now allocate space for the GPR callee saves.
-    while (MBBI != End && IsSVECalleeSave(MBBI))
+    while (MBBI != End && isPartOfSVECalleeSaves(MBBI))
       ++MBBI;
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
         MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
         &HasWinCFI, EmitAsyncCFI);
     NumBytes -= AFI->getCalleeSavedStackSize();
   } else if (CombineSPBump) {
-    assert(!hasSVEStackSize(MF) && "Cannot combine SP bump with SVE");
+    assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(-NumBytes), TII,
                     MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
@@ -2077,7 +2069,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // and pre-inc if we decided to combine the callee-save and local stack
   // pointer bump above.
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
-         !IsSVECalleeSave(MBBI)) {
+         !isPartOfSVECalleeSaves(MBBI)) {
     if (CombineSPBump &&
         // Only fix-up frame-setup load/store instructions.
(!requiresSaveVG(MF) || !isVGInstruction(MBBI))) @@ -2341,8 +2333,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, << PPRCalleeSavesSize.getScalable() << "\n"); PPRCalleeSavesBegin = MBBI; - assert(IsPPRCalleeSave(PPRCalleeSavesBegin) && "Unexpected instruction"); - while (IsPPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) + assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) && + "Unexpected instruction"); + while (isPartOfPPRCalleeSaves(MBBI) && MBBI != MBB.getFirstTerminator()) ++MBBI; PPRCalleeSavesEnd = MBBI; } @@ -2351,8 +2344,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " << ZPRCalleeSavesSize.getScalable() << "\n"); ZPRCalleeSavesBegin = MBBI; - assert(IsZPRCalleeSave(ZPRCalleeSavesBegin) && "Unexpected instruction"); - while (IsZPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) + assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) && + "Unexpected instruction"); + while (isPartOfZPRCalleeSaves(MBBI) && MBBI != MBB.getFirstTerminator()) ++MBBI; ZPRCalleeSavesEnd = MBBI; } @@ -2586,7 +2580,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, while (LastPopI != Begin) { --LastPopI; if (!LastPopI->getFlag(MachineInstr::FrameDestroy) || - (!FPAfterSVECalleeSaves && IsSVECalleeSave(LastPopI))) { + (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(LastPopI))) { ++LastPopI; break; } else if (CombineSPBump) @@ -2671,11 +2665,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, RestoreBegin = std::prev(RestoreEnd); while (RestoreBegin != MBB.begin() && - IsSVECalleeSave(std::prev(RestoreBegin))) + isPartOfSVECalleeSaves(std::prev(RestoreBegin))) --RestoreBegin; - assert(IsSVECalleeSave(RestoreBegin) && - IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); + assert(isPartOfSVECalleeSaves(RestoreBegin) && + isPartOfSVECalleeSaves(std::prev(RestoreEnd)) && + "Unexpected instruction"); StackOffset CalleeSavedSizeAsOffset = StackOffset::getScalable(SVECalleeSavedSize); @@ -4384,14 +4379,14 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets, bool SplitSVEObjects = false) { MachineFrameInfo &MFI = MF.getFrameInfo(); - int64_t ZPRStack = 0; - int64_t PPRStack = 0; + SVEStackSizes SVEStack{}; - auto [ZPROffset, PPROffset] = [&] { - if (SplitSVEObjects) - return std::tie(ZPRStack, PPRStack); - return std::tie(ZPRStack, ZPRStack); - }(); + // With SplitSVEObjects we maintain separate stack offsets for predicates + // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates + // are included in the SVE vector area. + int64_t &ZPROffset = SVEStack.ZPRStackSize; + int64_t &PPROffset = + SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; #ifndef NDEBUG // First process all fixed stack objects. @@ -4473,14 +4468,7 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets, PPROffset = alignTo(PPROffset, Align(16U)); ZPROffset = alignTo(ZPROffset, Align(16U)); - - if (&ZPROffset != &PPROffset) { - // SplitSVEObjects (PPRs and ZPRs allocated to separate areas). - return SVEStackSizes{ZPROffset, PPROffset}; - } - // When SplitSVEObjects is disabled just attribute all the stack to ZPRs. - // Determining the split is not necessary. 
- return SVEStackSizes{ZPROffset, 0}; + return SVEStack; } SVEStackSizes @@ -4805,8 +4793,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( "Upwards growing stack unsupported"); auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets(MF); - AFI->setStackSizeZPR(ZPRStackSize); - AFI->setStackSizePPR(PPRStackSize); + AFI->setStackSizeSVE(ZPRStackSize, PPRStackSize); // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. @@ -5355,7 +5342,8 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( } // Go to common code if we cannot provide sp + offset. - if (MFI.hasVarSizedObjects() || hasSVEStackSize(MF) || + if (MFI.hasVarSizedObjects() || + MF.getInfo()->hasSVEStackSize() || MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) return getFrameIndexReference(MF, FI, FrameReg); diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 1ca004b27b601..c79ef61f54aba 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -298,14 +298,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { TailCallReservedStack = bytes; } - void setStackSizeZPR(uint64_t S) { + void setStackSizeSVE(uint64_t ZPR, uint64_t PPR = 0) { + StackSizeZPR = ZPR; + StackSizePPR = PPR; HasCalculatedStackSizeSVE = true; - StackSizeZPR = S; - } - - void setStackSizePPR(uint64_t S) { - HasCalculatedStackSizeSVE = true; - StackSizePPR = S; } uint64_t getStackSizeZPR() const { @@ -319,6 +315,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } + bool hasSVEStackSize() const { + return getStackSizeZPR() > 0 || getStackSizePPR() > 0; + } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 72210e56f2f80..71368413744e5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -644,8 +644,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { if (ST.hasSVE() || ST.isStreaming()) { // Frames that have variable sized objects and scalable SVE objects, // should always use a basepointer. 
-    if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeZPR() ||
-        AFI->getStackSizePPR())
+    if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
       return true;
   }
 
@@ -785,8 +784,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
   assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
           AFI->hasCalculatedStackSizeSVE()) &&
          "Expected SVE area to be calculated by this point");
-  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeZPR() &&
-         !AFI->getStackSizePPR() && !AFI->hasStackHazardSlotIndex();
+  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
+         !AFI->hasStackHazardSlotIndex();
 }
 
 bool AArch64RegisterInfo::requiresFrameIndexScavenging(

From 1f4858fb8d201107bc911092e3dd4a4ad2ba7f5b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Wed, 9 Jul 2025 12:05:52 +0000
Subject: [PATCH 4/7] Rebase

Change-Id: I1208aebeabbc4df1c81f1c8f5a222688c28ef985
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  4 +---
 .../AArch64/AArch64MachineFunctionInfo.cpp    | 20 ++++++++++++++-----
 .../AArch64/AArch64MachineFunctionInfo.h      | 18 +++++++----------
 .../DebugInfo/AArch64/asan-stack-vars.mir     |  3 ++-
 .../compiler-gen-bbs-livedebugvalues.mir      |  3 ++-
 5 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index af34921c7a7c4..17066ca9faa9a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4205,9 +4205,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // instructions.
   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
-
-  AFI->setZPRCalleeSavedStackSize(ZPRCSStackSize);
-  AFI->setPPRCalleeSavedStackSize(alignTo(PPRCSStackSize, 16));
+  AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16));
 }
 
 bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index b4197a04840b7..e6b2e1ce8bb1c 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -23,12 +23,21 @@
 
 using namespace llvm;
 
+static std::optional<uint64_t>
+getSVEStackSize(const AArch64FunctionInfo &MFI,
+                uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
+  if (!MFI.hasCalculatedStackSizeSVE())
+    return std::nullopt;
+  return (MFI.*GetStackSize)();
+}
+
 yaml::AArch64FunctionInfo::AArch64FunctionInfo(
     const llvm::AArch64FunctionInfo &MFI)
     : HasRedZone(MFI.hasRedZone()),
-      StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
-                       ? 
std::optional<uint64_t>(MFI.getStackSizeSVE())
-                       : std::nullopt) {}
+      StackSizeZPR(
+          getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
+      StackSizePPR(
+          getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)) {}
 
 void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
   MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
@@ -38,8 +47,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
     const yaml::AArch64FunctionInfo &YamlMFI) {
   if (YamlMFI.HasRedZone)
     HasRedZone = YamlMFI.HasRedZone;
-  if (YamlMFI.StackSizeSVE)
-    setStackSizeSVE(*YamlMFI.StackSizeSVE);
+  if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
+    setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
+                    YamlMFI.StackSizePPR.value_or(0));
 }
 
 static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index c79ef61f54aba..e64cdcf485409 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -409,8 +409,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   }
 
   // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
-  void setZPRCalleeSavedStackSize(unsigned Size) {
-    ZPRCalleeSavedStackSize = Size;
+  void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR = 0) {
+    ZPRCalleeSavedStackSize = ZPR;
+    PPRCalleeSavedStackSize = PPR;
     HasSVECalleeSavedStackSize = true;
   }
   unsigned getZPRCalleeSavedStackSize() const {
@@ -418,13 +419,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
            "ZPRCalleeSavedStackSize has not been calculated");
     return ZPRCalleeSavedStackSize;
   }
-
-  // Saves the CalleeSavedStackSize for SVE predicate vectors in 'scalable
-  // bytes'
-  void setPPRCalleeSavedStackSize(unsigned Size) {
-    PPRCalleeSavedStackSize = Size;
-    HasSVECalleeSavedStackSize = true;
-  }
   unsigned getPPRCalleeSavedStackSize() const {
     assert(HasSVECalleeSavedStackSize &&
            "PPRCalleeSavedStackSize has not been calculated");
@@ -616,7 +610,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
 namespace yaml {
 struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
   std::optional<bool> HasRedZone;
-  std::optional<uint64_t> StackSizeSVE;
+  std::optional<uint64_t> StackSizeZPR;
+  std::optional<uint64_t> StackSizePPR;
 
   AArch64FunctionInfo() = default;
   AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
@@ -628,7 +623,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
 template <> struct MappingTraits<AArch64FunctionInfo> {
   static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
     YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
-    YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
+    YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
+    YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
   }
 };
 
diff --git a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
index 5d644c3e5416c..718fa6f4c4cb1 100644
--- a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
+++ b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
@@ -366,7 +366,8 @@ frameInfo:
   maxCallFrameSize: 0
   localFrameSize:  144
 machineFunctionInfo:
-  stackSizeSVE:    0
+  stackSizeZPR:    0
+  stackSizePPR:    0
 stack:
   - { id: 0, name: StackGuardSlot, offset: -40, size: 8, alignment: 8,
       stack-id: default, local-offset: -8 }
diff --git a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
index 013d93378a204..b7a9892f13977 100644
--- 
a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir +++ b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir @@ -69,7 +69,8 @@ frameInfo: hasCalls: true maxCallFrameSize: 0 machineFunctionInfo: - stackSizeSVE: 0 + stackSizeZPR: 0 + stackSizePPR: 0 stack: - { id: 0, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default } - { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default, From 3d23b57272042d84c957049cbabff61552d2f1b2 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 9 Jul 2025 15:05:32 +0000 Subject: [PATCH 5/7] Fixups --- llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index e64cdcf485409..4b0f5cdf4d4ff 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -298,7 +298,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { TailCallReservedStack = bytes; } - void setStackSizeSVE(uint64_t ZPR, uint64_t PPR = 0) { + void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) { StackSizeZPR = ZPR; StackSizePPR = PPR; HasCalculatedStackSizeSVE = true; @@ -409,7 +409,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes' - void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR = 0) { + void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) { ZPRCalleeSavedStackSize = ZPR; PPRCalleeSavedStackSize = PPR; HasSVECalleeSavedStackSize = true; @@ -426,9 +426,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } unsigned getSVECalleeSavedStackSize() const { - assert(HasSVECalleeSavedStackSize && - "SVECalleeSavedStackSize has not been calculated"); - return PPRCalleeSavedStackSize + ZPRCalleeSavedStackSize; + return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize(); } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } From 8c7b7f88b14409c526ec0368a8c30dbbf327ea99 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 10 Jul 2025 15:34:45 +0000 Subject: [PATCH 6/7] Fixups --- .../Target/AArch64/AArch64FrameLowering.cpp | 49 +++++++++---------- .../lib/Target/AArch64/AArch64FrameLowering.h | 6 +-- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 17066ca9faa9a..9dbd5af2ac5da 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4382,8 +4382,8 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets, // With SplitSVEObjects we maintain separate stack offsets for predicates // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates // are included in the SVE vector area. - int64_t &ZPROffset = SVEStack.ZPRStackSize; - int64_t &PPROffset = + uint64_t &ZPRStackTop = SVEStack.ZPRStackSize; + uint64_t &PPRStackTop = SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; #ifndef NDEBUG @@ -4394,10 +4394,10 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets, "reference."); #endif - auto OffsetForObject = [&](int FI, int64_t &ZPROffset, - int64_t &PPROffset) -> int64_t & { - return MFI.getStackID(FI) == TargetStackID::ScalableVector ? 
ZPROffset
-                                                               : PPROffset;
+    return MFI.getStackID(FI) == TargetStackID::ScalableVector ? ZPRStackTop
+                                                               : PPRStackTop;
   };
 
   auto Assign = [&MFI](int FI, int64_t Offset) {
@@ -4409,19 +4409,17 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
   int MinCSFrameIndex, MaxCSFrameIndex;
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
     for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) {
-      int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
-      Offset += MFI.getObjectSize(FI);
-      Offset = alignTo(Offset, MFI.getObjectAlign(FI));
-      if (AssignOffsets) {
-        LLVM_DEBUG(dbgs() << "FI: " << FI << ", Offset: " << -Offset << "\n");
-        Assign(FI, -Offset);
-      }
+      uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop);
+      StackTop += MFI.getObjectSize(FI);
+      StackTop = alignTo(StackTop, MFI.getObjectAlign(FI));
+      if (AssignOffsets)
+        Assign(FI, -int64_t(StackTop));
     }
   }
 
   // Ensure the CS area is 16-byte aligned.
-  PPROffset = alignTo(PPROffset, Align(16U));
-  ZPROffset = alignTo(ZPROffset, Align(16U));
+  PPRStackTop = alignTo(PPRStackTop, Align(16U));
+  ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
 
   // Create a buffer of SVE objects to allocate and sort it.
   SmallVector<int, 8> ObjectsToAllocate;
@@ -4458,14 +4456,14 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
     report_fatal_error(
         "Alignment of scalable vectors > 16 bytes is not yet supported");
 
-    int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
-    Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
+    uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop);
+    StackTop = alignTo(StackTop + MFI.getObjectSize(FI), Alignment);
     if (AssignOffsets)
-      Assign(FI, -Offset);
+      Assign(FI, -int64_t(StackTop));
   }
 
-  PPROffset = alignTo(PPROffset, Align(16U));
-  ZPROffset = alignTo(ZPROffset, Align(16U));
+  PPRStackTop = alignTo(PPRStackTop, Align(16U));
+  ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
   return SVEStack;
 }
 
@@ -4474,9 +4472,11 @@ AArch64FrameLowering::estimateSVEStackObjectOffsets(MachineFunction &MF) const {
   return determineSVEStackObjectOffsets(MF, false);
 }
 
-SVEStackSizes
-AArch64FrameLowering::assignSVEStackObjectOffsets(MachineFunction &MF) const {
-  return determineSVEStackObjectOffsets(MF, true);
+void AArch64FrameLowering::assignSVEStackObjectOffsets(
+    MachineFunction &MF) const {
+  auto [ZPRStackSize, PPRStackSize] = determineSVEStackObjectOffsets(MF, true);
+  MF.getInfo<AArch64FunctionInfo>()->setStackSizeSVE(ZPRStackSize,
+                                                     PPRStackSize);
 }
 
 /// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -4790,8 +4790,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
          "Upwards growing stack unsupported");
 
-  auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets(MF);
-  AFI->setStackSizeSVE(ZPRStackSize, PPRStackSize);
+  assignSVEStackObjectOffsets(MF);
 
   // If this function isn't doing Win64-style C++ EH, we don't need to do
   // anything.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index b58a4f655c2ef..99da483aa6bd1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -20,8 +20,8 @@ namespace llvm {
 
 struct SVEStackSizes {
-  int64_t ZPRStackSize{0};
-  int64_t PPRStackSize{0};
+  uint64_t ZPRStackSize{0};
+  uint64_t PPRStackSize{0};
 };
 
 class AArch64FrameLowering : public TargetFrameLowering {
@@ -153,7 +153,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                       uint64_t StackBumpBytes) const;
 
   SVEStackSizes estimateSVEStackObjectOffsets(MachineFunction &MF) const;
-  SVEStackSizes assignSVEStackObjectOffsets(MachineFunction &MF) const;
+  void assignSVEStackObjectOffsets(MachineFunction &MF) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
   void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,

From ef83c9d5f18676274d65fdef81f6721fb59db123 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 10 Jul 2025 15:54:49 +0000
Subject: [PATCH 7/7] Fixups

---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 48 +++++++++----------
 .../lib/Target/AArch64/AArch64FrameLowering.h |  2 -
 2 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9dbd5af2ac5da..4da635676fba4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -334,6 +334,14 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                                  bool HasCall = false);
 static bool requiresSaveVG(const MachineFunction &MF);
 
+enum class AssignObjectOffsets { No, Yes };
+/// Process all the SVE stack objects and determine the SVE stack size and
+/// offsets for each object. If AssignOffsets is "Yes", the offsets get
+/// assigned (and SVE stack sizes set). Returns the size of the SVE stack.
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+                                            AssignObjectOffsets AssignOffsets,
+                                            bool SplitSVEObjects = false);
+
 static unsigned getStackHazardSize(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
 }
@@ -4117,7 +4125,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   // If any callee-saved registers are used, the frame cannot be eliminated.
   auto [ZPRLocalStackSize, PPRLocalStackSize] =
-      estimateSVEStackObjectOffsets(MF);
+      determineSVEStackSizes(MF, AssignObjectOffsets::No);
   int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
   int64_t SVEStackSize =
       alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
@@ -4367,15 +4375,11 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
   return Min != std::numeric_limits<int>::max();
 }
 
-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static SVEStackSizes
-determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
-                               bool SplitSVEObjects = false) {
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+                                            AssignObjectOffsets AssignOffsets,
+                                            bool SplitSVEObjects) {
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
 
   SVEStackSizes SVEStack{};
 
@@ -4400,7 +4404,9 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
                                                                : PPRStackTop;
   };
 
-  auto Assign = [&MFI](int FI, int64_t Offset) {
+  auto Assign = [&MFI, AssignOffsets](int FI, int64_t Offset) {
+    if (AssignOffsets == AssignObjectOffsets::No)
+      return;
     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
     MFI.setObjectOffset(FI, Offset);
   };
@@ -4412,8 +4418,7 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
       uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop);
       StackTop += MFI.getObjectSize(FI);
       StackTop = alignTo(StackTop, MFI.getObjectAlign(FI));
-      if (AssignOffsets)
-        Assign(FI, -int64_t(StackTop));
+      Assign(FI, -int64_t(StackTop));
     }
   }
 
@@ -4458,25 +4463,16 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
     uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop);
     StackTop = alignTo(StackTop + MFI.getObjectSize(FI), Alignment);
-    if (AssignOffsets)
-      Assign(FI, -int64_t(StackTop));
+    Assign(FI, -int64_t(StackTop));
   }
 
   PPRStackTop = alignTo(PPRStackTop, Align(16U));
   ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
-  return SVEStack;
-}
 
-SVEStackSizes
-AArch64FrameLowering::estimateSVEStackObjectOffsets(MachineFunction &MF) const {
-  return determineSVEStackObjectOffsets(MF, false);
-}
+  if (AssignOffsets == AssignObjectOffsets::Yes)
+    AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
 
-void AArch64FrameLowering::assignSVEStackObjectOffsets(
-    MachineFunction &MF) const {
-  auto [ZPRStackSize, PPRStackSize] = determineSVEStackObjectOffsets(MF, true);
-  MF.getInfo<AArch64FunctionInfo>()->setStackSizeSVE(ZPRStackSize,
-                                                     PPRStackSize);
+  return SVEStack;
 }
 
 /// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -4790,7 +4786,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
          "Upwards growing stack unsupported");
 
-  assignSVEStackObjectOffsets(MF);
+  (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes);
 
   // If this function isn't doing Win64-style C++ EH, we don't need to do
   // anything.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 99da483aa6bd1..971a3c116027d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -152,8 +152,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
 
-  SVEStackSizes estimateSVEStackObjectOffsets(MachineFunction &MF) const;
-  void assignSVEStackObjectOffsets(MachineFunction &MF) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
   void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
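
Note on the final state after PATCH 7/7: a single static function, determineSVEStackSizes, both measures the SVE stack and, when asked, assigns object offsets and records the sizes on AArch64FunctionInfo, with the two passes selected by the AssignObjectOffsets enum rather than a bare bool. The sketch below shows the shape of that pattern in isolation; FrameObjects, determineStackSizes, and their fields are hypothetical stand-ins for illustration, not the LLVM MachineFrameInfo API:

#include <cstdint>
#include <vector>

enum class AssignObjectOffsets { No, Yes };

struct SVEStackSizes {
  uint64_t ZPRStackSize{0};
  uint64_t PPRStackSize{0};
};

// Hypothetical stand-in for MachineFrameInfo: object sizes in, offsets out.
struct FrameObjects {
  std::vector<uint64_t> Sizes;
  std::vector<int64_t> Offsets;
};

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

static SVEStackSizes determineStackSizes(FrameObjects &MFI,
                                         AssignObjectOffsets Assign) {
  SVEStackSizes Sizes{};
  uint64_t &StackTop = Sizes.ZPRStackSize; // one area only in this sketch
  MFI.Offsets.resize(MFI.Sizes.size());
  for (size_t FI = 0; FI < MFI.Sizes.size(); ++FI) {
    // The area grows downwards: bump the running "stack top" past the
    // object, then record a negative offset only on the assigning pass.
    StackTop = alignTo(StackTop + MFI.Sizes[FI], 16);
    if (Assign == AssignObjectOffsets::Yes)
      MFI.Offsets[FI] = -int64_t(StackTop);
  }
  StackTop = alignTo(StackTop, 16); // keep the whole area 16-byte aligned
  return Sizes;
}

int main() {
  FrameObjects MFI{{16, 32, 8}, {}};
  // Estimation pass (mirrors the call in determineCalleeSaves).
  SVEStackSizes Estimate = determineStackSizes(MFI, AssignObjectOffsets::No);
  // Assignment pass (mirrors processFunctionBeforeFrameFinalized).
  (void)determineStackSizes(MFI, AssignObjectOffsets::Yes);
  return Estimate.ZPRStackSize == 64 ? 0 : 1;
}

The enum class keeps the two call sites self-documenting (AssignObjectOffsets::No in determineCalleeSaves, AssignObjectOffsets::Yes in processFunctionBeforeFrameFinalized), where a bare bool argument would be opaque at the call site.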