diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 409a21a37810f..4da635676fba4 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -330,11 +330,45 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); -static StackOffset getSVEStackSize(const MachineFunction &MF); static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, bool HasCall = false); static bool requiresSaveVG(const MachineFunction &MF); +enum class AssignObjectOffsets { No, Yes }; +/// Process all the SVE stack objects and the SVE stack size and offsets for +/// each object. If AssignOffsets is "Yes", the offsets get assigned (and SVE +/// stack sizes set). Returns the size of the SVE stack. +static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, + AssignObjectOffsets AssignOffsets, + bool SplitSVEObjects = false); + +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget().getStreamingHazardSize(); +} + +/// Returns the size of the entire ZPR stackframe (calleesaves + spills). +static StackOffset getZPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizeZPR()); +} + +/// Returns the size of the entire PPR stackframe (calleesaves + spills). +static StackOffset getPPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizePPR()); +} + +/// Returns the size of the entire SVE stackframe (PPRs + ZPRs). +static StackOffset getSVEStackSize(const MachineFunction &MF) { + return getZPRStackSize(MF) + getPPRStackSize(MF); +} + +/// Returns true if PPRs are spilled as ZPRs. +static bool arePPRsSpilledAsZPR(const MachineFunction &MF) { + return MF.getSubtarget().getRegisterInfo()->getSpillSize( + AArch64::PPRRegClass) == 16; +} + // Conservatively, returns true if the function is likely to have SVE vectors // on the stack. This function is safe to be called before callee-saves or // object offsets have been determined. @@ -493,12 +527,6 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, } } -/// Returns the size of the entire SVE stackframe (calleesaves + spills). -static StackOffset getSVEStackSize(const MachineFunction &MF) { - const AArch64FunctionInfo *AFI = MF.getInfo(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -524,7 +552,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { !Subtarget.hasSVE(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF) || LowerQRegCopyThroughMem); + AFI->hasSVEStackSize() || LowerQRegCopyThroughMem); } /// hasFPImpl - Return true if the specified function should have a dedicated @@ -1224,7 +1252,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( // When there is an SVE area on the stack, always allocate the // callee-saves and spills/locals separately. - if (getSVEStackSize(MF)) + if (AFI->hasSVEStackSize()) return false; return true; @@ -1668,25 +1696,19 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } -static unsigned getStackHazardSize(const MachineFunction &MF) { - return MF.getSubtarget().getStreamingHazardSize(); -} - -// Convenience function to determine whether I is an SVE callee save. -static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { +// Convenience function to determine whether I is part of the ZPR callee saves. +static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) { switch (I->getOpcode()) { default: return false; - case AArch64::PTRUE_C_B: case AArch64::LD1B_2Z_IMM: case AArch64::ST1B_2Z_IMM: case AArch64::STR_ZXI: - case AArch64::STR_PXI: case AArch64::LDR_ZXI: - case AArch64::LDR_PXI: - case AArch64::PTRUE_B: case AArch64::CPY_ZPzI_B: case AArch64::CMPNE_PPzZI_B: + case AArch64::PTRUE_C_B: + case AArch64::PTRUE_B: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); case AArch64::SEH_SavePReg: @@ -1695,6 +1717,23 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { } } +// Convenience function to determine whether I is part of the PPR callee saves. +static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) { + switch (I->getOpcode()) { + default: + return false; + case AArch64::STR_PXI: + case AArch64::LDR_PXI: + return I->getFlag(MachineInstr::FrameSetup) || + I->getFlag(MachineInstr::FrameDestroy); + } +} + +// Convenience function to determine whether I is part of the SVE callee saves. +static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) { + return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I); +} + static void emitShadowCallStackPrologue(const TargetInstrInfo &TII, MachineFunction &MF, MachineBasicBlock &MBB, @@ -1926,8 +1965,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, else AFI->setTaggedBasePointerOffset(MFI.getStackSize()); - const StackOffset &SVEStackSize = getSVEStackSize(MF); - // getStackSize() includes all the locals in its size calculation. We don't // include these locals when computing the stack size of a funclet, as they // are allocated in the parent's stack frame and accessed via the frame @@ -1938,7 +1975,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); - assert(!SVEStackSize && + assert(!AFI->hasSVEStackSize() && "unexpected function without stack frame but with SVE objects"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); @@ -2012,14 +2049,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, NumBytes -= FixedObject; // Now allocate space for the GPR callee saves. - while (MBBI != End && IsSVECalleeSave(MBBI)) + while (MBBI != End && isPartOfSVECalleeSaves(MBBI)) ++MBBI; MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI, &HasWinCFI, EmitAsyncCFI); NumBytes -= AFI->getCalleeSavedStackSize(); } else if (CombineSPBump) { - assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI, @@ -2040,7 +2077,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // and pre-inc if we decided to combine the callee-save and local stack // pointer bump above. while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && - !IsSVECalleeSave(MBBI)) { + !isPartOfSVECalleeSaves(MBBI)) { if (CombineSPBump && // Only fix-up frame-setup load/store instructions. (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) @@ -2278,36 +2315,64 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } } - StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; MachineBasicBlock::iterator CalleeSavesEnd = MBBI; + StackOffset PPRCalleeSavesSize = + StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); + StackOffset ZPRCalleeSavesSize = + StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); + StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize; + StackOffset PPRLocalsSize = getPPRStackSize(MF) - PPRCalleeSavesSize; + StackOffset ZPRLocalsSize = getZPRStackSize(MF) - ZPRCalleeSavesSize; + StackOffset CFAOffset = StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); + if (!FPAfterSVECalleeSaves) { + MachineBasicBlock::iterator ZPRCalleeSavesBegin = MBBI, + ZPRCalleeSavesEnd = MBBI; + MachineBasicBlock::iterator PPRCalleeSavesBegin = MBBI, + PPRCalleeSavesEnd = MBBI; - // Process the SVE callee-saves to determine what space needs to be - // allocated. - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize - << "\n"); - SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); - SVELocalsSize = SVEStackSize - SVECalleeSavesSize; - // Find callee save instructions in frame. - // Note: With FPAfterSVECalleeSaves the callee saves have already been + // Process the SVE callee-saves to determine what space needs to be // allocated. - if (!FPAfterSVECalleeSaves) { - MachineBasicBlock::iterator CalleeSavesBegin = MBBI; - assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); - while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) + + if (PPRCalleeSavesSize) { + LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " + << PPRCalleeSavesSize.getScalable() << "\n"); + + PPRCalleeSavesBegin = MBBI; + assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) && + "Unexpected instruction"); + while (isPartOfPPRCalleeSaves(MBBI) && MBBI != MBB.getFirstTerminator()) ++MBBI; - CalleeSavesEnd = MBBI; + PPRCalleeSavesEnd = MBBI; + } - StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); - // Allocate space for the callee saves (if any). - allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, - nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); + if (ZPRCalleeSavesSize) { + LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " + << ZPRCalleeSavesSize.getScalable() << "\n"); + ZPRCalleeSavesBegin = MBBI; + assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) && + "Unexpected instruction"); + while (isPartOfZPRCalleeSaves(MBBI) && MBBI != MBB.getFirstTerminator()) + ++MBBI; + ZPRCalleeSavesEnd = MBBI; } + + // Allocate space for the callee saves (if any). + StackOffset LocalsSize = + PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes); + MachineBasicBlock::iterator CalleeSavesBegin = + AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin + : ZPRCalleeSavesBegin; + allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, + nullptr, EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects() || LocalsSize); + + CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd + : PPRCalleeSavesEnd; } + CFAOffset += SVECalleeSavesSize; if (EmitAsyncCFI) @@ -2321,6 +2386,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. + StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize; allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, SVELocalsSize + StackOffset::getFixed(NumBytes), NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, @@ -2370,7 +2436,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject); } else { StackOffset TotalSize = - SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); + getSVEStackSize(MF) + + StackOffset::getFixed((int64_t)MFI.getStackSize()); CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); CFIBuilder.insertCFIInst( createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, @@ -2521,7 +2588,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, while (LastPopI != Begin) { --LastPopI; if (!LastPopI->getFlag(MachineInstr::FrameDestroy) || - (!FPAfterSVECalleeSaves && IsSVECalleeSave(LastPopI))) { + (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(LastPopI))) { ++LastPopI; break; } else if (CombineSPBump) @@ -2571,7 +2638,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } } - const StackOffset &SVEStackSize = getSVEStackSize(MF); + StackOffset SVEStackSize = getSVEStackSize(MF); // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { @@ -2596,20 +2663,25 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // deallocated. StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI; - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { + int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize(); + int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize(); + int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize; + + if (SVECalleeSavedSize) { if (FPAfterSVECalleeSaves) RestoreEnd = MBB.getFirstTerminator(); RestoreBegin = std::prev(RestoreEnd); while (RestoreBegin != MBB.begin() && - IsSVECalleeSave(std::prev(RestoreBegin))) + isPartOfSVECalleeSaves(std::prev(RestoreBegin))) --RestoreBegin; - assert(IsSVECalleeSave(RestoreBegin) && - IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); + assert(isPartOfSVECalleeSaves(RestoreBegin) && + isPartOfSVECalleeSaves(std::prev(RestoreEnd)) && + "Unexpected instruction"); StackOffset CalleeSavedSizeAsOffset = - StackOffset::getScalable(CalleeSavedSize); + StackOffset::getScalable(SVECalleeSavedSize); DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } @@ -2795,7 +2867,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); - StackOffset SVEStackSize = getSVEStackSize(MF); + StackOffset ZPRStackSize = getZPRStackSize(MF); + StackOffset PPRStackSize = getPPRStackSize(MF); + StackOffset SVEStackSize = ZPRStackSize + PPRStackSize; // For VLA-area objects, just emit an offset at the end of the stack frame. // Whilst not quite correct, these objects do live at the end of the frame and @@ -2896,7 +2970,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); - const StackOffset &SVEStackSize = getSVEStackSize(MF); + const StackOffset SVEStackSize = getSVEStackSize(MF); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -3192,10 +3266,13 @@ static void computeCalleeSaveRegisterPairs( FirstReg = Count - 1; } bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize(); - int ScalableByteOffset = - FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize(); + int ScalableByteOffset = FPAfterSVECalleeSaves + ? 0 + : AFI->getZPRCalleeSavedStackSize() + + AFI->getPPRCalleeSavedStackSize(); bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace(); Register LastReg = 0; + bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex(); // When iterating backwards, the loop condition relies on unsigned wraparound. for (unsigned i = FirstReg; i < Count; i += RegInc) { @@ -3225,7 +3302,7 @@ static void computeCalleeSaveRegisterPairs( } // Add the stack hazard size as we transition from GPR->FPR CSRs. - if (AFI->hasStackHazardSlotIndex() && + if (HasCSHazardPadding && (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && AArch64InstrInfo::isFpOrNEON(RPI.Reg1)) ByteOffset += StackFillDir * StackHazardSize; @@ -3233,7 +3310,7 @@ static void computeCalleeSaveRegisterPairs( int Scale = TRI->getSpillSize(*RPI.RC); // Add the next reg to the pair if it is in the same register class. - if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) { + if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) { MCRegister NextReg = CSI[i + RegInc].getReg(); bool IsFirst = i == FirstReg; switch (RPI.Type) { @@ -3862,10 +3939,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); - const AArch64Subtarget &Subtarget = MF.getSubtarget(); AArch64FunctionInfo *AFI = MF.getInfo(); unsigned UnspilledCSGPR = AArch64::NoRegister; unsigned UnspilledCSGPRPaired = AArch64::NoRegister; @@ -3986,15 +4064,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Calculates the callee saved stack size. unsigned CSStackSize = 0; - unsigned SVECSStackSize = 0; + unsigned ZPRCSStackSize = 0; + unsigned PPRCSStackSize = 0; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (unsigned Reg : SavedRegs.set_bits()) { auto *RC = TRI->getMinimalPhysRegClass(Reg); assert(RC && "expected register class!"); auto SpillSize = TRI->getSpillSize(*RC); - if (AArch64::PPRRegClass.contains(Reg) || - AArch64::ZPRRegClass.contains(Reg)) - SVECSStackSize += SpillSize; + bool IsZPR = AArch64::ZPRRegClass.contains(Reg); + bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg); + if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF))) + ZPRCSStackSize += SpillSize; + else if (IsPPR) + PPRCSStackSize += SpillSize; else CSStackSize += SpillSize; } @@ -4004,6 +4086,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // only 64-bit GPRs can be added to SavedRegs. unsigned NumSavedRegs = SavedRegs.count(); + // Determine if a Hazard slot should be used, and increase the CSStackSize by + // StackHazardSize if so. + determineStackHazardSlot(MF, SavedRegs); + if (AFI->hasStackHazardSlotIndex()) + CSStackSize += getStackHazardSize(MF); + // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. // For locally streaming functions, we spill both the streaming and @@ -4016,12 +4104,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, CSStackSize += 8; } - // Determine if a Hazard slot should be used, and increase the CSStackSize by - // StackHazardSize if so. - determineStackHazardSlot(MF, SavedRegs); - if (AFI->hasStackHazardSlotIndex()) - CSStackSize += getStackHazardSize(MF); - // If we must call __arm_get_current_vg in the prologue preserve the LR. if (requiresSaveVG(MF) && !Subtarget.hasSVE()) SavedRegs.set(AArch64::LR); @@ -4042,8 +4124,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, }); // If any callee-saved registers are used, the frame cannot be eliminated. + auto [ZPRLocalStackSize, PPRLocalStackSize] = + determineSVEStackSizes(MF, AssignObjectOffsets::No); + int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize; int64_t SVEStackSize = - alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16); + alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16); bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; // The CSR spill slots have not been allocated yet, so estimateStackSize @@ -4128,7 +4213,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // instructions. AFI->setCalleeSavedStackSize(AlignedCSStackSize); AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); - AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); + AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16)); } bool AArch64FrameLowering::assignCalleeSavedSpillSlots( @@ -4283,7 +4368,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, assert((Max == std::numeric_limits::min() || Max + 1 == CS.getFrameIdx()) && "SVE CalleeSaves are not consecutive"); - Min = std::min(Min, CS.getFrameIdx()); Max = std::max(Max, CS.getFrameIdx()); } @@ -4291,15 +4375,21 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, return Min != std::numeric_limits::max(); } -// Process all the SVE stack objects and determine offsets for each -// object. If AssignOffsets is true, the offsets get assigned. -// Fills in the first and last callee-saved frame indices into -// Min/MaxCSFrameIndex, respectively. -// Returns the size of the stack. -static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, - int &MinCSFrameIndex, - int &MaxCSFrameIndex, - bool AssignOffsets) { +static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, + AssignObjectOffsets AssignOffsets, + bool SplitSVEObjects) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *AFI = MF.getInfo(); + + SVEStackSizes SVEStack{}; + + // With SplitSVEObjects we maintain separate stack offsets for predicates + // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates + // are included in the SVE vector area. + uint64_t &ZPRStackTop = SVEStack.ZPRStackSize; + uint64_t &PPRStackTop = + SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; + #ifndef NDEBUG // First process all fixed stack objects. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) @@ -4308,26 +4398,33 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, "reference."); #endif - auto Assign = [&MFI](int FI, int64_t Offset) { + auto StackForObject = [&](int FI, uint64_t &ZPRStackTop, + uint64_t &PPRStackTop) -> uint64_t & { + return MFI.getStackID(FI) == TargetStackID::ScalableVector ? ZPRStackTop + : PPRStackTop; + }; + + auto Assign = [&MFI, AssignOffsets](int FI, int64_t Offset) { + if (AssignOffsets == AssignObjectOffsets::No) + return; LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); MFI.setObjectOffset(FI, Offset); }; - int64_t Offset = 0; - // Then process all callee saved slots. + int MinCSFrameIndex, MaxCSFrameIndex; if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { - // Assign offsets to the callee save slots. - for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { - Offset += MFI.getObjectSize(I); - Offset = alignTo(Offset, MFI.getObjectAlign(I)); - if (AssignOffsets) - Assign(I, -Offset); + for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) { + uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop); + StackTop += MFI.getObjectSize(FI); + StackTop = alignTo(StackTop, MFI.getObjectAlign(FI)); + Assign(FI, -int64_t(StackTop)); } } - // Ensure that the Callee-save area is aligned to 16bytes. - Offset = alignTo(Offset, Align(16U)); + // Ensure the CS area is 16-byte aligned. + PPRStackTop = alignTo(PPRStackTop, Align(16U)); + ZPRStackTop = alignTo(ZPRStackTop, Align(16U)); // Create a buffer of SVE objects to allocate and sort it. SmallVector ObjectsToAllocate; @@ -4337,20 +4434,21 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, int StackProtectorFI = -1; if (MFI.hasStackProtectorIndex()) { StackProtectorFI = MFI.getStackProtectorIndex(); - if (MFI.isScalableStackID(StackProtectorFI)) + if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector) ObjectsToAllocate.push_back(StackProtectorFI); } - for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { - if (!MFI.isScalableStackID(I)) - continue; - if (I == StackProtectorFI) + + for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) { + if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI)) continue; - if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) + if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex) continue; - if (MFI.isDeadObjectIndex(I)) + + if (MFI.getStackID(FI) != TargetStackID::ScalableVector && + MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector) continue; - ObjectsToAllocate.push_back(I); + ObjectsToAllocate.push_back(FI); } // Allocate all SVE locals and spills @@ -4363,24 +4461,18 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, report_fatal_error( "Alignment of scalable vectors > 16 bytes is not yet supported"); - Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment); - if (AssignOffsets) - Assign(FI, -Offset); + uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop); + StackTop = alignTo(StackTop + MFI.getObjectSize(FI), Alignment); + Assign(FI, -int64_t(StackTop)); } - return Offset; -} + PPRStackTop = alignTo(PPRStackTop, Align(16U)); + ZPRStackTop = alignTo(ZPRStackTop, Align(16U)); -int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets( - MachineFrameInfo &MFI) const { - int MinCSFrameIndex, MaxCSFrameIndex; - return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false); -} + if (AssignOffsets == AssignObjectOffsets::Yes) + AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize); -int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( - MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const { - return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, - true); + return SVEStack; } /// Attempts to scavenge a register from \p ScavengeableRegs given the used @@ -4694,12 +4786,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && "Upwards growing stack unsupported"); - int MinCSFrameIndex, MaxCSFrameIndex; - int64_t SVEStackSize = - assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex); - - AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); - AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); + (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes); // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. @@ -5249,7 +5336,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( // Go to common code if we cannot provide sp + offset. if (MFI.hasVarSizedObjects() || - MF.getInfo()->getStackSizeSVE() || + MF.getInfo()->hasSVEStackSize() || MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) return getFrameIndexReference(MF, FI, FrameReg); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 6892be4d97b26..971a3c116027d 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -19,6 +19,11 @@ namespace llvm { +struct SVEStackSizes { + uint64_t ZPRStackSize{0}; + uint64_t PPRStackSize{0}; +}; + class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() @@ -147,10 +152,6 @@ class AArch64FrameLowering : public TargetFrameLowering { bool shouldCombineCSRLocalStackBump(MachineFunction &MF, uint64_t StackBumpBytes) const; - int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const; - int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF, - int &MinCSFrameIndex, - int &MaxCSFrameIndex) const; bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, uint64_t StackBumpBytes) const; void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB, @@ -166,6 +167,7 @@ class AArch64FrameLowering : public TargetFrameLowering { int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset, bool FollowupAllocs) const; + /// Make a determination whether a Hazard slot is used and create it if /// needed. void determineStackHazardSlot(MachineFunction &MF, diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index b4197a04840b7..e6b2e1ce8bb1c 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -23,12 +23,21 @@ using namespace llvm; +static std::optional +getSVEStackSize(const AArch64FunctionInfo &MFI, + uint64_t (AArch64FunctionInfo::*GetStackSize)() const) { + if (!MFI.hasCalculatedStackSizeSVE()) + return std::nullopt; + return (MFI.*GetStackSize)(); +} + yaml::AArch64FunctionInfo::AArch64FunctionInfo( const llvm::AArch64FunctionInfo &MFI) : HasRedZone(MFI.hasRedZone()), - StackSizeSVE(MFI.hasCalculatedStackSizeSVE() - ? std::optional(MFI.getStackSizeSVE()) - : std::nullopt) {} + StackSizeZPR( + getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)), + StackSizePPR( + getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)) {} void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits::mapping(YamlIO, *this); @@ -38,8 +47,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields( const yaml::AArch64FunctionInfo &YamlMFI) { if (YamlMFI.HasRedZone) HasRedZone = YamlMFI.HasRedZone; - if (YamlMFI.StackSizeSVE) - setStackSizeSVE(*YamlMFI.StackSizeSVE); + if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR) + setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0), + YamlMFI.StackSizePPR.value_or(0)); } static std::pair GetSignReturnAddress(const Function &F) { diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 800787cc0b4f5..4b0f5cdf4d4ff 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// Amount of stack frame size, not including callee-saved registers. uint64_t LocalStackSize = 0; - /// The start and end frame indices for the SVE callee saves. - int MinSVECSFrameIndex = 0; - int MaxSVECSFrameIndex = 0; - /// Amount of stack frame size used for saving callee-saved registers. unsigned CalleeSavedStackSize = 0; - unsigned SVECalleeSavedStackSize = 0; + unsigned ZPRCalleeSavedStackSize = 0; + unsigned PPRCalleeSavedStackSize = 0; bool HasCalleeSavedStackSize = false; bool HasSVECalleeSavedStackSize = false; @@ -137,9 +134,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// SVE stack size (for predicates and data vectors) are maintained here /// rather than in FrameInfo, as the placement and Stack IDs are target /// specific. - uint64_t StackSizeSVE = 0; + uint64_t StackSizeZPR = 0; + uint64_t StackSizePPR = 0; - /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. + /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid. bool HasCalculatedStackSizeSVE = false; /// Has a value when it is known whether or not the function uses a @@ -300,16 +298,25 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { TailCallReservedStack = bytes; } - bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } - - void setStackSizeSVE(uint64_t S) { + void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) { + StackSizeZPR = ZPR; + StackSizePPR = PPR; HasCalculatedStackSizeSVE = true; - StackSizeSVE = S; } - uint64_t getStackSizeSVE() const { + uint64_t getStackSizeZPR() const { assert(hasCalculatedStackSizeSVE()); - return StackSizeSVE; + return StackSizeZPR; + } + uint64_t getStackSizePPR() const { + assert(hasCalculatedStackSizeSVE()); + return StackSizePPR; + } + + bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } + + bool hasSVEStackSize() const { + return getStackSizeZPR() > 0 || getStackSizePPR() > 0; } bool hasStackFrame() const { return HasStackFrame; } @@ -402,23 +409,25 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes' - void setSVECalleeSavedStackSize(unsigned Size) { - SVECalleeSavedStackSize = Size; + void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) { + ZPRCalleeSavedStackSize = ZPR; + PPRCalleeSavedStackSize = PPR; HasSVECalleeSavedStackSize = true; } - unsigned getSVECalleeSavedStackSize() const { + unsigned getZPRCalleeSavedStackSize() const { assert(HasSVECalleeSavedStackSize && - "SVECalleeSavedStackSize has not been calculated"); - return SVECalleeSavedStackSize; + "ZPRCalleeSavedStackSize has not been calculated"); + return ZPRCalleeSavedStackSize; } - - void setMinMaxSVECSFrameIndex(int Min, int Max) { - MinSVECSFrameIndex = Min; - MaxSVECSFrameIndex = Max; + unsigned getPPRCalleeSavedStackSize() const { + assert(HasSVECalleeSavedStackSize && + "PPRCalleeSavedStackSize has not been calculated"); + return PPRCalleeSavedStackSize; } - int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; } - int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; } + unsigned getSVECalleeSavedStackSize() const { + return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize(); + } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } unsigned getNumLocalDynamicTLSAccesses() const { @@ -599,7 +608,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { namespace yaml { struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo { std::optional HasRedZone; - std::optional StackSizeSVE; + std::optional StackSizeZPR; + std::optional StackSizePPR; AArch64FunctionInfo() = default; AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI); @@ -611,7 +621,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo { template <> struct MappingTraits { static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) { YamlIO.mapOptional("hasRedZone", MFI.HasRedZone); - YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE); + YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR); + YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR); } }; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index dd23bf51a98c4..71368413744e5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -644,7 +644,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { if (ST.hasSVE() || ST.isStreaming()) { // Frames that have variable sized objects and scalable SVE objects, // should always use a basepointer. - if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE()) + if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize()) return true; } @@ -784,7 +784,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { assert((!MF.getSubtarget().hasSVE() || AFI->hasCalculatedStackSizeSVE()) && "Expected SVE area to be calculated by this point"); - return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() && + return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() && !AFI->hasStackHazardSlotIndex(); } diff --git a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir index 5d644c3e5416c..718fa6f4c4cb1 100644 --- a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir +++ b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir @@ -366,7 +366,8 @@ frameInfo: maxCallFrameSize: 0 localFrameSize: 144 machineFunctionInfo: - stackSizeSVE: 0 + stackSizeZPR: 0 + stackSizePPR: 0 stack: - { id: 0, name: StackGuardSlot, offset: -40, size: 8, alignment: 8, stack-id: default, local-offset: -8 } diff --git a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir index 013d93378a204..b7a9892f13977 100644 --- a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir +++ b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir @@ -69,7 +69,8 @@ frameInfo: hasCalls: true maxCallFrameSize: 0 machineFunctionInfo: - stackSizeSVE: 0 + stackSizeZPR: 0 + stackSizePPR: 0 stack: - { id: 0, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default } - { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,