@@ -330,13 +330,36 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
330
330
331
331
static bool produceCompactUnwindFrame (MachineFunction &MF);
332
332
static bool needsWinCFI (const MachineFunction &MF);
333
- static StackOffset getZPRStackSize (const MachineFunction &MF);
334
- static StackOffset getPPRStackSize (const MachineFunction &MF);
335
- static StackOffset getSVEStackSize (const MachineFunction &MF);
336
333
static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
337
334
bool HasCall = false );
338
335
static bool requiresSaveVG (const MachineFunction &MF);
339
- static bool hasSVEStackSize (const MachineFunction &MF);
336
+
337
+ static unsigned getStackHazardSize (const MachineFunction &MF) {
338
+ return MF.getSubtarget <AArch64Subtarget>().getStreamingHazardSize ();
339
+ }
340
+
341
+ // / Returns the size of the entire ZPR stackframe (calleesaves + spills).
342
+ static StackOffset getZPRStackSize (const MachineFunction &MF) {
343
+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
344
+ return StackOffset::getScalable (AFI->getStackSizeZPR ());
345
+ }
346
+
347
+ // / Returns the size of the entire PPR stackframe (calleesaves + spills).
348
+ static StackOffset getPPRStackSize (const MachineFunction &MF) {
349
+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
350
+ return StackOffset::getScalable (AFI->getStackSizePPR ());
351
+ }
352
+
353
+ // / Returns the size of the entire SVE stackframe (PPRs + ZPRs).
354
+ static StackOffset getSVEStackSize (const MachineFunction &MF) {
355
+ return getZPRStackSize (MF) + getPPRStackSize (MF);
356
+ }
357
+
358
+ // / Returns true if PPRs are spilled as ZPRs.
359
+ static bool arePPRsSpilledAsZPR (const MachineFunction &MF) {
360
+ return MF.getSubtarget ().getRegisterInfo ()->getSpillSize (
361
+ AArch64::PPRRegClass) == 16 ;
362
+ }
340
363
341
364
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -353,8 +376,10 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
353
376
// TODO: Windows is not supported yet.
354
377
if (needsWinCFI (MF))
355
378
return false ;
379
+
356
380
// TODO: SVE is not supported yet.
357
- if (hasSVEStackSize (MF))
381
+ auto *AFI = MF.getInfo <AArch64FunctionInfo>();
382
+ if (AFI->hasSVEStackSize ())
358
383
return false ;
359
384
360
385
// Bail on stack adjustment needed on return for simplicity.
@@ -365,7 +390,6 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
365
390
if (Exit && getArgumentStackToRestore (MF, *Exit))
366
391
return false ;
367
392
368
- auto *AFI = MF.getInfo <AArch64FunctionInfo>();
369
393
if (AFI->hasSwiftAsyncContext () || AFI->hasStreamingModeChanges ())
370
394
return false ;
371
395
@@ -454,38 +478,6 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
454
478
}
455
479
}
456
480
457
- static unsigned getStackHazardSize (const MachineFunction &MF) {
458
- return MF.getSubtarget <AArch64Subtarget>().getStreamingHazardSize ();
459
- }
460
-
461
- // / Returns the size of the entire ZPR stackframe (calleesaves + spills).
462
- static StackOffset getZPRStackSize (const MachineFunction &MF) {
463
- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
464
- return StackOffset::getScalable (AFI->getStackSizeZPR ());
465
- }
466
-
467
- // / Returns the size of the entire PPR stackframe (calleesaves + spills).
468
- static StackOffset getPPRStackSize (const MachineFunction &MF) {
469
- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
470
- return StackOffset::getScalable (AFI->getStackSizePPR ());
471
- }
472
-
473
- // / Returns the size of the entire SVE stackframe (PPRs + ZPRs).
474
- static StackOffset getSVEStackSize (const MachineFunction &MF) {
475
- return getZPRStackSize (MF) + getPPRStackSize (MF);
476
- }
477
-
478
- static bool hasSVEStackSize (const MachineFunction &MF) {
479
- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
480
- return AFI->getStackSizeZPR () > 0 || AFI->getStackSizePPR () > 0 ;
481
- }
482
-
483
- // / Returns true if PPRs are spilled as ZPRs.
484
- static bool arePPRsSpilledAsZPR (const MachineFunction &MF) {
485
- return MF.getSubtarget ().getRegisterInfo ()->getSpillSize (
486
- AArch64::PPRRegClass) == 16 ;
487
- }
488
-
489
481
bool AArch64FrameLowering::canUseRedZone (const MachineFunction &MF) const {
490
482
if (!EnableRedZone)
491
483
return false ;
@@ -511,7 +503,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
511
503
!Subtarget.hasSVE ();
512
504
513
505
return !(MFI.hasCalls () || hasFP (MF) || NumBytes > RedZoneSize ||
514
- hasSVEStackSize (MF ) || LowerQRegCopyThroughMem);
506
+ AFI-> hasSVEStackSize () || LowerQRegCopyThroughMem);
515
507
}
516
508
517
509
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1190,7 +1182,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
1190
1182
1191
1183
// When there is an SVE area on the stack, always allocate the
1192
1184
// callee-saves and spills/locals separately.
1193
- if (hasSVEStackSize (MF ))
1185
+ if (AFI-> hasSVEStackSize ())
1194
1186
return false ;
1195
1187
1196
1188
return true ;
@@ -1634,8 +1626,8 @@ static bool isTargetWindows(const MachineFunction &MF) {
1634
1626
return MF.getSubtarget <AArch64Subtarget>().isTargetWindows ();
1635
1627
}
1636
1628
1637
- // Convenience function to determine whether I is an SVE callee save .
1638
- static bool IsZPRCalleeSave (MachineBasicBlock::iterator I) {
1629
+ // Convenience function to determine whether I is part of the ZPR callee saves .
1630
+ static bool isPartOfZPRCalleeSaves (MachineBasicBlock::iterator I) {
1639
1631
switch (I->getOpcode ()) {
1640
1632
default :
1641
1633
return false ;
@@ -1655,8 +1647,8 @@ static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
1655
1647
}
1656
1648
}
1657
1649
1658
- // Convenience function to determine whether I is an SVE predicate callee save .
1659
- static bool IsPPRCalleeSave (MachineBasicBlock::iterator I) {
1650
+ // Convenience function to determine whether I is part of the PPR callee saves .
1651
+ static bool isPartOfPPRCalleeSaves (MachineBasicBlock::iterator I) {
1660
1652
switch (I->getOpcode ()) {
1661
1653
default :
1662
1654
return false ;
@@ -1667,8 +1659,9 @@ static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
1667
1659
}
1668
1660
}
1669
1661
1670
- static bool IsSVECalleeSave (MachineBasicBlock::iterator I) {
1671
- return IsZPRCalleeSave (I) || IsPPRCalleeSave (I);
1662
+ // Convenience function to determine whether I is part of the SVE callee saves.
1663
+ static bool isPartOfSVECalleeSaves (MachineBasicBlock::iterator I) {
1664
+ return isPartOfZPRCalleeSaves (I) || isPartOfPPRCalleeSaves (I);
1672
1665
}
1673
1666
1674
1667
static void emitShadowCallStackPrologue (const TargetInstrInfo &TII,
@@ -1912,7 +1905,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
1912
1905
IsFunclet ? getWinEHFuncletFrameSize (MF) : MFI.getStackSize ();
1913
1906
if (!AFI->hasStackFrame () && !windowsRequiresStackProbe (MF, NumBytes)) {
1914
1907
assert (!HasFP && " unexpected function without stack frame but with FP" );
1915
- assert (!hasSVEStackSize (MF ) &&
1908
+ assert (!AFI-> hasSVEStackSize () &&
1916
1909
" unexpected function without stack frame but with SVE objects" );
1917
1910
// All of the stack allocation is for locals.
1918
1911
AFI->setLocalStackSize (NumBytes);
@@ -1986,14 +1979,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
1986
1979
NumBytes -= FixedObject;
1987
1980
1988
1981
// Now allocate space for the GPR callee saves.
1989
- while (MBBI != End && IsSVECalleeSave (MBBI))
1982
+ while (MBBI != End && isPartOfSVECalleeSaves (MBBI))
1990
1983
++MBBI;
1991
1984
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec (
1992
1985
MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize (), NeedsWinCFI,
1993
1986
&HasWinCFI, EmitAsyncCFI);
1994
1987
NumBytes -= AFI->getCalleeSavedStackSize ();
1995
1988
} else if (CombineSPBump) {
1996
- assert (!hasSVEStackSize (MF ) && " Cannot combine SP bump with SVE" );
1989
+ assert (!AFI-> hasSVEStackSize () && " Cannot combine SP bump with SVE" );
1997
1990
emitFrameOffset (MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1998
1991
StackOffset::getFixed (-NumBytes), TII,
1999
1992
MachineInstr::FrameSetup, false , NeedsWinCFI, &HasWinCFI,
@@ -2014,7 +2007,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
2014
2007
// and pre-inc if we decided to combine the callee-save and local stack
2015
2008
// pointer bump above.
2016
2009
while (MBBI != End && MBBI->getFlag (MachineInstr::FrameSetup) &&
2017
- !IsSVECalleeSave (MBBI)) {
2010
+ !isPartOfSVECalleeSaves (MBBI)) {
2018
2011
if (CombineSPBump &&
2019
2012
// Only fix-up frame-setup load/store instructions.
2020
2013
(!requiresSaveVG (MF) || !isVGInstruction (MBBI)))
@@ -2278,8 +2271,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
2278
2271
<< PPRCalleeSavesSize.getScalable () << " \n " );
2279
2272
2280
2273
PPRCalleeSavesBegin = MBBI;
2281
- assert (IsPPRCalleeSave (PPRCalleeSavesBegin) && " Unexpected instruction" );
2282
- while (IsPPRCalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2274
+ assert (isPartOfPPRCalleeSaves (PPRCalleeSavesBegin) &&
2275
+ " Unexpected instruction" );
2276
+ while (isPartOfPPRCalleeSaves (MBBI) && MBBI != MBB.getFirstTerminator ())
2283
2277
++MBBI;
2284
2278
PPRCalleeSavesEnd = MBBI;
2285
2279
}
@@ -2288,8 +2282,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
2288
2282
LLVM_DEBUG (dbgs () << " ZPRCalleeSavedStackSize = "
2289
2283
<< ZPRCalleeSavesSize.getScalable () << " \n " );
2290
2284
ZPRCalleeSavesBegin = MBBI;
2291
- assert (IsZPRCalleeSave (ZPRCalleeSavesBegin) && " Unexpected instruction" );
2292
- while (IsZPRCalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2285
+ assert (isPartOfZPRCalleeSaves (ZPRCalleeSavesBegin) &&
2286
+ " Unexpected instruction" );
2287
+ while (isPartOfZPRCalleeSaves (MBBI) && MBBI != MBB.getFirstTerminator ())
2293
2288
++MBBI;
2294
2289
ZPRCalleeSavesEnd = MBBI;
2295
2290
}
@@ -2523,7 +2518,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
2523
2518
while (LastPopI != Begin) {
2524
2519
--LastPopI;
2525
2520
if (!LastPopI->getFlag (MachineInstr::FrameDestroy) ||
2526
- (!FPAfterSVECalleeSaves && IsSVECalleeSave (LastPopI))) {
2521
+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves (LastPopI))) {
2527
2522
++LastPopI;
2528
2523
break ;
2529
2524
} else if (CombineSPBump)
@@ -2608,11 +2603,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
2608
2603
2609
2604
RestoreBegin = std::prev (RestoreEnd);
2610
2605
while (RestoreBegin != MBB.begin () &&
2611
- IsSVECalleeSave (std::prev (RestoreBegin)))
2606
+ isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
2612
2607
--RestoreBegin;
2613
2608
2614
- assert (IsSVECalleeSave (RestoreBegin) &&
2615
- IsSVECalleeSave (std::prev (RestoreEnd)) && " Unexpected instruction" );
2609
+ assert (isPartOfSVECalleeSaves (RestoreBegin) &&
2610
+ isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
2611
+ " Unexpected instruction" );
2616
2612
2617
2613
StackOffset CalleeSavedSizeAsOffset =
2618
2614
StackOffset::getScalable (SVECalleeSavedSize);
@@ -4315,14 +4311,14 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
4315
4311
bool SplitSVEObjects = false ) {
4316
4312
MachineFrameInfo &MFI = MF.getFrameInfo ();
4317
4313
4318
- int64_t ZPRStack = 0 ;
4319
- int64_t PPRStack = 0 ;
4314
+ SVEStackSizes SVEStack{};
4320
4315
4321
- auto [ZPROffset, PPROffset] = [&] {
4322
- if (SplitSVEObjects)
4323
- return std::tie (ZPRStack, PPRStack);
4324
- return std::tie (ZPRStack, ZPRStack);
4325
- }();
4316
+ // With SplitSVEObjects we maintain separate stack offsets for predicates
4317
+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates
4318
+ // are included in the SVE vector area.
4319
+ int64_t &ZPROffset = SVEStack.ZPRStackSize ;
4320
+ int64_t &PPROffset =
4321
+ SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize ;
4326
4322
4327
4323
#ifndef NDEBUG
4328
4324
// First process all fixed stack objects.
@@ -4404,14 +4400,7 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
4404
4400
4405
4401
PPROffset = alignTo (PPROffset, Align (16U ));
4406
4402
ZPROffset = alignTo (ZPROffset, Align (16U ));
4407
-
4408
- if (&ZPROffset != &PPROffset) {
4409
- // SplitSVEObjects (PPRs and ZPRs allocated to separate areas).
4410
- return SVEStackSizes{ZPROffset, PPROffset};
4411
- }
4412
- // When SplitSVEObjects is disabled just attribute all the stack to ZPRs.
4413
- // Determining the split is not necessary.
4414
- return SVEStackSizes{ZPROffset, 0 };
4403
+ return SVEStack;
4415
4404
}
4416
4405
4417
4406
SVEStackSizes
@@ -4736,8 +4725,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
4736
4725
" Upwards growing stack unsupported" );
4737
4726
4738
4727
auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets (MF);
4739
- AFI->setStackSizeZPR (ZPRStackSize);
4740
- AFI->setStackSizePPR (PPRStackSize);
4728
+ AFI->setStackSizeSVE (ZPRStackSize, PPRStackSize);
4741
4729
4742
4730
// If this function isn't doing Win64-style C++ EH, we don't need to do
4743
4731
// anything.
@@ -5269,7 +5257,8 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
5269
5257
}
5270
5258
5271
5259
// Go to common code if we cannot provide sp + offset.
5272
- if (MFI.hasVarSizedObjects () || hasSVEStackSize (MF) ||
5260
+ if (MFI.hasVarSizedObjects () ||
5261
+ MF.getInfo <AArch64FunctionInfo>()->hasSVEStackSize () ||
5273
5262
MF.getSubtarget ().getRegisterInfo ()->hasStackRealignment (MF))
5274
5263
return getFrameIndexReference (MF, FI, FrameReg);
5275
5264
0 commit comments