Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -105,11 +105,12 @@
     }
   }

+  int64_t estimateSVEStackObjectOffsets(const MachineFrameInfo &MF) const;
+
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
-  int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
   int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex) const;
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -154,6 +154,7 @@
 #include "llvm/Target/TargetOptions.h"
 #include <cassert>
 #include <cstdint>
+#include <functional>
 #include <iterator>
 #include <vector>
@@ -2586,15 +2587,18 @@
   return Min != std::numeric_limits<int>::max();
 }

-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
-                                              int &MinCSFrameIndex,
-                                              int &MaxCSFrameIndex,
-                                              bool AssignOffsets) {
+/// Process all the SVE stack objects and determine offsets for each
+/// object. \p AssignFn can be used to assign the object offsets, e.g.
+/// using `MFI.setObjectOffset()`; similarly \p AlignFn can be used to
+/// set the alignment of an object.
+/// This method returns the size of the stack and also returns the frame
+/// indices of the first and last callee-save slots in \p MinCSFrameIndex
+/// and \p MaxCSFrameIndex, respectively.
+static int64_t
+determineSVEStackObjectOffsets(const MachineFrameInfo &MFI,
+                               int &MinCSFrameIndex, int &MaxCSFrameIndex,
+                               std::function<void(int, int64_t)> AssignFn,
+                               std::function<void(int, Align)> AlignFn) {
   // First process all fixed stack objects.
   int64_t Offset = 0;
   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
@@ -2604,22 +2608,16 @@
         Offset = FixedOffset;
     }

-  auto Assign = [&MFI](int FI, int64_t Offset) {
-    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
-    MFI.setObjectOffset(FI, Offset);
-  };
-
   // Then process all callee saved slots.
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
     // Make sure to align the last callee save slot.
-    MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));
+    AlignFn(MaxCSFrameIndex, Align(16U));

     // Assign offsets to the callee save slots.
     for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
       Offset += MFI.getObjectSize(I);
       Offset = alignTo(Offset, MFI.getObjectAlign(I));
-      if (AssignOffsets)
-        Assign(I, -Offset);
+      AssignFn(I, -Offset);
     }
   }
@@ -2648,23 +2646,32 @@
            "Alignment of scalable vectors > 16 bytes is not yet supported");
     Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
-    if (AssignOffsets)
-      Assign(FI, -Offset);
+    AssignFn(FI, -Offset);
   }

   return Offset;
 }

 int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
-    MachineFrameInfo &MFI) const {
+    const MachineFrameInfo &MFI) const {
   int MinCSFrameIndex, MaxCSFrameIndex;
-  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
+  return determineSVEStackObjectOffsets(
+      MFI, MinCSFrameIndex, MaxCSFrameIndex,
+      /* AssignFn */ [](int FI, int64_t Offset) {},
+      /* AlignFn */ [](int FI, Align A) {});
 }

 int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
     MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
+  auto AssignFn = [&MFI](int FI, int64_t Offset) {
+    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
+    MFI.setObjectOffset(FI, Offset);
+  };
+  auto AlignFn = [&MFI](int FI, Align A) {
+    MFI.setObjectAlignment(FI, A);
+  };
   return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
-                                        true);
+                                        AssignFn, AlignFn);
 }

 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -343,6 +343,15 @@
   if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
     if (needsStackRealignment(MF))
       return true;
+
+    if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
+      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+      // Frames that have both variable-sized objects and scalable SVE
+      // objects should always use a base pointer.
+      if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+        return true;
+    }
+
     // Conservatively estimate whether the negative offset from the frame
     // pointer will be sufficient to reach. If a function has a smallish
     // frame, it's less likely to have lots of spills and callee saved
@@ -379,8 +388,9 @@
   // (closer to SP).
   //
   // The beginning works most reliably if we have a frame pointer.
+  // In the presence of SVE, however, it is better to use the SP or BP.
   const AArch64FrameLowering &TFI = *getFrameLowering(MF);
-  return TFI.hasFP(MF);
+  return TFI.hasFP(MF) && !TFI.estimateSVEStackObjectOffsets(MF.getFrameInfo());
 }

 bool AArch64RegisterInfo::requiresFrameIndexScavenging(
Index: llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
@@ -0,0 +1,23 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog -mattr=+sve %s -o - | FileCheck %s
+---
+# This test verifies that the base pointer is available in the presence of SVE stack objects.
+name: hasBasepointer
+# CHECK-LABEL: name: hasBasepointer
+# CHECK: bb.0:
+# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
+# CHECK: STRXui $x0, $x19, 0
+tracksRegLiveness: true
+frameInfo:
+  isFrameAddressTaken: true
+stack:
+  - { id: 0, type: variable-sized, alignment: 1 }
+  - { id: 1, name: '', size: 16, alignment: 8 }
+  - { id: 2, stack-id: sve-vec, size: 16, alignment: 16 }
+body: |
+  bb.0:
+    liveins: $x0
+    STRXui $x0, %stack.1, 0
+    RET_ReallyLR
+...
Index: llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
@@ -0,0 +1,28 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog -mattr=+sve %s -o - | FileCheck %s
+---
+# This test verifies that the emergency scavenging slot is located near the SP/BP.
+name: LateScavengingSlot
+# CHECK-LABEL: name: LateScavengingSlot
+# CHECK: bb.0:
+# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 1
+# CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
+# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
+# CHECK: bb.1:
+tracksRegLiveness: true
+frameInfo:
+  isFrameAddressTaken: true
+stack:
+  - { id: 0, name: '', size: 32761, alignment: 8 }
+  - { id: 1, stack-id: sve-vec, size: 16, alignment: 16 }
+body: |
+  bb.0:
+    liveins: $x0, $x8
+    STRXui $x0, %stack.1, 0
+    B %bb.1
+  bb.1:
+    liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
+    RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr
+...
Index: llvm/test/CodeGen/AArch64/framelayout-sve.mir
===================================================================
--- llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mattr=+sve -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
 #
 # Test allocation and deallocation of SVE objects on the stack,
 # as well as using a combination of scalable and non-scalable
@@ -30,7 +30,7 @@
     define void @test_address_sve_fp() nounwind { entry: unreachable }
     define void @test_stack_arg_sve() nounwind { entry: unreachable }
     define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
-    define void @test_address_gpr_vla_nobp() nounwind { entry: unreachable }
+    define void @test_address_gpr_vla() nounwind { entry: unreachable }
     define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable }
     define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
     define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
@@ -335,23 +335,23 @@
     RET_ReallyLR
 ---
 ...
-# Test that non-SVE objects are accessed from FP when there is no BP,
-# but the SP cannot be used because of variable-length arrays.
+# Test that non-SVE objects are accessed from the BP when there are
+# variable-length arrays, because it would be more expensive to
+# access them from the FP when there are also SVE objects on the stack.
 #
 # +----------+ <- FP
 # | %fstack.0| // 16 scalable bytes
 # +----------+ <- @FP - 16 scalable bytes
 # | %stack.0 | // 16 bytes
-# +----------+ <- @FP - 16 scalable bytes - 16b
+# +----------+ <- @BP
 # : %stack.1 : // variable length
 # +----------+ <- SP

-# CHECK-LABEL: name: test_address_gpr_vla_nobp
-# CHECK: bb.0.entry:
-# CHECK:      $[[TMP:x[0-9]+]] = ADDVL_XXI $fp, -1
-# CHECK-NEXT: STURXi $xzr, killed $[[TMP]], -16
-# CHECK:      RET_ReallyLR
-name: test_address_gpr_vla_nobp
+# CHECK-LABEL: name: test_address_gpr_vla
+# CHECK: bb.0.entry:
+# CHECK:      STRXui $xzr, $x19, 0
+# CHECK:      RET_ReallyLR
+name: test_address_gpr_vla
 frameInfo:
   maxAlignment: 16
 fixedStack:
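
As an illustration of the new behaviour (not part of the patch), a C function of the following shape models the same frame as the hasBasepointer and test_address_gpr_vla tests above: a scalable SVE stack object combined with a variable-length array. The function and variable names are hypothetical. When compiled for an SVE-enabled target (e.g. clang -O0 -march=armv8-a+sve), non-SVE locals such as the VLA are expected to be addressed via the base pointer (x19) rather than through an FP-relative sequence that would first have to step over the scalable region:

// Hypothetical example, not taken from the patch or the LLVM test suite.
#include <arm_sve.h>

void use(int *buf, svint32_t *v);  // assumed external sink, keeps both objects alive

void mixed_frame(int n, svint32_t v) {
  svint32_t copy = v;  // scalable (SVE) object, kept on the stack at -O0
  int buf[n];          // variable-length array; the frame now gets a base pointer
  buf[0] = 0;          // expected to be addressed relative to x19, not the FP
  use(buf, &copy);
}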