diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1757,6 +1757,7 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo(); DebugLoc DL; bool NeedsWinCFI = needsWinCFI(MF); bool EmitCFI = MF.getInfo()->needsAsyncDwarfUnwindInfo(); @@ -1909,7 +1910,6 @@ // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && hasFP(MF)) { - const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo(); unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes)); @@ -1953,6 +1953,15 @@ // Deallocate the SVE area. if (SVEStackSize) { + // If we have a base pointer and SVE callee saves, we must restore the stack + // pointer from the base pointer. This explicitly deallocates variable- + // length stack allocations so that the SVE CSRs can be restored correctly. 
+ if (!IsFunclet && RegInfo.hasBasePointer(MF) && + AFI->getSVECalleeSavedStackSize()) + emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, + RegInfo.getBaseRegister(), StackOffset::getFixed(0), TII, + MachineInstr::FrameDestroy); + if (AFI->isStackRealigned()) { if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { // Set SP to start of SVE callee-save area from which they can diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: bl bar +; CHECK-NEXT: mov sp, x19 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload