diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1496,7 +1496,21 @@
   // function, including the funclet.
   int64_t NumBytes =
       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
-  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
+
+  // Alignment is required for the parent frame, not the funclet
+  const bool NeedsRealignment =
+      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
+  // Extra bytes to allocate so SP can be rounded down to the maximum
+  // alignment afterwards. Only pad when the maximum alignment really exceeds
+  // the 16-byte stack alignment; otherwise the subtraction would yield zero
+  // or a wrapped-around value.
+  const int64_t RealignmentPadding =
+      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+          ? MFI.getMaxAlign().value() - 16
+          : 0;
+
+  if (!AFI->hasStackFrame() &&
+      !windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
     assert(!SVEStackSize &&
            "unexpected function without stack frame but with SVE objects");
@@ -1638,8 +1652,8 @@
   if (EmitCFI)
     emitCalleeSavedGPRLocations(MBB, MBBI);
 
-  if (windowsRequiresStackProbe(MF, NumBytes)) {
-    uint64_t NumWords = NumBytes >> 4;
+  if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
+    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
     if (NeedsWinCFI) {
       HasWinCFI = true;
       // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
@@ -1731,6 +1745,39 @@
           .setMIFlag(MachineInstr::FrameSetup);
     }
     NumBytes = 0;
+
+    if (RealignmentPadding > 0) {
+      if (RealignmentPadding >= 4096) {
+        // ADDXri only takes a 12-bit unsigned immediate, so a larger padding
+        // is materialized in a register first.
+        // NOTE(review): assumes X16 holds nothing live here -- confirm.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+            .addReg(AArch64::X16, RegState::Define)
+            .addImm(RealignmentPadding)
+            .setMIFlags(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+            .addReg(AArch64::SP)
+            .addReg(AArch64::X16, RegState::Kill)
+            .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+            .addReg(AArch64::SP)
+            .addImm(RealignmentPadding)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+
+      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+          .addReg(AArch64::X15, RegState::Kill)
+          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+      AFI->setStackRealigned(true);
+
+      // No need for SEH instructions here; if we're realigning the stack,
+      // we've set a frame pointer and already finished the SEH prologue.
+      assert(!NeedsWinCFI);
+    }
   }
 
   StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
@@ -1769,9 +1816,6 @@
 
   // Allocate space for the rest of the frame.
   if (NumBytes) {
-    // Alignment is required for the parent frame, not the funclet
-    const bool NeedsRealignment =
-        !IsFunclet && RegInfo->hasStackRealignment(MF);
     unsigned scratchSPReg = AArch64::SP;
 
     if (NeedsRealignment) {
diff --git a/llvm/test/CodeGen/AArch64/win-align-chkstk.ll b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s
+
+; A frame that is both large enough to need __chkstk and over-aligned must
+; probe for the realignment padding too, and realign SP after the allocation.
+
+define dso_local void @func() {
+entry:
+  %buf = alloca [8192 x i8], align 32
+  %arraydecay = getelementptr inbounds [8192 x i8], ptr %buf, i64 0, i64 0
+  call void @other(ptr noundef %arraydecay)
+  ret void
+}
+
+declare dso_local void @other(ptr noundef)
+
+; CHECK-LABEL: func:
+; CHECK-NEXT: .seh_proc func
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: str x28, [sp, #-32]!
+; CHECK-NEXT: .seh_save_reg_x x28, 32
+; CHECK-NEXT: stp x29, x30, [sp, #8]
+; CHECK-NEXT: .seh_save_fplr 8
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: .seh_add_fp 8
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: mov x15, #513
+; CHECK-NEXT: bl __chkstk
+; CHECK-NEXT: sub sp, sp, x15, lsl #4
+; CHECK-NEXT: add x15, sp, #16
+; CHECK-NEXT: and sp, x15, #0xffffffffffffffe0