diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1501,7 +1501,14 @@ // function, including the funclet. int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); - if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { + + // Alignment is required for the parent frame, not the funclet + const bool NeedsRealignment = + NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF); + int64_t RealignmentBytes = NeedsRealignment ? MFI.getMaxAlign().value() : 0; + + if (!AFI->hasStackFrame() && + !windowsRequiresStackProbe(MF, NumBytes + RealignmentBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); assert(!SVEStackSize && "unexpected function without stack frame but with SVE objects"); @@ -1643,8 +1650,8 @@ if (EmitCFI) emitCalleeSavedGPRLocations(MBB, MBBI); - if (windowsRequiresStackProbe(MF, NumBytes)) { - uint64_t NumWords = NumBytes >> 4; + if (windowsRequiresStackProbe(MF, NumBytes + RealignmentBytes)) { + uint64_t NumWords = (NumBytes + RealignmentBytes) >> 4; if (NeedsWinCFI) { HasWinCFI = true; // alloc_l can hold at most 256MB, so assume that NumBytes doesn't @@ -1736,6 +1743,28 @@ .setMIFlag(MachineInstr::FrameSetup); } NumBytes = 0; + + if (NeedsRealignment) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15) + .addReg(AArch64::SP) + .addImm(RealignmentBytes - 1) + .addImm(0); + + const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); + assert(NrBitsToZero > 1); + + // AND SP, X15, 0b11111...0000 + // The logical immediates have a non-trivial encoding. The following + // formula computes the encoded immediate with all ones but + // NrBitsToZero zero bits as least significant bits. 
+ uint32_t andMaskEncoded = (1 << 12) // = N + | ((64 - NrBitsToZero) << 6) // immr + | ((64 - NrBitsToZero - 1) << 0); // imms + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(AArch64::X15, RegState::Kill) + .addImm(andMaskEncoded); + } } StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {}; @@ -1774,9 +1803,6 @@ // Allocate space for the rest of the frame. if (NumBytes) { - // Alignment is required for the parent frame, not the funclet - const bool NeedsRealignment = - !IsFunclet && RegInfo->hasStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { diff --git a/llvm/test/CodeGen/AArch64/win-align-chkstk.ll b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s + +define dso_local void @func() { +entry: + %buf = alloca [8192 x i8], align 32 + %arraydecay = getelementptr inbounds [8192 x i8], ptr %buf, i64 0, i64 0 + call void @other(ptr noundef %arraydecay) + ret void +} + +declare dso_local void @other(ptr noundef) + +; CHECK-LABEL: func: +; CHECK-NEXT: .seh_proc func +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: str x28, [sp, #-32]! +; CHECK-NEXT: .seh_save_reg_x x28, 32 +; CHECK-NEXT: stp x29, x30, [sp, #8] +; CHECK-NEXT: .seh_save_fplr 8 +; CHECK-NEXT: add x29, sp, #8 +; CHECK-NEXT: .seh_add_fp 8 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: mov x15, #514 +; CHECK-NEXT: bl __chkstk +; CHECK-NEXT: sub sp, sp, x15, lsl #4 +; CHECK-NEXT: add x15, sp, #31 +; CHECK-NEXT: and sp, x15, #0xffffffffffffffe0