diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1643,8 +1643,18 @@
   if (EmitCFI)
     emitCalleeSavedGPRLocations(MBB, MBBI);
 
-  if (windowsRequiresStackProbe(MF, NumBytes)) {
-    uint64_t NumWords = NumBytes >> 4;
+  // Alignment is required for the parent frame, not the funclet
+  const bool NeedsRealignment =
+      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
+  // Extra bytes to allocate so that rounding SP down to the maximum alignment
+  // still leaves NumBytes available; the "- 16" assumes SP is 16-byte aligned.
+  const int64_t RealignmentPadding =
+      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+          ? MFI.getMaxAlign().value() - 16
+          : 0;
+
+  if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
+    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
     if (NeedsWinCFI) {
       HasWinCFI = true;
       // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
@@ -1736,6 +1746,42 @@
           .setMIFlag(MachineInstr::FrameSetup);
     }
     NumBytes = 0;
+
+    if (RealignmentPadding > 0) {
+      // The probe path above allocated NumBytes + RealignmentPadding. Add the
+      // padding back and round down to the maximum alignment, so that at
+      // least NumBytes remain allocated above the aligned SP.
+      if (RealignmentPadding >= 4096) {
+        // ADD (immediate) only encodes a 12-bit value, so larger paddings are
+        // materialized in x16 and added with the extended-register form,
+        // which accepts SP as its base operand.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X16)
+            .addImm(RealignmentPadding)
+            .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+            .addReg(AArch64::SP)
+            .addReg(AArch64::X16, RegState::Kill)
+            .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+            .addReg(AArch64::SP)
+            .addImm(RealignmentPadding)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+
+      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+          .addReg(AArch64::X15, RegState::Kill)
+          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+          .setMIFlag(MachineInstr::FrameSetup);
+      AFI->setStackRealigned(true);
+
+      // No need for SEH instructions here; if we're realigning the stack,
+      // we've set a frame pointer and already finished the SEH prologue.
+      assert(!NeedsWinCFI);
+    }
   }
 
   StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
@@ -1774,9 +1820,6 @@
 
   // Allocate space for the rest of the frame.
   if (NumBytes) {
-    // Alignment is required for the parent frame, not the funclet
-    const bool NeedsRealignment =
-        !IsFunclet && RegInfo->hasStackRealignment(MF);
     unsigned scratchSPReg = AArch64::SP;
 
     if (NeedsRealignment) {
diff --git a/llvm/test/CodeGen/AArch64/win-align-chkstk.ll b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-align-chkstk.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s
+
+; An over-aligned alloca that is large enough to require __chkstk: the
+; prologue must both probe the stack and realign sp afterwards.
+
+define dso_local void @func() {
+entry:
+  %buf = alloca [8192 x i8], align 32
+  %arraydecay = getelementptr inbounds [8192 x i8], ptr %buf, i64 0, i64 0
+  call void @other(ptr noundef %arraydecay)
+  ret void
+}
+
+declare dso_local void @other(ptr noundef)
+
+; CHECK-LABEL: func:
+; CHECK-NEXT: .seh_proc func
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: str x28, [sp, #-32]!
+; CHECK-NEXT: .seh_save_reg_x x28, 32
+; CHECK-NEXT: stp x29, x30, [sp, #8]
+; CHECK-NEXT: .seh_save_fplr 8
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: .seh_add_fp 8
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: mov x15, #513
+; CHECK-NEXT: bl __chkstk
+; CHECK-NEXT: sub sp, sp, x15, lsl #4
+; CHECK-NEXT: add x15, sp, #16
+; CHECK-NEXT: and sp, x15, #0xffffffffffffffe0
diff --git a/llvm/test/CodeGen/AArch64/win-realign.ll b/llvm/test/CodeGen/AArch64/win-realign.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-realign.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s
+
+; "stackrealign" on a function without stack objects: the expected output
+; contains no realignment code, only the frame record setup and teardown.
+
+define dso_local void @func() #0 {
+; CHECK-LABEL: func:
+; CHECK: .seh_proc func
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr_x 16
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: .seh_set_fp
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr_x 16
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+entry:
+  ret void
+}
+
+attributes #0 = { uwtable "frame-pointer"="none" "stackrealign" }