Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -579,6 +579,12 @@ !F.hasFnAttribute("no-stack-arg-probe"); } +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); @@ -589,6 +595,18 @@ if (AFI->getLocalStackSize() == 0) return false; + // For WinCFI, if optimizing for size, prefer not to combine the stack bump + // (to force a stp with predecrement) to match the packed unwind format, + // provided that there actually are any callee-saved registers to merge the + // decrement with. + // This is potentially marginally slower, but allows using the packed + // unwind format for functions that both have a local area and callee-saved + // registers. Using the packed unwind format notably reduces the size of + // the unwind info. 
+ if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && + MF.getFunction().hasOptSize()) + return false; + // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) @@ -982,12 +1000,6 @@ // } -static bool needsWinCFI(const MachineFunction &MF) { - const Function &F = MF.getFunction(); - return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - F.needsUnwindTableEntry(); -} - static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } Index: llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir @@ -0,0 +1,70 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s + +# Check that the callee-saved registers are saved starting with a STP +# with predecrement, followed by a separate stack adjustment later, +# if the optsize attribute is set. + +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -2 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -16 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: frame-setup SEH_StackAlloc 16 +# CHECK-NEXT: frame-setup SEH_PrologEnd + +--- | + + define dso_local i32 @func(i32 %a) optsize { ret i32 %a } + +... 
+--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x19, $x20 + + renamable $x8 = ADDXri %stack.0, 0, 0 + $x19 = ADDXrr $x0, $x8 + $x20 = ADDXrr $x19, $x0 + $x0 = ADDXrr $x0, killed $x20 + + RET_ReallyLR + +...