diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -26,6 +26,8 @@
 class X86RegisterInfo;
 
 class X86FrameLowering : public TargetFrameLowering {
+  class FrameBuilder;
+  friend class FrameBuilder;
 public:
   X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride);
 
@@ -251,14 +253,14 @@
                     MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                     int Offset) const;
 
+  unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const;
+
   /// Adjusts the stack pointer using LEA, SUB, or ADD.
   MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL, int64_t Offset,
                                            bool InEpilogue) const;
 
-  unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const;
-
   unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
 
   /// Materialize the catchret target MBB in RAX.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1388,146 +1388,184 @@
   return !isWin64Prologue(MF) && MF.needsFrameMoves();
 }
 
-/// emitPrologue - Push callee-saved registers onto the stack, which
-/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
-/// space for local variables. Also emit labels used by the exception handler to
-/// generate the exception handling frames.
+class X86FrameLowering::FrameBuilder final {
+  const Function &F;
+
+  MachineFunction &MF;
+  MachineFrameInfo &MFI;
+  MachineBasicBlock &MBB;
+  MachineModuleInfo &MMI;
+
+  const X86Subtarget &STI;
+  const X86InstrInfo &TII;
+  const X86FrameLowering &TFL;
+  const X86RegisterInfo *TRI;
+  X86MachineFunctionInfo *TFI;
+
+  // Indicates if the target uses Windows CFI directives.
+  bool TargetUsesWinCFI;
+
+  bool Is32Bit;
+  bool Is64Bit;
+  uint64_t SlotSize;
+  bool IsWin64Prologue;
+  bool HasStackRealignment;
+  Register StackPointer;
+
+  // Indicates if the frame has a frame pointer.
+  bool HasFramePointer;
+  bool Uses64BitFramePointer;
+  Register FramePointer;
+  const Register MachineFramePointer;
+
+  // Indicates if we are building the frame for a funclet.
+  bool IsFunclet;
+  // Indicates if we are building the frame for a CLR funclet.
+  bool IsCLRFunclet = false;
+  // Identifies the register that is the funclet frame establisher.
+  Register FuncletFrameEstablisher = X86::NoRegister;
+
+  // Indicates if the frame needs stack probes to be emitted.
+  bool ShouldEmitStackProbe;
+  unsigned StackProbeSize;
+
+  // Indicates the exception handling personality for the function. If the
+  // function does not have an associated personality, this will be set to
+  // `EHPersonality::Unknown`.
+  EHPersonality Personality = EHPersonality::Unknown;
 
-/*
-  Here's a gist of what gets emitted:
+  // Indicates if the frame needs CFI directives to be emitted for FPO.
+  bool ShouldEmitWinFPO;
 
-  ; Establish frame pointer, if needed
-  [if needs FP]
-      push %rbp
-      .cfi_def_cfa_offset 16
-      .cfi_offset %rbp, -16
-      .seh_pushreg %rpb
-      mov %rsp, %rbp
-      .cfi_def_cfa_register %rbp
+  // Indicates if the frame needs Windows CFI directives to be emitted.
+  bool ShouldEmitWinCFI;
 
-  ; Spill general-purpose registers
-  [for all callee-saved GPRs]
-      pushq %
-      [if not needs FP]
-         .cfi_def_cfa_offset (offset from RETADDR)
-      .seh_pushreg %
 
+  // Indicates if the frame needs DWARF CFI directives to be emitted.
+ bool ShouldEmitDWARFCFI; - ; If the required stack alignment > default stack alignment - ; rsp needs to be re-aligned. This creates a "re-alignment gap" - ; of unknown size in the stack frame. - [if stack needs re-alignment] - and $MASK, %rsp + // The insertion point for the basic block. + MachineBasicBlock::iterator MBBI; - ; Allocate space for locals - [if target is Windows and allocated space > 4096 bytes] - ; Windows needs special care for allocations larger - ; than one page. - mov $NNN, %rax - call ___chkstk_ms/___chkstk - sub %rax, %rsp - [else] - sub $NNN, %rsp - - [if needs FP] - .seh_stackalloc (size of XMM spill slots) - .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots - [else] - .seh_stackalloc NNN - - ; Spill XMMs - ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, - ; they may get spilled on any platform, if the current function - ; calls @llvm.eh.unwind.init - [if needs FP] - [for all callee-saved XMM registers] - movaps %, -MMM(%rbp) - [for all callee-saved XMM registers] - .seh_savexmm %, (-MMM + SEHFrameOffset) - ; i.e. the offset relative to (%rbp - SEHFrameOffset) - [else] - [for all callee-saved XMM registers] - movaps %, KKK(%rsp) - [for all callee-saved XMM registers] - .seh_savexmm %, KKK - - .seh_endprologue - - [if needs base pointer] - mov %rsp, %rbx - [if needs to restore base pointer] - mov %rsp, -MMM(%rbp) - - ; Emit CFI info - [if needs FP] - [for all callee-saved registers] - .cfi_offset %, (offset from %rbp) - [else] - .cfi_def_cfa_offset (offset from RETADDR) - [for all callee-saved registers] - .cfi_offset %, (offset from %rsp) - - Notes: - - .seh directives are emitted only for Windows 64 ABI - - .cv_fpo directives are emitted on win32 when emitting CodeView - - .cfi directives are emitted for all other ABIs - - for 32-bit code, substitute %e?? registers for %r?? -*/ + // The debug location associated with any instruction emitted. This should be + // the unknown value for the prologue as the first non-debug instruction + // establishes the end of the prologue. + DebugLoc DL; -void X86FrameLowering::emitPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - assert(&STI == &MF.getSubtarget() && - "MF used frame lowering for wrong subtarget"); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function &Fn = MF.getFunction(); - MachineModuleInfo &MMI = MF.getMMI(); - X86MachineFunctionInfo *X86FI = MF.getInfo(); - uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. - uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. - bool IsFunclet = MBB.isEHFuncletEntry(); - EHPersonality Personality = EHPersonality::Unknown; - if (Fn.hasPersonalityFn()) - Personality = classifyEHPersonality(Fn.getPersonalityFn()); - bool FnHasClrFunclet = - MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; - bool IsClrFunclet = IsFunclet && FnHasClrFunclet; - bool HasFP = hasFP(MF); - bool IsWin64Prologue = isWin64Prologue(MF); - bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); - // FIXME: Emit FPO data for EH funclets. - bool NeedsWinFPO = - !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); - bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; - bool NeedsDwarfCFI = needsDwarfCFI(MF); - Register FramePtr = TRI->getFrameRegister(MF); - const Register MachineFramePtr = - STI.isTarget64BitILP32() - ? 
Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; - Register BasePtr = TRI->getBaseRegister(); + // Indicates if teh frame has any Windows CFI directives. bool HasWinCFI = false; - // Debug location must be unknown since the first debug location is used - // to determine the end of the prologue. - DebugLoc DL; - - // Space reserved for stack-based arguments when making a (ABI-guaranteed) - // tail call. - unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); - if (TailCallArgReserveSize && IsWin64Prologue) - report_fatal_error("Can't handle guaranteed tail call under win64 yet"); +public: + FrameBuilder(const X86FrameLowering &TFL, MachineFunction &MF, + MachineBasicBlock &MBB) + : F(MF.getFunction()), MF(MF), MFI(MF.getFrameInfo()), MBB(MBB), + MMI(MF.getMMI()), STI(MF.getSubtarget()), + TII(*STI.getInstrInfo()), TFL(TFL), TRI(STI.getRegisterInfo()), + TFI(MF.getInfo()), Is32Bit(STI.is32Bit()), + Is64Bit(STI.is64Bit()), SlotSize(TRI->getSlotSize()), + IsWin64Prologue(TFL.isWin64Prologue(MF)), + HasStackRealignment(TRI->hasStackRealignment(MF)), + StackPointer(TRI->getStackRegister()), HasFramePointer(TFL.hasFP(MF)), + Uses64BitFramePointer(TFL.Uses64BitFramePtr), + FramePointer(TRI->getFrameRegister(MF)), + MachineFramePointer(STI.isTarget64BitILP32() + ? Register(getX86SubSuperRegister(FramePointer, + 64)) + : FramePointer), + MBBI(MBB.begin()) { + const MCAsmInfo *MAI = MF.getTarget().getMCAsmInfo(); + const X86TargetLowering *TLI = STI.getTargetLowering(); + + TargetUsesWinCFI = MAI->usesWindowsCFI(); + + if (F.hasPersonalityFn()) + Personality = classifyEHPersonality(F.getPersonalityFn()); + + IsFunclet = MBB.isEHFuncletEntry(); + if (IsFunclet) { + assert(HasFramePointer && + "funclets without frame pointers not yet implemented"); + + IsCLRFunclet = + MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; + + if (IsCLRFunclet) + FuncletFrameEstablisher = Uses64BitFramePointer ? X86::RCX : X86::ECX; + else + FuncletFrameEstablisher = Uses64BitFramePointer ? X86::RDX : X86::EDX; + } - const bool EmitStackProbeCall = - STI.getTargetLowering()->hasStackProbeSymbol(MF); - unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); + // FIXME: we should emit FPO data for funclets as well. + ShouldEmitWinFPO = + STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag() && !IsFunclet; + + // Emit CFI directives if the frame needs to support unwinding or if we need + // to emit FPO data. + ShouldEmitWinCFI = + (TargetUsesWinCFI && F.needsUnwindTableEntry()) || ShouldEmitWinFPO; + + ShouldEmitDWARFCFI = !MAI->usesWindowsCFI() && MF.needsFrameMoves(); + + ShouldEmitStackProbe = TLI->hasStackProbeSymbol(MF); + StackProbeSize = TLI->getStackProbeSize(MF); + } + + void EmitPrologue() { + int SEHFrameOffset = 0; + bool PushedRegs = false; + uint64_t NumBytes, ParentFrameNumBytes; + Register SPOrEstablisher = + IsFunclet ? FuncletFrameEstablisher + : static_cast(StackPointer); + uint64_t StackSize = MF.getFrameInfo().getStackSize(); + + EncodeSwiftAsyncContextIntoFramePointer(); + RealignStackForInterruptCC(StackSize); + ConfigureRedZoneReuse(StackSize); + EmitMandatoryTailCallArgumentReservation(); + // Immediately spill establisher into the home slot. The runtime cares about + // this. + EmitFuncletEstablisherSpill(); + EmitFramePointer(); + EmitCFIForRegisterSpills(PushedRegs); + // Realign stack after we pushed callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). 
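+    // Win64 defers this realignment until after the prologue is established;
+    // see EmitLateStackRealignment() below.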
+ EmitEarlyStackRealignment(); + EmitStackAdjustment(StackSize, NumBytes, ParentFrameNumBytes); + EmitCLRFuncletRootEstablisher(); + EmitWin64FramePointer(ParentFrameNumBytes, SPOrEstablisher, SEHFrameOffset); + Emit32BitFuncletStackPointerSpill(); + EmitFPRSpillCFI(SEHFrameOffset); + if (HasWinCFI) + EmitWinCFI(X86::SEH_EndPrologue); + EmitCLRFuncletPSPInfo(); + // Realign stack after we spilled callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). + EmitLateStackRealignment(SPOrEstablisher); + EmitBasePointerSetup(SPOrEstablisher); + EmitDWARFFrameMoves(StackSize, NumBytes, PushedRegs); + ClearDirectionForInterruptCC(); + } + + ~FrameBuilder() { + // At this point we know if the function has WinCFI or not. + if (!(IsFunclet && Is32Bit)) + MF.setHasWinCFI(HasWinCFI); + } + +private: + void EncodeSwiftAsyncContextIntoFramePointer() { + if (!HasFramePointer) + return; + if (!TFI->hasSwiftAsyncContext()) + return; - if (HasFP && X86FI->hasSwiftAsyncContext()) { switch (MF.getTarget().Options.SwiftAsyncFramePointer) { case SwiftAsyncFramePointerMode::DeploymentBased: if (STI.swiftAsyncContextIsDynamicallySet()) { // The special symbol below is absolute and has a *value* suitable to be // combined with the frame pointer directly. - BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr) - .addUse(MachineFramePtr) + BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePointer) + .addUse(MachineFramePointer) .addUse(X86::RIP) .addImm(1) .addUse(X86::NoRegister) @@ -1539,8 +1577,8 @@ [[fallthrough]]; case SwiftAsyncFramePointerMode::Always: - BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr) - .addUse(MachineFramePtr) + BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePointer) + .addUse(MachineFramePointer) .addImm(60) .setMIFlag(MachineInstr::FrameSetup); break; @@ -1550,536 +1588,695 @@ } } - // Re-align the stack on 64-bit if the x86-interrupt calling convention is - // used and an error code was pushed, since the x86-64 ABI requires a 16-byte - // stack alignment. - if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && - Fn.arg_size() == 2) { - StackSize += 8; - MFI.setStackSize(StackSize); - emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false); - } - - // If this is x86-64 and the Red Zone is not disabled, if we are a leaf - // function, and use up to 128 bytes of stack space, don't have a frame - // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). We also check that we don't - // push and pop from the stack. - if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && - !MFI.hasVarSizedObjects() && // No dynamic alloca. - !MFI.adjustsStack() && // No calls. - !EmitStackProbeCall && // No stack probes. - !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. - !MF.shouldSplitStack()) { // Regular stack - uint64_t MinSize = - X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); - if (HasFP) MinSize += SlotSize; - X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); - StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); - MFI.setStackSize(StackSize); - } - - // Insert stack pointer adjustment for later moving of return addr. Only - // applies to tail call optimized functions where the callee argument stack - // size is bigger than the callers. 
- if (TailCallArgReserveSize != 0) { - BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize, - /*InEpilogue=*/false) - .setMIFlag(MachineInstr::FrameSetup); - } + void EmitSwiftAsyncContextSpill() { + // Swift async_context should not be emitted into a funclet. + if (IsFunclet) + return; - // Mapping for machine moves: - // - // DST: VirtualFP AND - // SRC: VirtualFP => DW_CFA_def_cfa_offset - // ELSE => DW_CFA_def_cfa - // - // SRC: VirtualFP AND - // DST: Register => DW_CFA_def_cfa_register - // - // ELSE - // OFFSET < 0 => DW_CFA_offset_extended_sf - // REG < 64 => DW_CFA_offset + Reg - // ELSE => DW_CFA_offset_extended + if (!TFI->hasSwiftAsyncContext()) + return; - uint64_t NumBytes = 0; - int stackGrowth = -SlotSize; - - // Find the funclet establisher parameter - Register Establisher = X86::NoRegister; - if (IsClrFunclet) - Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; - else if (IsFunclet) - Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; - - if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { - // Immediately spill establisher into the home slot. - // The runtime cares about this. - // MOV64mr %rdx, 16(%rsp) - unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) - .addReg(Establisher) - .setMIFlag(MachineInstr::FrameSetup); - MBB.addLiveIn(Establisher); - } + const auto &Attrs = MF.getFunction().getAttributes(); - if (HasFP) { - assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved"); + // Before we update the live frame pointer we have to ensure there's a valid + // (or null) asynchronous context in its slot just before FP in the frame + // record, so store it now. + if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { + // We have an initial context in r14, store it just before the frame + // pointer. + MBB.addLiveIn(X86::R14); + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) + .addReg(X86::R14) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // No initial context, store null so that there's no pointer that + // could be misused. + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8)) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + } - // Calculate required stack adjustment. - uint64_t FrameSize = StackSize - SlotSize; - // If required, include space for extra hidden slot for stashing base pointer. - if (X86FI->getRestoreBasePointer()) - FrameSize += SlotSize; + EmitWinCFI(X86::SEH_PushReg, {X86::R14}); - NumBytes = FrameSize - - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); + BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePointer) + .addUse(X86::RSP) + .addImm(1) + .addUse(X86::NoRegister) + .addImm(8) + .addUse(X86::NoRegister) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) + .addUse(X86::RSP) + .addImm(8) + .setMIFlag(MachineInstr::FrameSetup); + } - // Callee-saved registers are pushed on stack before the stack is realigned. - if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) - NumBytes = alignTo(NumBytes, MaxAlign); + void EmitStackProbe(uint64_t NumBytes) { + if (!ShouldEmitStackProbe) + return; - // Save EBP/RBP into the appropriate stack slot. - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(MachineFramePtr, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); + // Windows, cygwin, and MinGW require a prologue helper routine when + // allocating more than 4K bytes on the stack. 
Windows uses `__chkstk` and + // cygwin/MinGW use `__alloca`. `__alloca` and the 32-bit version of + // `__chkstk` will probe the stack and adjust the stack pointer in one go. + // The 64-bit version of `__chkstk` is only responsible for probing the + // stack. The 64-bit prologue is responsible for adjusting the stack + // pointer. Touching the stack at 4K increments is necessary to ensure that + // the guard pages used by the OS virtual memory manager are allocated in + // correct sequence. + + assert(!TFI->getUsesRedZone() && + "The Red Zone is not accounted for in stack probes"); - if (NeedsDwarfCFI) { - // Mark the place where EBP/RBP was saved. - // Define the current CFA rule to use the provided offset. - assert(StackSize); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth), - MachineInstr::FrameSetup); + // Check whether eax is livein for this block. + bool IsEAXLiveIn = isEAXLiveIn(MBB); - // Change the rule for the FramePtr to be an "offset" rule. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset(nullptr, DwarfFramePtr, - 2 * stackGrowth), - MachineInstr::FrameSetup); - } + // Save eax/rax if it is a live-in. + if (IsEAXLiveIn) + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(Is64Bit ? X86::RAX : X86::EAX, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) - .addImm(FramePtr) + if (Is64Bit) { + // Handle the 64-bit Windows ABI case where we need to call `__chkstk`. + // The function prologue is responsible for adjusting the stack pointer. + int64_t Alloc = IsEAXLiveIn ? NumBytes - 8 : NumBytes; + BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX) + .addImm(Alloc) .setMIFlag(MachineInstr::FrameSetup); + } else { + // Allocate NumBytes - 4 bytes on the stack in case of eax being a livein. + // We'll also use 4 already allocated bytes for eax. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(IsEAXLiveIn ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } - if (!IsFunclet) { - if (X86FI->hasSwiftAsyncContext()) { - const auto &Attrs = MF.getFunction().getAttributes(); - - // Before we update the live frame pointer we have to ensure there's a - // valid (or null) asynchronous context in its slot just before FP in - // the frame record, so store it now. - if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { - // We have an initial context in r14, store it just before the frame - // pointer. - MBB.addLiveIn(X86::R14); - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) - .addReg(X86::R14) - .setMIFlag(MachineInstr::FrameSetup); - } else { - // No initial context, store null so that there's no pointer that - // could be misused. - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8)) - .addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - } - - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) - .addImm(X86::R14) - .setMIFlag(MachineInstr::FrameSetup); - } + // Call `__chkstk`, `__chkstk_ms`, or `__alloca`. 
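+    // emitStackProbe picks the helper that is appropriate for the target.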
+ TFL.emitStackProbe(MF, MBB, MBBI, DL, true); - BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr) - .addUse(X86::RSP) - .addImm(1) - .addUse(X86::NoRegister) - .addImm(8) - .addUse(X86::NoRegister) - .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) - .addUse(X86::RSP) - .addImm(8) + // Restore eax/rax if lt is a live-in. + if (IsEAXLiveIn) + addRegOffset(BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rm : X86::MOV32rm), + Is64Bit ? X86::RAX : X86::EAX), + StackPointer, false, NumBytes - (Is64Bit ? 8 : 4)) .setMIFlag(MachineInstr::FrameSetup); - } + } - if (!IsWin64Prologue && !IsFunclet) { - // Update EBP with the new base value. - if (!X86FI->hasSwiftAsyncContext()) - BuildMI(MBB, MBBI, DL, - TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), - FramePtr) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); - - if (NeedsDwarfCFI) { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI( - MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr), - MachineInstr::FrameSetup); - } + void EmitWinCFI(unsigned CFI, std::initializer_list Args = {}) { + if (!ShouldEmitWinCFI) + return; - if (NeedsWinFPO) { - // .cv_fpo_setframe $FramePtr - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) - .addImm(FramePtr) - .addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - } - } - } - } else { - assert(!IsFunclet && "funclets without FPs not yet implemented"); - NumBytes = StackSize - - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); - } + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(CFI)); + for (int64_t Imm : Args) + MIB.addImm(Imm); + MIB.setMIFlag(MachineInstr::FrameSetup); - // Update the offset adjustment, which is mainly used by codeview to translate - // from ESP to VFRAME relative local variable offsets. - if (!IsFunclet) { - if (HasFP && TRI->hasStackRealignment(MF)) - MFI.setOffsetAdjustment(-NumBytes); - else - MFI.setOffsetAdjustment(-StackSize); + HasWinCFI = true; } - // For EH funclets, only allocate enough space for outgoing calls. Save the - // NumBytes value that we would've used for the parent frame. - unsigned ParentFrameNumBytes = NumBytes; - if (IsFunclet) - NumBytes = getWinEHFuncletFrameSize(MF); + void EmitDWARFCFI(MCCFIInstruction &&CFI) { + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended - // Skip the callee-saved push instructions. - bool PushedRegs = false; - int StackOffset = 2 * stackGrowth; + if (!ShouldEmitDWARFCFI) + return; - while (MBBI != MBB.end() && - MBBI->getFlag(MachineInstr::FrameSetup) && - (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) { - PushedRegs = true; - Register Reg = MBBI->getOperand(0).getReg(); - ++MBBI; + unsigned CFIIndex = MF.addFrameInst(CFI); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } - if (!HasFP && NeedsDwarfCFI) { - // Mark callee-saved push instruction. - // Define the current CFA rule to use the provided offset. 
- assert(StackSize); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset), - MachineInstr::FrameSetup); - StackOffset += stackGrowth; - } + void RealignStackForInterruptCC(uint64_t &StackSize) { + // Re-align the stack on 64-bit if the frame is for a X86 Interrupt handler + // and an error code was pushed, since the x86-64 ABI requires a 16-byte + // stack alignment. - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) - .addImm(Reg) - .setMIFlag(MachineInstr::FrameSetup); - } + if (!Is64Bit) + return; + + // Ensure that we are an x86 interrupt calling convention frame. + if (F.getCallingConv() != CallingConv::X86_INTR) + return; + if (F.arg_size() != 2) + return; + + StackSize += 8; + MFI.setStackSize(StackSize); + TFL.emitSPUpdate(MBB, MBBI, DL, -8, /* InEpilogue */false); } - // Realign stack after we pushed callee-saved registers (so that we'll be - // able to calculate their offsets from the frame pointer). - // Don't do this for Win64, it needs to realign the stack after the prologue. - if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) { - assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); + void ClearDirectionForInterruptCC() { + // FIXME: 32-bit funclets should not be interrupt handlers, should this be + // an assert? + if (IsFunclet && Is32Bit) + return; - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign)) - .addImm(MaxAlign) - .setMIFlag(MachineInstr::FrameSetup); - } + // X86 Interrupt handlers cannot assume anything about the direction flag + // (DF in EFLAGS register). Clear this flag by creating `cld` instruction in + // each prologue of interrupt handler function. + + if (F.getCallingConv() != CallingConv::X86_INTR) + return; + + // + // FIXME: Create `cld` instruction only in these cases: + // 1. The interrupt handling function uses any of the `rep` instructions. + // 2. Interrupt handling function calls another function. + // + BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) + .setMIFlag(MachineInstr::FrameSetup); } - // If there is an SUB32ri of ESP immediately before this instruction, merge - // the two. This can be the case when tail call elimination is enabled and - // the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, true); - - // Adjust stack pointer: ESP -= numbytes. - - // Windows and cygwin/mingw require a prologue helper routine when allocating - // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw - // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the - // stack and adjust the stack pointer in one go. The 64-bit version of - // __chkstk is only responsible for probing the stack. The 64-bit prologue is - // responsible for adjusting the stack pointer. Touching the stack at 4K - // increments is necessary to ensure that the guard pages used by the OS - // virtual memory manager are allocated in correct sequence. 
- uint64_t AlignedNumBytes = NumBytes; - if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) - AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); - if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { - assert(!X86FI->getUsesRedZone() && - "The Red Zone is not accounted for in stack probes"); + void ConfigureRedZoneReuse(uint64_t &StackSize) { + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf + // function, and use up to 128 bytes of stack space, don't have a frame + // pointer, calls, or dynamic alloca then we do not need to adjust the + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. - // Check whether EAX is livein for this block. - bool isEAXAlive = isEAXLiveIn(MBB); + if (!TFL.has128ByteRedZone(MF)) + return; - if (isEAXAlive) { - if (Is64Bit) { - // Save RAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) - .addReg(X86::RAX, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - } - } + if (HasStackRealignment) + return; - if (Is64Bit) { - // Handle the 64-bit Windows ABI case where we need to call __chkstk. - // Function prologue is responsible for adjusting the stack pointer. - int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; - BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX) - .addImm(Alloc) - .setMIFlag(MachineInstr::FrameSetup); - } else { - // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. - // We'll also use 4 already allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) - .setMIFlag(MachineInstr::FrameSetup); - } + // Use of dynamic allocas prevents redzone reuse. + if (MFI.hasVarSizedObjects()) + return; - // Call __chkstk, __chkstk_ms, or __alloca. - emitStackProbe(MF, MBB, MBBI, DL, true); + // Stack adjustments (calls) prevents redzone reuse, must be a + // leaf-function. + if (MFI.adjustsStack() || MFI.hasCopyImplyingStackAdjustment()) + return; - if (isEAXAlive) { - // Restore RAX/EAX - MachineInstr *MI; - if (Is64Bit) - MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX), - StackPtr, false, NumBytes - 8); - else - MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), - StackPtr, false, NumBytes - 4); - MI->setFlag(MachineInstr::FrameSetup); - MBB.insert(MBBI, MI); - } - } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false); + // Stack Probes prevent redzone reuse due to calls. + if (ShouldEmitStackProbe) + return; + + // Split stacks will prevent reuse of redzones. + if (MF.shouldSplitStack()) + return; + + uint64_t MinSize = + TFI->getCalleeSavedFrameSize() - TFI->getTCReturnAddrDelta(); + if (HasFramePointer) + MinSize += SlotSize; + + TFI->setUsesRedZone(MinSize > 0 || StackSize > 0); + StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); + MFI.setStackSize(StackSize); } - if (NeedsWinCFI && NumBytes) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) - .addImm(NumBytes) + void EmitFuncletEstablisherSpill() { + if (!IsWin64Prologue) + return; + + if (!IsFunclet) + return; + + // The CLR funclet does not need to spill the establisher. + if (IsCLRFunclet) + return; + + unsigned MOVmr = Uses64BitFramePointer ? 
X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), + StackPointer, true, 16) + .addReg(FuncletFrameEstablisher) .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(FuncletFrameEstablisher); } - int SEHFrameOffset = 0; - unsigned SPOrEstablisher; - if (IsFunclet) { - if (IsClrFunclet) { - // The establisher parameter passed to a CLR funclet is actually a pointer - // to the (mostly empty) frame of its nearest enclosing funclet; we have - // to find the root function establisher frame by loading the PSPSym from - // the intermediate frame. - unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); - MachinePointerInfo NoInfo; - MBB.addLiveIn(Establisher); - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), - Establisher, false, PSPSlotOffset) - .addMemOperand(MF.getMachineMemOperand( - NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize))); - ; - // Save the root establisher back into the current funclet's (mostly - // empty) frame, in case a sub-funclet or the GC needs it. - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, - false, PSPSlotOffset) - .addReg(Establisher) - .addMemOperand(MF.getMachineMemOperand( - NoInfo, - MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - SlotSize, Align(SlotSize))); + void EmitCLRFuncletRootEstablisher() { + if (!IsCLRFunclet) + return; + + assert(IsFunclet && "CLR funclet should be classified as a funclet"); + + // The establisher parameter passed to a CLR funclet is actually a pointer + // to the (mostly empty) frame of its nearest enclosing funclet; we have to + // find the root function establisher frame by loading the PSPSym from the + // intermediate frame. + MachinePointerInfo NoInfo; + unsigned PSPSlotOffset = TFL.getPSPSlotOffsetFromSP(MF); + + MBB.addLiveIn(FuncletFrameEstablisher); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), + FuncletFrameEstablisher), + FuncletFrameEstablisher, false, PSPSlotOffset) + .addMemOperand(MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOLoad, + SlotSize, Align(SlotSize))); + + // Save the root establisher back into the current funclet's (mostly empty) + // frame, in case a sub-funclet or the GC needs it. + auto MOFlags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), + StackPointer, false, PSPSlotOffset) + .addReg(FuncletFrameEstablisher) + .addMemOperand(MF.getMachineMemOperand(NoInfo, MOFlags, SlotSize, + Align(SlotSize))); + } + + void EmitCLRFuncletPSPInfo() { + // We only emit the PSPInfo for the CoreCLR EH frames that are non-funclet. + if (IsFunclet) + return; + if (!MF.hasEHFunclets() || Personality != EHPersonality::CoreCLR) + return; + + // Save the so-called Initial-SP (i.e. the value of the stack pointer + // immediately after the prolog) into the PSPSlot so that funclets and the + // GC can recover it. 
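+    // The slot lives at a fixed offset from the post-prologue stack pointer;
+    // see getPSPSlotOffsetFromSP().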
+ const WinEHFuncInfo *WinEHInfo = MF.getWinEHFuncInfo(); + MachinePointerInfo PSPInfo = + MachinePointerInfo::getFixedStack(MF, WinEHInfo->PSPSymFrameIdx); + + unsigned PSPSlotOffset = TFL.getPSPSlotOffsetFromSP(MF); + auto MOFlags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), + StackPointer, false, PSPSlotOffset) + .addReg(StackPointer) + .addMemOperand(MF.getMachineMemOperand(PSPInfo, MOFlags, SlotSize, + Align(SlotSize))); + } + + void EmitCFIForRegisterSpills(bool &HasSpills) { + int64_t Offset = 2 * SlotSize; + + MachineBasicBlock::iterator MBBE = MBB.end(); + while (MBBI != MBBE && MBBI->getFlag(MachineInstr::FrameSetup) && + (MBBI->getOpcode() == X86::PUSH32r || + MBBI->getOpcode() == X86::PUSH64r)) { + Register Reg = MBBI->getOperand(0).getReg(); + ++MBBI; + + if (!HasFramePointer) { + // Mark callee-saved push instruction. Define the current CFA rule to + // use the provided offset. + EmitDWARFCFI(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset)); + Offset += SlotSize; + } + EmitWinCFI(X86::SEH_PushReg, {Reg}); + + HasSpills = true; } - SPOrEstablisher = Establisher; - } else { - SPOrEstablisher = StackPtr; } - if (IsWin64Prologue && HasFP) { - // Set RBP to a small fixed offset from RSP. In the funclet case, we base - // this calculation on the incoming establisher, which holds the value of - // RSP from the parent frame at the end of the prologue. - SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); - if (SEHFrameOffset) - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), - SPOrEstablisher, false, SEHFrameOffset); - else - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) - .addReg(SPOrEstablisher); - - // If this is not a funclet, emit the CFI describing our frame pointer. - if (NeedsWinCFI && !IsFunclet) { - assert(!NeedsWinFPO && "this setframe incompatible with FPO data"); - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) - .addImm(FramePtr) - .addImm(SEHFrameOffset) + void EmitBasePointerSetup(Register StackPointer) { + // 32-bit funclets do not have base pointers. + if (IsFunclet && Is32Bit) + return; + + if (!TRI->hasBasePointer(MF)) + return; + + // Update the base pointer with the current stack pointer. + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePointer ? X86::MOV64rr : X86::MOV32rr), + TRI->getBaseRegister()) + .addReg(StackPointer) + .setMIFlag(MachineInstr::FrameSetup); + + // Stash value of base pointer. Saving RSP instead of EBP shortens + // dependence chain. Used by SjLj EH. + if (TFI->getRestoreBasePointer()) + addRegOffset(BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePointer ? X86::MOV64mr + : X86::MOV32mr)), + FramePointer, true, TFI->getRestoreBasePointerOffset()) + .addReg(StackPointer) .setMIFlag(MachineInstr::FrameSetup); - if (isAsynchronousEHPersonality(Personality)) - MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; - } - } else if (IsFunclet && STI.is32Bit()) { - // Reset EBP / ESI to something good for funclets. - MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); - // If we're a catch funclet, we can be returned to via catchret. Save ESP - // into the registration node so that the runtime will restore it for us. 
- if (!MBB.isCleanupFuncletEntry()) { - assert(Personality == EHPersonality::MSVC_CXX); - Register FrameReg; - int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; - int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); - // ESP is the first field, so no extra displacement is needed. - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, - false, EHRegOffset) - .addReg(X86::ESP); + + // Stash the value of the frame pointer relative to the base pointer for + // Win32 EH. This supports Win32 EH, which does the inverse of the above: it + // recovers the frame pointer from the base pointer rather than the other + // way around. + if (TFI->getHasSEHFramePtrSave() && !IsFunclet) { + Register Reg; + int Offset = + TFL.getFrameIndexReference(MF, TFI->getSEHFramePtrSaveIndex(), Reg) + .getFixed(); + assert(Reg == TRI->getBaseRegister()); + addRegOffset(BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePointer ? X86::MOV64mr + : X86::MOV32mr)), + Reg, true, Offset) + .addReg(FramePointer) + .setMIFlag(MachineInstr::FrameSetup); } } - while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { - const MachineInstr &FrameInstr = *MBBI; - ++MBBI; + void EmitFPRSpillCFI(unsigned SEHFrameOffset) { + MachineBasicBlock::iterator MBBE = MBB.end(); + while (MBBI != MBBE && MBBI->getFlag(MachineInstr::FrameSetup)) { + const MachineInstr &MI = *MBBI; + ++MBBI; - if (NeedsWinCFI) { int FI; - if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { + if (unsigned Reg = TII.isStoreToStackSlot(MI, FI)) { if (X86::FR64RegClass.contains(Reg)) { - int Offset; - Register IgnoredFrameReg; - if (IsWin64Prologue && IsFunclet) - Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); - else - Offset = - getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() + - SEHFrameOffset; - - HasWinCFI = true; - assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) - .addImm(Reg) - .addImm(Offset) - .setMIFlag(MachineInstr::FrameSetup); + Register Ignored; + int Offset = + (IsWin64Prologue && IsFunclet) + ? TFL.getWin64EHFrameIndexRef(MF, FI, Ignored) + : (TFL.getFrameIndexReference(MF, FI, Ignored).getFixed() + SEHFrameOffset); + assert(!ShouldEmitWinFPO && "SEH_SaveXMM incompatible with FPO data"); + EmitWinCFI(X86::SEH_SaveXMM, {Reg, Offset}); } } } } - if (NeedsWinCFI && HasWinCFI) - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + void EmitFramePointer() { + if (!HasFramePointer) + return; + + assert(MF.getRegInfo().isReserved(MachineFramePointer) && + "FP should be reserved"); + + // Save ebp/rbp into the appropriate stack slot. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(MachineFramePointer, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (FnHasClrFunclet && !IsFunclet) { - // Save the so-called Initial-SP (i.e. the value of the stack pointer - // immediately after the prolog) into the PSPSlot so that funclets - // and the GC can recover it. 
- unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); - auto PSPInfo = MachinePointerInfo::getFixedStack( - MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, - PSPSlotOffset) - .addReg(StackPtr) - .addMemOperand(MF.getMachineMemOperand( - PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - SlotSize, Align(SlotSize))); - } - - // Realign stack after we spilled callee-saved registers (so that we'll be - // able to calculate their offsets from the frame pointer). - // Win64 requires aligning the stack after the prologue. - if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { - assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); - } - - // We already dealt with stack realignment and funclets above. - if (IsFunclet && STI.is32Bit()) - return; + // Mark the place where EBP/RBP was saved. Define the current CFA rule to + // use the provided offset. + unsigned FPRegister = TRI->getDwarfRegNum(MachineFramePointer, true); + EmitDWARFCFI(MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * -SlotSize)); + EmitDWARFCFI(MCCFIInstruction::createOffset(nullptr, FPRegister, + 2 * -SlotSize)); - // If we need a base pointer, set it up here. It's whatever the value - // of the stack pointer is at this point. Any variable size objects - // will be allocated after this, so we can still use the base pointer - // to reference locals. - if (TRI->hasBasePointer(MF)) { - // Update the base pointer with the current stack pointer. - unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; - BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(SPOrEstablisher) + EmitWinCFI(X86::SEH_PushReg, {FramePointer}); + + // Swift async_context is homed prior to the frame pointer. + EmitSwiftAsyncContextSpill(); + + if (IsWin64Prologue || IsFunclet) + return; + + // `SwiftAsyncContextSpill` has already spilled the frame pointer after the + // Swift async_context. + if (!TFI->hasSwiftAsyncContext()) + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePointer ? X86::MOV64rr : X86::MOV32rr), + FramePointer) + .addReg(StackPointer) + .setMIFlag(MachineInstr::FrameSetup); + + // Mark effective beginning of when frame pointer becomes valid. Define the + // current CFA to use the EBP/RBP register. + EmitDWARFCFI(MCCFIInstruction::createDefCfaRegister(nullptr, FPRegister)); + + // .cv_fpo_setframe $FramePtr + EmitWinCFI(X86::SEH_SetFrame, {FramePointer, 0}); + } + + void EmitMandatoryTailCallArgumentReservation() { + int Reservation = TFI->getTCReturnAddrDelta(); + if (Reservation == 0) + return; + + if (IsWin64Prologue) + report_fatal_error("Can't handle guaranteed tail call under Win64 yet"); + + // Insert stack pointer adjustment for later moving of return addr. Only + // applies to tail call optimized functions where the callee argument stack + // size is bigger than the callers. + TFL.BuildStackAdjustment(MBB, MBBI, DL, Reservation, /*InEpilogue*/false) .setMIFlag(MachineInstr::FrameSetup); - if (X86FI->getRestoreBasePointer()) { - // Stash value of base pointer. Saving RSP instead of EBP shortens - // dependence chain. Used by SjLj EH. - unsigned Opm = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), - FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .addReg(SPOrEstablisher) - .setMIFlag(MachineInstr::FrameSetup); + } + + void EmitEarlyStackRealignment() { + // Don't do this for Win64, it needs to realign the stack after the + // prologue. + if (IsWin64Prologue) + return; + + if (IsFunclet) + return; + + if (!HasStackRealignment) + return; + + assert(HasFramePointer && + "There should be a frame pointer if stack is realigned."); + + uint64_t Alignment = TFL.calculateMaxStackAlign(MF); + TFL.BuildStackAlignAND(MBB, MBBI, DL, StackPointer, Alignment); + EmitWinCFI(X86::SEH_StackAlign, {static_cast(Alignment)}); + } + + void EmitLateStackRealignment(Register StackPointer) { + // The non-Win64 targets have performed the stack re-alignment in the early + // phase. + if (!IsWin64Prologue) + return; + + if (!HasStackRealignment) + return; + + assert(HasFramePointer && + "There should be a frame pointer if stack is realigned."); + + uint64_t Alignment = TFL.calculateMaxStackAlign(MF); + TFL.BuildStackAlignAND(MBB, MBBI, DL, StackPointer, Alignment); + } + + void EmitStackAdjustment(uint64_t StackSize, uint64_t &FrameSize, + uint64_t &ParentFrameSize) { + const uint64_t Alignment = TFL.calculateMaxStackAlign(MF); + + FrameSize = + StackSize - TFI->getCalleeSavedFrameSize() + TFI->getTCReturnAddrDelta(); + + if (HasFramePointer) { + // Include extra hidden slot for the base pointer, if needed. + FrameSize -= (TFI->getRestoreBasePointer() ? 0 : SlotSize); + + // Callee-saved registers are pushed on the stack before the stack is + // realigned. + if (HasStackRealignment && !IsWin64Prologue) + FrameSize = alignTo(FrameSize, Alignment); } - if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { - // Stash the value of the frame pointer relative to the base pointer for - // Win32 EH. This supports Win32 EH, which does the inverse of the above: - // it recovers the frame pointer from the base pointer rather than the - // other way around. - unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - Register UsedReg; - int Offset = - getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) - .getFixed(); - assert(UsedReg == BasePtr); - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) - .addReg(FramePtr) - .setMIFlag(MachineInstr::FrameSetup); + // Update the offset adjustment, which is mainly used by codeview to + // translate from ESP to VFRAME relative local variable offsets. + if (!IsFunclet) { + if (HasFramePointer && HasStackRealignment) + MFI.setOffsetAdjustment(-FrameSize); + else + MFI.setOffsetAdjustment(-StackSize); + } + + // For EH funclets, only allocate enough space for outgoing calls. Save the + // FrameSize value that we would've used for the parent frame. + ParentFrameSize = FrameSize; + if (IsFunclet) + FrameSize = TFL.getWinEHFuncletFrameSize(MF); + + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + FrameSize -= TFL.mergeSPUpdates(MBB, MBBI, true); + + uint64_t AlignedNumBytes = FrameSize; + if (IsWin64Prologue && !IsFunclet && HasStackRealignment) + AlignedNumBytes = alignTo(AlignedNumBytes, Alignment); + + // Adjust stack pointer: ESP -= FrameSize. 
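+    // Allocations of StackProbeSize or more on targets that need probing go
+    // through EmitStackProbe so guard pages are touched in order; smaller
+    // frames use a plain stack pointer update.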
+ + if (AlignedNumBytes >= StackProbeSize && ShouldEmitStackProbe) { + EmitStackProbe(FrameSize); + } else if (FrameSize) { + TFL.emitSPUpdate(MBB, MBBI, DL, -(int64_t)FrameSize, /*InEpilogue=*/false); } + + if (FrameSize) + EmitWinCFI(X86::SEH_StackAlloc, {static_cast(FrameSize)}); } - if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { + void EmitDWARFFrameMoves(uint64_t StackSize, uint64_t FrameSize, + bool HaveSpills) { + // 32-bit funclets do not have DWARF CFI for frame moves. + if (IsFunclet && Is32Bit) + return; + + if (!ShouldEmitDWARFCFI) + return; + + if (!HaveSpills && (HasFramePointer || FrameSize == 0)) + return; + // Mark end of stack pointer adjustment. - if (!HasFP && NumBytes) { + + if (!HasFramePointer && FrameSize) { // Define the current CFA rule to use the provided offset. assert(StackSize); - BuildCFI( - MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth), - MachineInstr::FrameSetup); + EmitDWARFCFI(MCCFIInstruction::cfiDefCfaOffset(nullptr, + StackSize + SlotSize)); } - // Emit DWARF info specifying the offsets of the callee-saved registers. - emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); + TFL.emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); } - // X86 Interrupt handling function cannot assume anything about the direction - // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction - // in each prologue of interrupt handler function. - // - // FIXME: Create "cld" instruction only in these cases: - // 1. The interrupt handling function uses any of the "rep" instructions. - // 2. Interrupt handling function calls another function. - // - if (Fn.getCallingConv() == CallingConv::X86_INTR) - BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) - .setMIFlag(MachineInstr::FrameSetup); + void Emit32BitFuncletStackPointerSpill() { + if (!IsFunclet || !Is32Bit) + return; + + assert(!IsWin64Prologue || !HasFramePointer); + + // Reset ebp/esi to something good for funclets. + MBBI = TFL.restoreWin32EHStackPointers(MBB, MBBI, DL); + + // If we're a catch funclet, we can be returned to via `catchret`. Save esp + // into the registration node so that the runtime will restore it for us. + if (MBB.isCleanupFuncletEntry()) + return; + + assert(Personality == EHPersonality::MSVC_CXX); + + Register FramePointer; + int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; + int64_t EHRegOffset = + TFL.getFrameIndexReference(MF, FI, FramePointer).getFixed(); + + // esp is the first field, so no extra displacement is needed. + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), + FramePointer, false, EHRegOffset) + .addReg(X86::ESP); + } + + void EmitWin64FramePointer(uint64_t ParentFrameSize, Register StackPointer, + int &SEHFrameOffset) { + if (!IsWin64Prologue) + return; + if (!HasFramePointer) + return; + + SEHFrameOffset = calculateSetFPREG(ParentFrameSize); + if (SEHFrameOffset) + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePointer), + StackPointer, false, SEHFrameOffset); + else + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePointer) + .addReg(StackPointer); + + // Only emit CFI describing our frame pointer if we are not a funclet. 
+ if (IsFunclet) + return; + + assert(!ShouldEmitWinFPO && ".seh_setframe is incompatible with FPO data"); + EmitWinCFI(X86::SEH_SetFrame, {FramePointer, SEHFrameOffset}); + if (isAsynchronousEHPersonality(Personality)) + MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; + } +}; + +/// emitPrologue - Push callee-saved registers onto the stack, which +/// automatically adjust the stack pointer. Adjust the stack pointer to allocate +/// space for local variables. Also emit labels used by the exception handler to +/// generate the exception handling frames. + +/* + Here's a gist of what gets emitted: + + ; Establish frame pointer, if needed + [if needs FP] + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + .seh_pushreg %rpb + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + + ; Spill general-purpose registers + [for all callee-saved GPRs] + pushq % + [if not needs FP] + .cfi_def_cfa_offset (offset from RETADDR) + .seh_pushreg % + + ; If the required stack alignment > default stack alignment + ; rsp needs to be re-aligned. This creates a "re-alignment gap" + ; of unknown size in the stack frame. + [if stack needs re-alignment] + and $MASK, %rsp + + ; Allocate space for locals + [if target is Windows and allocated space > 4096 bytes] + ; Windows needs special care for allocations larger + ; than one page. + mov $NNN, %rax + call ___chkstk_ms/___chkstk + sub %rax, %rsp + [else] + sub $NNN, %rsp + + [if needs FP] + .seh_stackalloc (size of XMM spill slots) + .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots + [else] + .seh_stackalloc NNN + + ; Spill XMMs + ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, + ; they may get spilled on any platform, if the current function + ; calls @llvm.eh.unwind.init + [if needs FP] + [for all callee-saved XMM registers] + movaps %, -MMM(%rbp) + [for all callee-saved XMM registers] + .seh_savexmm %, (-MMM + SEHFrameOffset) + ; i.e. the offset relative to (%rbp - SEHFrameOffset) + [else] + [for all callee-saved XMM registers] + movaps %, KKK(%rsp) + [for all callee-saved XMM registers] + .seh_savexmm %, KKK + + .seh_endprologue + + [if needs base pointer] + mov %rsp, %rbx + [if needs to restore base pointer] + mov %rsp, -MMM(%rbp) + + ; Emit CFI info + [if needs FP] + [for all callee-saved registers] + .cfi_offset %, (offset from %rbp) + [else] + .cfi_def_cfa_offset (offset from RETADDR) + [for all callee-saved registers] + .cfi_offset %, (offset from %rsp) + + Notes: + - .seh directives are emitted only for Windows 64 ABI + - .cv_fpo directives are emitted on win32 when emitting CodeView + - .cfi directives are emitted for all other ABIs + - for 32-bit code, substitute %e?? registers for %r?? +*/ + +void X86FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&STI == &MF.getSubtarget() && + "MF used frame lowering for wrong subtarget"); - // At this point we know if the function has WinCFI or not. - MF.setHasWinCFI(HasWinCFI); + FrameBuilder FB(*this, MF, MBB); + FB.EmitPrologue(); } bool X86FrameLowering::canUseLEAForSPInEpilogue(