Index: llvm/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86FrameLowering.cpp +++ llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1349,15 +1349,12 @@ // to determine the end of the prologue. DebugLoc DL; - // Add RETADDR move area to callee saved frame size. - int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta && IsWin64Prologue) + // Space reserved for stack-based arguments when making a (ABI-guaranteed) + // tail call. + unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); + if (TailCallArgReserveSize && IsWin64Prologue) report_fatal_error("Can't handle guaranteed tail call under win64 yet"); - if (TailCallReturnAddrDelta < 0) - X86FI->setCalleeSavedFrameSize( - X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - const bool EmitStackProbeCall = STI.getTargetLowering()->hasStackProbeSymbol(MF); unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); @@ -1391,7 +1388,8 @@ !EmitStackProbeCall && // No stack probes. !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. !MF.shouldSplitStack()) { // Regular stack - uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); + uint64_t MinSize = + X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); if (HasFP) MinSize += SlotSize; X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -1401,8 +1399,8 @@ // Insert stack pointer adjustment for later moving of return addr. Only // applies to tail call optimized functions where the callee argument stack // size is bigger than the callers. 
- if (TailCallReturnAddrDelta < 0) { - BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta, + if (TailCallArgReserveSize != 0) { + BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize, /*InEpilogue=*/false) .setMIFlag(MachineInstr::FrameSetup); } @@ -1451,7 +1449,8 @@ if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + NumBytes = FrameSize - + (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); // Callee-saved registers are pushed on stack before the stack is realigned. if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) @@ -1554,7 +1553,8 @@ } } else { assert(!IsFunclet && "funclets without FPs not yet implemented"); - NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + NumBytes = StackSize - + (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); } // Update the offset adjustment, which is mainly used by codeview to translate @@ -2011,6 +2011,7 @@ uint64_t StackSize = MFI.getStackSize(); uint64_t MaxAlign = calculateMaxStackAlign(MF); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); bool HasFP = hasFP(MF); uint64_t NumBytes = 0; @@ -2024,14 +2025,14 @@ } else if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - NumBytes = FrameSize - CSSize; + NumBytes = FrameSize - CSSize - TailCallArgReserveSize; // Callee-saved registers were pushed on stack before the stack was // realigned. if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) NumBytes = alignTo(FrameSize, MaxAlign); } else { - NumBytes = StackSize - CSSize; + NumBytes = StackSize - CSSize - TailCallArgReserveSize; } uint64_t SEHStackAllocAmt = NumBytes; @@ -2098,7 +2099,6 @@ if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. 
if (NumBytes || MFI.hasVarSizedObjects()) @@ -2143,7 +2143,8 @@ if (!hasFP(MF) && NeedsDwarfCFI) { // Define the current CFA rule to use the provided offset. BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize)); + MCCFIInstruction::cfiDefCfaOffset( + nullptr, CSSize + TailCallArgReserveSize + SlotSize)); } --MBBI; } @@ -2226,7 +2227,6 @@ const X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t StackSize = MFI.getStackSize(); - bool HasFP = hasFP(MF); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int64_t FPDelta = 0; @@ -2262,39 +2262,27 @@ "FPDelta isn't aligned per the Win64 ABI!"); } - - if (TRI->hasBasePointer(MF)) { - assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); - if (FI < 0) { - // Skip the saved EBP. - return StackOffset::getFixed(Offset + SlotSize + FPDelta); - } else { - assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); - return StackOffset::getFixed(Offset + StackSize); - } - } else if (TRI->hasStackRealignment(MF)) { - if (FI < 0) { - // Skip the saved EBP. - return StackOffset::getFixed(Offset + SlotSize + FPDelta); - } else { - assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); - return StackOffset::getFixed(Offset + StackSize); - } - // FIXME: Support tail calls - } else { - if (!HasFP) - return StackOffset::getFixed(Offset + StackSize); - - // Skip the saved EBP. + if (FrameReg == TRI->getFramePtr()) { + // Skip saved EBP/RBP Offset += SlotSize; + // Account for restricted Windows prologue. + Offset += FPDelta; + // Skip the RETADDR move area int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; + + return StackOffset::getFixed(Offset); } - return StackOffset::getFixed(Offset + FPDelta); + // FrameReg is either the stack pointer or a base pointer. 
But the base is + // located at the end of the statically known StackSize so the distinction + // doesn't really matter. + if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF)) + assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); + return StackOffset::getFixed(Offset + StackSize); } int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Index: llvm/test/CodeGen/X86/swifttail-realign.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/swifttail-realign.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s + +declare swifttailcc void @callee([6 x i64], i64, i64) + +@var = external global i8* + +define swifttailcc void @caller(i64 %n) { +; CHECK-LABEL: caller: +; CHECK: subq $16, %rsp +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: pushq %rbx +; CHECK: andq $-32, %rsp +; [... don't really care what happens to rsp to allocate %ptr ...] +; CHECK: movq 24(%rbp), [[RETADDR:%.*]] +; CHECK: movq [[RETADDR]], 8(%rbp) +; CHECK: movq $42, 16(%rbp) +; CHECK: movq $0, 24(%rbp) +; CHECK: leaq -8(%rbp), %rsp +; CHECK: popq %rbx +; CHECK: popq %rbp +; CHECK: jmp callee + + call void asm sideeffect "", "~{rbx}"() + %ptr = alloca i8, i64 %n, align 32 + store i8* %ptr, i8** @var + tail call swifttailcc void @callee([6 x i64] undef, i64 42, i64 0) + ret void +}