Index: lib/Target/X86/X86ExpandPseudo.cpp =================================================================== --- lib/Target/X86/X86ExpandPseudo.cpp +++ lib/Target/X86/X86ExpandPseudo.cpp @@ -44,6 +44,7 @@ const X86Subtarget *STI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; + const X86MachineFunctionInfo *X86FI; const X86FrameLowering *X86FL; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -88,11 +89,18 @@ // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); + int MaxTCDelta = X86FI->getTCReturnAddrDelta(); + int Offset = 0; + assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); - if (StackAdj) { + // Incorporate the retaddr area. + Offset = StackAdj-MaxTCDelta; + assert(Offset >= 0 && "Offset should never be negative"); + + if (Offset) { // Check for possible merge with preceding ADD instruction. - StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true); - X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); + Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); + X86FL->emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); } // Jump to label or value in register. 
@@ -247,6 +255,7 @@
   STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
+  X86FI = MF.getInfo<X86MachineFunctionInfo>();
   X86FL = STI->getFrameLowering();
 
   bool Modified = false;
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -1467,11 +1467,21 @@
   return FrameSizeMinusRBP - CSSize;
 }
 
+/// Returns true if \p Opc is one of the TCRETURN* tail-call opcodes.
+static bool isTailCallOpcode(unsigned Opc) {
+  return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
+         Opc == X86::TCRETURNmi ||
+         Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
+         Opc == X86::TCRETURNmi64;
+}
+
 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  // getFirstTerminator() may return end(), so guard the opcode read.
+  bool IsTailCall = MBBI != MBB.end() && isTailCallOpcode(MBBI->getOpcode());
   DebugLoc DL;
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
@@ -1620,15 +1630,17 @@
   if (NeedsWinCFI)
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
 
-  // Add the return addr area delta back since we are not tail calling.
-  int Offset = -1 * X86FI->getTCReturnAddrDelta();
-  assert(Offset >= 0 && "TCDelta should never be positive");
-  if (Offset) {
-    MBBI = MBB.getFirstTerminator();
+  if (!IsTailCall) {
+    // Add the return addr area delta back since we are not tail calling.
+    int Offset = -1 * X86FI->getTCReturnAddrDelta();
+    assert(Offset >= 0 && "TCDelta should never be positive");
+    if (Offset) {
+      MBBI = MBB.getFirstTerminator();
 
-    // Check for possible merge with preceding ADD instruction.
-    Offset += mergeSPUpdates(MBB, MBBI, true);
-    emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
+      // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, true); + emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); + } } } Index: test/CodeGen/X86/hipe-cc.ll =================================================================== --- test/CodeGen/X86/hipe-cc.ll +++ test/CodeGen/X86/hipe-cc.ll @@ -73,9 +73,23 @@ ret void } +; Sanity-check the tail call sequence. The number of arguments was chosen so +; as to expose a bug where the tail call sequence clobbered the stack. +define cc 11 { i32, i32, i32 } @tailcaller(i32 %hp, i32 %p) nounwind { + ; CHECK: movl $15, %eax + ; CHECK-NEXT: movl $31, %edx + ; CHECK-NEXT: movl $47, %ecx + ; CHECK-NEXT: popl %edi + ; CHECK-NEXT: jmp tailcallee + %ret = tail call cc11 { i32, i32, i32 } @tailcallee(i32 %hp, i32 %p, i32 15, + i32 31, i32 47, i32 63) nounwind + ret { i32, i32, i32 } %ret +} + !hipe.literals = !{ !0, !1, !2 } !0 = !{ !"P_NSP_LIMIT", i32 84 } !1 = !{ !"X86_LEAF_WORDS", i32 24 } !2 = !{ !"AMD64_LEAF_WORDS", i32 24 } @clos = external constant i32 declare cc 11 void @bar(i32, i32, i32, i32, i32) +declare cc 11 { i32, i32, i32 } @tailcallee(i32, i32, i32, i32, i32, i32) Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -83,9 +83,24 @@ ret void } +; Sanity-check the tail call sequence. The number of arguments was chosen so +; as to expose a bug where the tail call sequence clobbered the stack. 
+define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 { + ; CHECK: movl $15, %esi + ; CHECK-NEXT: movl $31, %edx + ; CHECK-NEXT: movl $47, %ecx + ; CHECK-NEXT: movl $63, %r8d + ; CHECK-NEXT: popq %rax + ; CHECK-NEXT: jmp tailcallee + %ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15, + i64 31, i64 47, i64 63, i64 79) #1 + ret { i64, i64, i64 } %ret +} + !hipe.literals = !{ !0, !1, !2 } !0 = !{ !"P_NSP_LIMIT", i32 160 } !1 = !{ !"X86_LEAF_WORDS", i32 24 } !2 = !{ !"AMD64_LEAF_WORDS", i32 24 } @clos = external constant i64 declare cc 11 void @bar(i64, i64, i64, i64, i64, i64) +declare cc 11 { i64, i64, i64 } @tailcallee(i64, i64, i64, i64, i64, i64, i64)