Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2464,15 +2464,22 @@ CC == CallingConv::HiPE || CC == CallingConv::HHVM); } -/// \brief Return true if the calling convention is a C calling convention. -static bool isCCallConvention(CallingConv::ID CC) { - return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 || - CC == CallingConv::X86_64_SysV); -} - /// Return true if we might ever do TCO for calls with this calling convention. static bool mayTailCallThisCC(CallingConv::ID CC) { - return isCCallConvention(CC) || canGuaranteeTCO(CC); + switch (CC) { + // C calling conventions: + case CallingConv::C: + case CallingConv::X86_64_Win64: + case CallingConv::X86_64_SysV: + // Callee pop conventions: + case CallingConv::X86_ThisCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_VectorCall: + case CallingConv::X86_FastCall: + return true; + default: + return canGuaranteeTCO(CC); + } } /// Return true if the function is being made into a tailcall target by @@ -3650,21 +3657,9 @@ if (isCalleeStructRet || isCallerStructRet) return false; - // Don't do TCO when the current function is expected to clear its stack and - // the callee's convention does not match. - // FIXME: this is more restrictive than needed. We could produce a tailcall - // when the stack adjustment matches. For example, with a thiscall that takes - // only one argument. - bool CallerPopsArgs = - X86::isCalleePop(CallerCC, Subtarget->is64Bit(), CallerF->isVarArg(), - /*GuaranteeTCO=*/false); - if (CallerPopsArgs && !CCMatch) - return false; - // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. if (isVarArg && !Outs.empty()) { - // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. 
if (IsCalleeWin64 || IsCallerWin64) @@ -3732,6 +3727,8 @@ } } + unsigned StackArgsSize = 0; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -3741,15 +3738,15 @@ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); + // Allocate shadow area for Win64 if (IsCalleeWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); - if (CCInfo.getNextStackOffset()) { - if (MF.getInfo()->getBytesToPopOnReturn()) - return false; + StackArgsSize = CCInfo.getNextStackOffset(); + if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -3800,6 +3797,20 @@ } } + bool CalleeWillPop = + X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg, + MF.getTarget().Options.GuaranteedTailCallOpt); + + if (unsigned BytesToPop = + MF.getInfo()->getBytesToPopOnReturn()) { + // If we have bytes to pop, the callee must pop them. + if (!CalleeWillPop || BytesToPop != StackArgsSize) + return false; + } else if (CalleeWillPop && StackArgsSize > 0) { + // If we don't have bytes to pop, make sure the callee doesn't pop any. + return false; + } + return true; } Index: test/CodeGen/X86/tailcall-msvc-conventions.ll =================================================================== --- test/CodeGen/X86/tailcall-msvc-conventions.ll +++ test/CodeGen/X86/tailcall-msvc-conventions.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=i686-windows-msvc -O1 < %s | FileCheck %s -; RUN: llc -mtriple=i686-windows-msvc -O0 < %s | FileCheck %s +; RUN: llc -mtriple=i686-unknown-linux-gnu -O1 < %s | FileCheck %s +; RUN: llc -mtriple=i686-unknown-linux-gnu -O0 < %s | FileCheck %s ; The MSVC family of x86 calling conventions makes tail calls really tricky. ; Tests of all the various combinations should live here. 
@@ -14,27 +14,176 @@ tail call void @cdecl_void() ret void } -; CHECK-LABEL: thiscall_cdecl_notail: # @thiscall_cdecl_notail -; CHECK: calll _cdecl_void +; CHECK-LABEL: thiscall_cdecl_notail +; CHECK: calll cdecl_void ; CHECK: retl $8 + define x86_stdcallcc void @stdcall_cdecl_notail(i32 %a, i32 %b, i32 %c) { tail call void @cdecl_void() ret void } -; CHECK-LABEL: _stdcall_cdecl_notail@12: # @stdcall_cdecl_notail -; CHECK: calll _cdecl_void +; CHECK-LABEL: stdcall_cdecl_notail +; CHECK: calll cdecl_void ; CHECK: retl $12 + define x86_vectorcallcc void @vectorcall_cdecl_notail(i32 inreg %a, i32 inreg %b, i32 %c) { tail call void @cdecl_void() ret void } -; CHECK-LABEL: vectorcall_cdecl_notail@@12: # @vectorcall_cdecl_notail -; CHECK: calll _cdecl_void +; CHECK-LABEL: vectorcall_cdecl_notail +; CHECK: calll cdecl_void ; CHECK: retl $4 + define x86_fastcallcc void @fastcall_cdecl_notail(i32 inreg %a, i32 inreg %b, i32 %c) { tail call void @cdecl_void() ret void } -; CHECK-LABEL: @fastcall_cdecl_notail@12: # @fastcall_cdecl_notail -; CHECK: calll _cdecl_void +; CHECK-LABEL: fastcall_cdecl_notail +; CHECK: calll cdecl_void +; CHECK: retl $4 + + +; Tail call to/from callee pop functions can work under the right circumstances: + +declare x86_thiscallcc void @no_args_method(i8*) +declare x86_thiscallcc void @one_arg_method(i8*, i32) +declare x86_thiscallcc void @two_args_method(i8*, i32, i32) +declare void @ccall_func() +declare void @ccall_func1(i32) + +define x86_thiscallcc void @thiscall_thiscall_tail(i8* %this) { +entry: + tail call x86_thiscallcc void @no_args_method(i8* %this) + ret void +} +; CHECK-LABEL: thiscall_thiscall_tail: +; CHECK: jmp no_args_method + +define x86_thiscallcc void @thiscall_thiscall_tail2(i8* %this, i32 %a, i32 %b) { +entry: + tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b) + ret void +} +; @two_args_method will take care of popping %a and %b from the stack for us. 
+; CHECK-LABEL: thiscall_thiscall_tail2: +; CHECK: jmp two_args_method + +define x86_thiscallcc void @thiscall_thiscall_notail(i8* %this, i32 %a, i32 %b, i32 %x) { +entry: + tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b) + ret void +} +; @two_args_method would not pop %x. +; CHECK-LABEL: thiscall_thiscall_notail: +; CHECK: calll two_args_method +; CHECK: retl $12 + +define x86_thiscallcc void @thiscall_thiscall_notail2(i8* %this, i32 %a) { +entry: + tail call x86_thiscallcc void @no_args_method(i8* %this) + ret void +} +; @no_args_method would not pop %a for us. Make sure this is checked even +; when there are no arguments to the call. +; CHECK-LABEL: thiscall_thiscall_notail2: +; CHECK: calll no_args_method +; CHECK: retl $4 + +define void @ccall_thiscall_tail(i8* %x) { +entry: + tail call x86_thiscallcc void @no_args_method(i8* %x) + ret void +} +; Tail calling from ccall to thiscall works. +; CHECK-LABEL: ccall_thiscall_tail: +; CHECK: jmp no_args_method + +define void @ccall_thiscall_notail(i8* %x, i32 %y) { +entry: + tail call x86_thiscallcc void @one_arg_method(i8* %x, i32 %y); + ret void +} +; @one_arg_method would pop %y off the stack. +; CHECK-LABEL: ccall_thiscall_notail: +; CHECK: calll one_arg_method + +define x86_thiscallcc void @thiscall_ccall_tail(i8* %this) { +entry: + tail call void @ccall_func() + ret void +} +; Tail call from thiscall to ccall works if no arguments need popping. +; CHECK-LABEL: thiscall_ccall_tail: +; CHECK: jmp ccall_func + +define x86_thiscallcc void @thiscall_ccall_notail(i8* %this, i32 %x) { +entry: + tail call void @ccall_func1(i32 %x) + ret void +} +; No tail call: %x needs to be popped. 
+; CHECK-LABEL: thiscall_ccall_notail: +; CHECK: calll ccall_func1 ; CHECK: retl $4 + +%S = type { i32 (...)** } +define x86_thiscallcc void @tailcall_through_pointer(%S* %this, i32 %a) { +entry: + %0 = bitcast %S* %this to void (%S*, i32)*** + %vtable = load void (%S*, i32)**, void (%S*, i32)*** %0 + %1 = load void (%S*, i32)*, void (%S*, i32)** %vtable + tail call x86_thiscallcc void %1(%S* %this, i32 %a) + ret void +} +; Tail calling works through function pointers too. +; CHECK-LABEL: tailcall_through_pointer: +; CHECK: jmpl + +define x86_stdcallcc void @stdcall_cdecl_tail() { + tail call void @ccall_func() + ret void +} +; stdcall to cdecl works if no arguments need popping. +; CHECK-LABEL: stdcall_cdecl_tail +; CHECK: jmp ccall_func + +define x86_vectorcallcc void @vectorcall_cdecl_tail(i32 inreg %a, i32 inreg %b) { + tail call void @ccall_func() + ret void +} +; vectorcall to cdecl works if no arguments need popping. +; CHECK-LABEL: vectorcall_cdecl_tail +; CHECK: jmp ccall_func + +define x86_fastcallcc void @fastcall_cdecl_tail(i32 inreg %a, i32 inreg %b) { + tail call void @ccall_func() + ret void +} +; fastcall to cdecl works if no arguments need popping. +; CHECK-LABEL: fastcall_cdecl_tail +; CHECK: jmp ccall_func + +define x86_stdcallcc void @stdcall_thiscall_notail(i8* %this, i32 %a, i32 %b) { + tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b) + ret void +} +; two_args_method will not pop %this. +; CHECK-LABEL: stdcall_thiscall_notail +; CHECK: calll two_args_method + +define x86_stdcallcc void @stdcall_thiscall_tail(i32 %a, i32 %b) { + tail call x86_thiscallcc void @two_args_method(i8* null, i32 %a, i32 %b) + ret void +} +; The callee pop amounts match up. 
+; CHECK-LABEL: stdcall_thiscall_tail +; CHECK: jmp two_args_method + +declare x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b) +define void @cdecl_fastcall_tail(i32 %a, i32 %b) { + tail call x86_fastcallcc void @fastcall2(i32 %a, i32 %b) + ret void +} +; fastcall2 won't pop anything. +; CHECK-LABEL: cdecl_fastcall_tail +; CHECK: jmp fastcall2