Index: llvm/docs/BitCodeFormat.rst
===================================================================
--- llvm/docs/BitCodeFormat.rst
+++ llvm/docs/BitCodeFormat.rst
@@ -794,6 +794,7 @@
 * ``preserve_allcc``: code 15
 * ``swiftcc`` : code 16
 * ``cxx_fast_tlscc``: code 17
+* ``tailcc`` : code 18
 * ``x86_stdcallcc``: code 64
 * ``x86_fastcallcc``: code 65
 * ``arm_apcscc``: code 66
Index: llvm/docs/CodeGenerator.rst
===================================================================
--- llvm/docs/CodeGenerator.rst
+++ llvm/docs/CodeGenerator.rst
@@ -2068,12 +2068,12 @@
 and PowerPC if:
 
 * Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
-  calling convention) or ``cc 11`` (HiPE calling convention).
+  calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``.
 
 * The call is a tail call - in tail position (ret immediately follows call
   and ret uses value of call or is void).
 
-* Option ``-tailcallopt`` is enabled.
+* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``.
 
 * Platform-specific constraints are met.
 
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -299,7 +299,7 @@
     allows the target to use whatever tricks it wants to produce fast
     code for the target, without having to conform to an externally
    specified ABI (Application Binary Interface). `Tail calls can only
-    be optimized when this, the GHC or the HiPE convention is
+    be optimized when this, the tailcc, the GHC or the HiPE convention is
     used. `_ This calling convention does not support varargs and
     requires the prototype of all callees to exactly match the
     prototype of the function definition.
@@ -436,6 +436,17 @@
     - On X86-64 RCX and R8 are available for additional integer returns, and
      XMM2 and XMM3 are available for additional FP/vector returns.
    - On iOS platforms, we use AAPCS-VFP calling convention.
+"``tailcc``" - Tail callable calling convention
+    This calling convention attempts to make calls as fast as possible
+    (e.g. by passing things in registers), while also ensuring it is always
+    possible to perform tail call optimization on calls that are in tail
+    position. This calling convention allows the target to use whatever tricks
+    it wants to produce fast code for the target, without having to conform to
+    an externally specified ABI (Application Binary Interface). `Tail calls can
+    only be optimized when this, the fastcc, the GHC or the HiPE convention is
+    used. `_ This calling convention does not
+    support varargs and requires the prototype of all callees to exactly
+    match the prototype of the function definition.
 "``cc <n>``" - Numbered convention
     Any calling convention may be specified by number, allowing
     target-specific calling conventions to be used. Target specific
@@ -10224,11 +10235,12 @@
 Tail call optimization for calls marked ``tail`` is guaranteed to occur if
 the following conditions are met:
 
-   -  Caller and callee both have the calling convention ``fastcc``.
+   -  Caller and callee both have the calling convention ``fastcc`` or ``tailcc``.
    -  The call is in tail position (ret immediately follows call and ret
       uses value of call or is void).
-   -  Option ``-tailcallopt`` is enabled, or
-      ``llvm::GuaranteedTailCallOpt`` is ``true``.
+   -  Option ``-tailcallopt`` is enabled,
+      ``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention
+      is ``tailcc``.
    -  `Platform-specific constraints are
       met. `_
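To make the documented guarantee concrete, here is a minimal LLVM IR sketch (the names @callee and @caller are illustrative, not part of the patch). Caller and callee share the tailcc convention and the ret immediately consumes the call result, so per the rules above the tail call is guaranteed even without -tailcallopt:

```llvm
; Minimal sketch, not part of the patch: caller and callee share the
; tailcc convention and the call is in tail position (ret immediately
; uses the call result), so tail call optimization is guaranteed
; without -tailcallopt.
declare tailcc i32 @callee(i32 %x)

define tailcc i32 @caller(i32 %x) {
entry:
  %r = tail call tailcc i32 @callee(i32 %x)
  ret i32 %r
}
```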
Index: llvm/include/llvm/IR/CallingConv.h
===================================================================
--- llvm/include/llvm/IR/CallingConv.h
+++ llvm/include/llvm/IR/CallingConv.h
@@ -75,6 +75,11 @@
   // CXX_FAST_TLS - Calling convention for access functions.
   CXX_FAST_TLS = 17,
 
+  /// Tail - This calling convention attempts to make calls as fast as
+  /// possible while guaranteeing that tail call optimization can always
+  /// be performed.
+  Tail = 18,
+
   // Target - This is the start of the target-specific calling conventions,
   // e.g. fastcall and thiscall on X86.
   FirstTargetCC = 64,
Index: llvm/lib/AsmParser/LLLexer.cpp
===================================================================
--- llvm/lib/AsmParser/LLLexer.cpp
+++ llvm/lib/AsmParser/LLLexer.cpp
@@ -622,6 +622,7 @@
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
   KEYWORD(amdgpu_kernel);
+  KEYWORD(tailcc);
 
   KEYWORD(cc);
   KEYWORD(c);
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -1955,6 +1955,7 @@
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
 ///   ::= 'amdgpu_kernel'
+///   ::= 'tailcc'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -2000,6 +2001,7 @@
   case lltok::kw_amdgpu_ps:      CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs:      CC = CallingConv::AMDGPU_CS; break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
+  case lltok::kw_tailcc:         CC = CallingConv::Tail; break;
   case lltok::kw_cc: {
     Lex.Lex();
     return ParseUInt32(CC);
Index: llvm/lib/AsmParser/LLToken.h
===================================================================
--- llvm/lib/AsmParser/LLToken.h
+++ llvm/lib/AsmParser/LLToken.h
@@ -168,6 +168,7 @@
   kw_amdgpu_ps,
   kw_amdgpu_cs,
   kw_amdgpu_kernel,
+  kw_tailcc,
 
   // Attributes:
   kw_attributes,
Index: llvm/lib/CodeGen/Analysis.cpp
===================================================================
--- llvm/lib/CodeGen/Analysis.cpp
+++ llvm/lib/CodeGen/Analysis.cpp
@@ -523,7 +523,8 @@
   // longjmp on x86), it can end up causing miscompilation that has not
   // been fully understood.
   if (!Ret &&
-      (!TM.Options.GuaranteedTailCallOpt || !isa<ReturnInst>(Term)))
+      ((!TM.Options.GuaranteedTailCallOpt &&
+        CS.getCallingConv() != CallingConv::Tail) || !isa<ReturnInst>(Term)))
     return false;
 
   // If I will have a chain, make sure no other instruction that will have a
Index: llvm/lib/IR/AsmWriter.cpp
===================================================================
--- llvm/lib/IR/AsmWriter.cpp
+++ llvm/lib/IR/AsmWriter.cpp
@@ -352,6 +352,7 @@
   case CallingConv::PreserveAll:   Out << "preserve_allcc"; break;
   case CallingConv::CXX_FAST_TLS:  Out << "cxx_fast_tlscc"; break;
   case CallingConv::GHC:           Out << "ghccc"; break;
+  case CallingConv::Tail:          Out << "tailcc"; break;
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall:  Out << "x86_thiscallcc"; break;
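Because CallingConv::Tail is pinned to the value 18 above, the keyword form handled by the lexer/parser and the numbered form are interchangeable in textual IR. A small sketch (the declarations @f and @g are hypothetical):

```llvm
; These two declarations name the same convention (code 18); AsmWriter
; should print both back as "tailcc".
declare tailcc void @f(i32)
declare cc 18 void @g(i32)
```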
CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, @@ -1000,6 +1001,7 @@ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, Index: llvm/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/lib/Target/X86/X86FastISel.cpp +++ llvm/lib/Target/X86/X86FastISel.cpp @@ -1160,6 +1160,7 @@ CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && + CC != CallingConv::Tail && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && @@ -1173,7 +1174,8 @@ // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Let SDISel handle vararg functions. @@ -3157,7 +3159,7 @@ if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE) + CC == CallingConv::HiPE || CC == CallingConv::Tail) return 0; if (CS) @@ -3208,6 +3210,7 @@ default: return false; case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: @@ -3224,7 +3227,8 @@ // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Don't know how to handle Win64 varargs yet. Nothing special needed for Index: llvm/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86FrameLowering.cpp +++ llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2269,7 +2269,8 @@ bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || - CallingConvention == CallingConv::Fast) { + CallingConvention == CallingConv::Fast || + CallingConvention == CallingConv::Tail) { if (IsNested) report_fatal_error("Segmented stacks does not support fastcall with " "nested function."); Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3018,7 +3018,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM); + CC == CallingConv::HHVM || CC == CallingConv::Tail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -3044,7 +3044,7 @@ /// Return true if the function is being made into a tailcall target by /// changing its ABI. 
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3018,7 +3018,7 @@
 static bool canGuaranteeTCO(CallingConv::ID CC) {
   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
-          CC == CallingConv::HHVM);
+          CC == CallingConv::HHVM || CC == CallingConv::Tail);
 }
 
 /// Return true if we might ever do TCO for calls with this calling convention.
@@ -3044,7 +3044,7 @@
 /// Return true if the function is being made into a tailcall target by
 /// changing its ABI.
 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
-  return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
+  return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
 }
 
 bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
@@ -3691,7 +3691,8 @@
     isTailCall = false;
 
   if (Subtarget.isPICStyleGOT() &&
-      !MF.getTarget().Options.GuaranteedTailCallOpt) {
+      !MF.getTarget().Options.GuaranteedTailCallOpt &&
+      CallConv != CallingConv::Tail) {
     // If we are using a GOT, disable tail calls to external symbols with
     // default visibility. Tail calling such a symbol requires using a GOT
     // relocation, which forces early binding of the symbol. This breaks code
@@ -3718,7 +3719,8 @@
 
   // Sibcalls are automatically detected tailcalls which do not require
   // ABI changes.
-  if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
+  if (!MF.getTarget().Options.GuaranteedTailCallOpt &&
+      CallConv != CallingConv::Tail && isTailCall)
     IsSibcall = true;
 
   if (isTailCall)
@@ -3750,8 +3752,8 @@
     // This is a sibcall. The memory operands are available in caller's
     // own caller's stack.
     NumBytes = 0;
-  else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
-           canGuaranteeTCO(CallConv))
+  else if ((MF.getTarget().Options.GuaranteedTailCallOpt ||
+            CallConv == CallingConv::Tail) && canGuaranteeTCO(CallConv))
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
@@ -4383,7 +4385,7 @@
   if (IsCalleeWin64 != IsCallerWin64)
     return false;
 
-  if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
+  if (DAG.getTarget().Options.GuaranteedTailCallOpt || CalleeCC == CallingConv::Tail) {
     if (canGuaranteeTCO(CalleeCC) && CCMatch)
       return true;
     return false;
@@ -24546,6 +24548,7 @@
   case CallingConv::X86_FastCall:
   case CallingConv::X86_ThisCall:
   case CallingConv::Fast:
+  case CallingConv::Tail:
     // Pass 'nest' parameter in EAX.
     // Must be kept in sync with X86CallingConv.td
     NestReg = X86::EAX;
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -815,6 +815,7 @@
     // On Win64, all these conventions just use the default convention.
     case CallingConv::C:
     case CallingConv::Fast:
+    case CallingConv::Tail:
     case CallingConv::Swift:
     case CallingConv::X86_FastCall:
     case CallingConv::X86_StdCall:
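Note that the guaranteed-TCO path added around line 4385 above still requires canGuaranteeTCO(CalleeCC) && CCMatch, so the guarantee only holds when the caller's convention matches the call site's. A hypothetical counter-example (names are illustrative, not from the patch):

```llvm
; The caller is tailcc but the call site uses fastcc without
; -tailcallopt, so this is not a guaranteed tail call; it may still be
; emitted as a sibcall if the usual target-specific checks pass.
declare fastcc i32 @fast_callee(i32 %x)

define tailcc i32 @mixed_caller(i32 %x) {
entry:
  %r = tail call fastcc i32 @fast_callee(i32 %x)
  ret i32 %r
}
```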
Index: llvm/test/CodeGen/X86/tailcall-tailcc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/tailcall-tailcc.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=i686-- | grep TAILCALL | count 7
+
+; With the tailcc calling convention, CodeGen guarantees tail call
+; optimization for all of these calls, with no need for -tailcallopt.
+
+declare tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
+entry:
+  %tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+  ret i32 %tmp11
+}
+
+declare tailcc i8* @alias_callee()
+
+define tailcc noalias i8* @noalias_caller() nounwind {
+  %p = tail call tailcc i8* @alias_callee()
+  ret i8* %p
+}
+
+declare tailcc noalias i8* @noalias_callee()
+
+define tailcc i8* @alias_caller() nounwind {
+  %p = tail call tailcc noalias i8* @noalias_callee()
+  ret i8* %p
+}
+
+declare tailcc i32 @i32_callee()
+
+define tailcc i32 @ret_undef() nounwind {
+  %p = tail call tailcc i32 @i32_callee()
+  ret i32 undef
+}
+
+declare tailcc void @does_not_return()
+
+define tailcc i32 @noret() nounwind {
+  tail call tailcc void @does_not_return()
+  unreachable
+}
+
+define tailcc void @void_test(i32, i32, i32, i32) {
+  entry:
+   tail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+   ret void
+}
+
+define tailcc i1 @i1test(i32, i32, i32, i32) {
+  entry:
+  %4 = tail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+  ret i1 %4
+}
Index: llvm/utils/vim/syntax/llvm.vim
===================================================================
--- llvm/utils/vim/syntax/llvm.vim
+++ llvm/utils/vim/syntax/llvm.vim
@@ -82,6 +82,7 @@
       \ externally_initialized
       \ extern_weak
       \ fastcc
+      \ tailcc
       \ filter
       \ from
       \ gc