Index: llvm/docs/BitCodeFormat.rst =================================================================== --- llvm/docs/BitCodeFormat.rst +++ llvm/docs/BitCodeFormat.rst @@ -795,6 +795,8 @@ * ``swiftcc`` : code 16 * ``cxx_fast_tlscc``: code 17 * ``tailcc`` : code 18 + * ``cfguard_checkcc`` : code 19 + * ``swifttailcc`` : code 20 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 Index: llvm/docs/CodeGenerator.rst =================================================================== --- llvm/docs/CodeGenerator.rst +++ llvm/docs/CodeGenerator.rst @@ -2102,6 +2102,10 @@ * The caller and callee's return types must match. The caller cannot be void unless the callee is, too. +AArch64 constraints: + +* No variable argument lists are used. + Example: Call as ``llc -tailcallopt test.ll``. Index: llvm/include/llvm/IR/CallingConv.h =================================================================== --- llvm/include/llvm/IR/CallingConv.h +++ llvm/include/llvm/IR/CallingConv.h @@ -86,6 +86,11 @@ /// and has no return value. All register values are preserved. CFGuard_Check = 19, + /// SwiftTail - This follows the Swift calling convention in how arguments + /// are passed but guarantees tail calls will be made by making the callee + /// clean up their stack. + SwiftTail = 20, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -609,6 +609,7 @@ KEYWORD(x86_regcallcc); KEYWORD(webkit_jscc); KEYWORD(swiftcc); + KEYWORD(swifttailcc); KEYWORD(anyregcc); KEYWORD(preserve_mostcc); KEYWORD(preserve_allcc); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -2078,6 +2078,7 @@ /// ::= 'preserve_allcc' /// ::= 'ghccc' /// ::= 'swiftcc' +/// ::= 'swifttailcc' /// ::= 'x86_intrcc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' @@ -2128,6 +2129,7 @@ case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_swiftcc: CC = CallingConv::Swift; break; + case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break; case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; Index: llvm/lib/AsmParser/LLToken.h =================================================================== --- llvm/lib/AsmParser/LLToken.h +++ llvm/lib/AsmParser/LLToken.h @@ -156,6 +156,7 @@ kw_webkit_jscc, kw_anyregcc, kw_swiftcc, + kw_swifttailcc, kw_preserve_mostcc, kw_preserve_allcc, kw_ghccc, Index: llvm/lib/CodeGen/Analysis.cpp =================================================================== --- llvm/lib/CodeGen/Analysis.cpp +++ llvm/lib/CodeGen/Analysis.cpp @@ -513,9 +513,10 @@ // not profitable. Also, if the callee is a special function (e.g. // longjmp on x86), it can end up causing miscompilation that has not // been fully understood.
- if (!Ret && - ((!TM.Options.GuaranteedTailCallOpt && - Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term))) + if (!Ret && ((!TM.Options.GuaranteedTailCallOpt && + Call.getCallingConv() != CallingConv::Tail && + Call.getCallingConv() != CallingConv::SwiftTail) || + !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a Index: llvm/lib/IR/AsmWriter.cpp =================================================================== --- llvm/lib/IR/AsmWriter.cpp +++ llvm/lib/IR/AsmWriter.cpp @@ -388,6 +388,7 @@ case CallingConv::SPIR_FUNC: Out << "spir_func"; break; case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; + case CallingConv::SwiftTail: Out << "swifttailcc"; break; case CallingConv::X86_INTR: Out << "x86_intrcc"; break; case CallingConv::HHVM: Out << "hhvmcc"; break; case CallingConv::HHVM_C: Out << "hhvm_ccc"; break; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4515,6 +4515,8 @@ case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: if (Subtarget->isTargetWindows() && IsVarArg) return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) @@ -4985,8 +4987,9 @@ } /// Return true if the calling convention is one that we can guarantee TCO for. -static bool canGuaranteeTCO(CallingConv::ID CC) { - return CC == CallingConv::Fast; +static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { + return (CC == CallingConv::Fast && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } /// Return true if we might ever do TCO for calls with this calling convention.
@@ -4996,9 +4999,12 @@ case CallingConv::AArch64_SVE_VectorCall: case CallingConv::PreserveMost: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + case CallingConv::Fast: return true; default: - return canGuaranteeTCO(CC); + return false; } } @@ -5050,8 +5056,8 @@ return false; } - if (getTargetMachine().Options.GuaranteedTailCallOpt) - return canGuaranteeTCO(CalleeCC) && CCMatch; + if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + return CCMatch; // Externally-defined functions with weak linkage should not be // tail-called on AArch64 when the OS does not support dynamic @@ -5182,7 +5188,8 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { - return CallCC == CallingConv::Fast && TailCallOpt; + return (CallCC == CallingConv::Fast && TailCallOpt) || + CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; } /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, @@ -5233,7 +5240,8 @@ // A sibling call is one where we're under the usual C ABI and not planning // to change that but can still do a tail call: - if (!TailCallOpt && IsTailCall) + if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail && + CallConv != CallingConv::SwiftTail) IsSibCall = true; if (IsTailCall) Index: llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -231,7 +231,8 @@ } // namespace static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { - return CallConv == CallingConv::Fast && TailCallOpt; + return (CallConv == CallingConv::Fast && TailCallOpt) || + CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; } void AArch64CallLowering::splitToValueTypes( @@ -521,8 +522,9 @@ } /// Return true if the calling convention is one that we can guarantee TCO for. -static bool canGuaranteeTCO(CallingConv::ID CC) { - return CC == CallingConv::Fast; +static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { + return (CC == CallingConv::Fast && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } /// Return true if we might ever do TCO for calls with this calling convention. @@ -531,9 +533,12 @@ case CallingConv::C: case CallingConv::PreserveMost: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + case CallingConv::Fast: return true; default: - return canGuaranteeTCO(CC); + return false; } } @@ -718,8 +723,8 @@ } // If we have -tailcallopt, then we're done. - if (MF.getTarget().Options.GuaranteedTailCallOpt) - return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv(); + if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt)) + return CalleeCC == CallerF.getCallingConv(); // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). // Try to find cases where we can do that. @@ -772,7 +777,9 @@ AArch64FunctionInfo *FuncInfo = MF.getInfo(); // True when we're tail calling, but without -tailcallopt. - bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && + Info.CallConv != CallingConv::Tail && + Info.CallConv != CallingConv::SwiftTail; // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 // register class. 
Until we can do that, we should fall back here. Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1998,6 +1998,7 @@ return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: case CallingConv::Swift: + case CallingConv::SwiftTail: return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) Index: llvm/lib/Target/X86/X86CallingConv.td =================================================================== --- llvm/lib/Target/X86/X86CallingConv.td +++ llvm/lib/Target/X86/X86CallingConv.td @@ -462,6 +462,7 @@ // Handle Swift calls. CCIfCC<"CallingConv::Swift", CCDelegateTo>, + CCIfCC<"CallingConv::SwiftTail", CCDelegateTo>, // Handle explicit CC selection CCIfCC<"CallingConv::Win64", CCDelegateTo>, @@ -517,9 +518,11 @@ // A SwiftError is passed in R12. CCIfSwiftError>>, - // For Swift Calling Convention, pass sret in %rax. + // For Swift Calling Conventions, pass sret in %rax. CCIfCC<"CallingConv::Swift", CCIfSRet>>>, + CCIfCC<"CallingConv::SwiftTail", + CCIfSRet>>>, // Pointers are always passed in full 64-bit registers. CCIfPtr>, Index: llvm/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/lib/Target/X86/X86FastISel.cpp +++ llvm/lib/Target/X86/X86FastISel.cpp @@ -1284,7 +1284,8 @@ // the sret argument into %rax/%eax (depending on ABI) for the return. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. - if (F.hasStructRetAttr() && CC != CallingConv::Swift) { + if (F.hasStructRetAttr() && CC != CallingConv::Swift && + CC != CallingConv::SwiftTail) { Register Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); @@ -3231,6 +3232,7 @@ case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: + case CallingConv::SwiftTail: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3183,7 +3183,8 @@ static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM || CC == CallingConv::Tail); + CC == CallingConv::HHVM || CC == CallingConv::Tail || + CC == CallingConv::SwiftTail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -3209,7 +3210,8 @@ /// Return true if the function is being made into a tailcall target by /// changing its ABI. static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { - return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; + return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { @@ -3743,7 +3745,7 @@ for (unsigned I = 0, E = Ins.size(); I != E; ++I) { // Swift calling convention does not require we copy the sret argument // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 
- if (CallConv == CallingConv::Swift) + if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) continue; // All x86 ABIs require that for returning structs by value we copy the @@ -3907,7 +3909,7 @@ StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || - CallConv == CallingConv::Tail; + CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; X86MachineFunctionInfo *X86Info = MF.getInfo(); const auto *CI = dyn_cast_or_null(CLI.CB); const Function *Fn = CI ? CI->getCalledFunction() : nullptr; @@ -4613,7 +4615,7 @@ bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || - CalleeCC == CallingConv::Tail; + CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -26805,6 +26807,7 @@ case CallingConv::X86_ThisCall: case CallingConv::Fast: case CallingConv::Tail: + case CallingConv::SwiftTail: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; Index: llvm/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/lib/Target/X86/X86Subtarget.h +++ llvm/lib/Target/X86/X86Subtarget.h @@ -885,6 +885,7 @@ case CallingConv::Fast: case CallingConv::Tail: case CallingConv::Swift: + case CallingConv::SwiftTail: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: Index: llvm/test/Bitcode/compatibility.ll =================================================================== --- llvm/test/Bitcode/compatibility.ll +++ llvm/test/Bitcode/compatibility.ll @@ -382,6 +382,8 @@ ; CHECK: declare preserve_mostcc void @f.preserve_mostcc() declare preserve_allcc void @f.preserve_allcc() ; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare swifttailcc void @f.swifttailcc() +; CHECK: declare swifttailcc void @f.swifttailcc() declare cc64 void @f.cc64() ; CHECK: declare x86_stdcallcc void @f.cc64() declare x86_stdcallcc void @f.x86_stdcallcc() Index: llvm/test/CodeGen/AArch64/swifttail-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/swifttail-call.ll @@ -0,0 +1,158 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON +; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON + +declare swifttailcc void @callee_stack0() +declare swifttailcc void @callee_stack8([8 x i64], i64) +declare swifttailcc void @callee_stack16([8 x i64], i64, i64) +declare extern_weak swifttailcc void @callee_weak() + +define swifttailcc void @caller_to0_from0() nounwind { +; COMMON-LABEL: caller_to0_from0: +; COMMON-NEXT: // %bb. 
+ + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON-NEXT: b callee_stack0 +} + +define swifttailcc void @caller_to0_from8([8 x i64], i64) { +; COMMON-LABEL: caller_to0_from8: + + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON: add sp, sp, #16 +; COMMON-NEXT: b callee_stack0 +} + +define swifttailcc void @caller_to8_from0() { +; COMMON-LABEL: caller_to8_from0: +; COMMON: sub sp, sp, #32 + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack8 +} + +define swifttailcc void @caller_to8_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to8_from8: +; COMMON: sub sp, sp, #16 + +; Key point is that the "%a" should go where at SP on entry. + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack8 +} + +define swifttailcc void @caller_to16_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to16_from8: +; COMMON: sub sp, sp, #16 + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. If it tries to go below incoming-SP then the +; callee will not deallocate the space, even in swifttailcc. + tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) + +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack16 + ret void +} + + +define swifttailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { +; COMMON-LABEL: caller_to8_from24: +; COMMON: sub sp, sp, #16 + +; Key point is that the "%a" should go where at #16 above SP on entry. + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #32]! +; COMMON-NEXT: b callee_stack8 +} + + +define swifttailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { +; COMMON-LABEL: caller_to16_from16: +; COMMON: sub sp, sp, #16 + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) + ret void + +; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack16 +} + +define swifttailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" { +; COMMON-LABEL: disable_tail_calls: +; COMMON-NEXT: // %bb. + + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON: bl callee_stack0 +; COMMON: ret +} + +; Weakly-referenced extern functions cannot be tail-called, as AAELF does +; not define the behaviour of branch instructions to undefined weak symbols. +define swifttailcc void @caller_weak() { +; COMMON-LABEL: caller_weak: +; COMMON: bl callee_weak + tail call void @callee_weak() + ret void +} + +declare { [2 x float] } @get_vec2() + +define { [3 x float] } @test_add_elem() { +; SDAG-LABEL: test_add_elem: +; SDAG: bl get_vec2 +; SDAG: fmov s2, #1.0 +; SDAG: ret +; GISEL-LABEL: test_add_elem: +; GISEL: str x30, [sp, #-16]! 
+; GISEL: bl get_vec2 +; GISEL: fmov s2, #1.0 +; GISEL: ldr x30, [sp], #16 +; GISEL: ret + + %call = tail call { [2 x float] } @get_vec2() + %arr = extractvalue { [2 x float] } %call, 0 + %arr.0 = extractvalue [2 x float] %arr, 0 + %arr.1 = extractvalue [2 x float] %arr, 1 + + %res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0 + %res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1 + %res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2 + ret { [3 x float] } %res.012 +} + +declare double @get_double() +define { double, [2 x double] } @test_mismatched_insert() { +; COMMON-LABEL: test_mismatched_insert: +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: ret + + %val0 = call double @get_double() + %val1 = call double @get_double() + %val2 = tail call double @get_double() + + %res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0 + %res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0 + %res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1 + + ret { double, [2 x double] } %res.012 +} Index: llvm/test/CodeGen/AArch64/tailcc-tail-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/tailcc-tail-call.ll @@ -0,0 +1,158 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON +; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON + +declare tailcc void @callee_stack0() +declare tailcc void @callee_stack8([8 x i64], i64) +declare tailcc void @callee_stack16([8 x i64], i64, i64) +declare extern_weak tailcc void @callee_weak() + +define tailcc void @caller_to0_from0() nounwind { +; COMMON-LABEL: caller_to0_from0: +; COMMON-NEXT: // %bb. + + tail call tailcc void @callee_stack0() + ret void + +; COMMON-NEXT: b callee_stack0 +} + +define tailcc void @caller_to0_from8([8 x i64], i64) { +; COMMON-LABEL: caller_to0_from8: + + tail call tailcc void @callee_stack0() + ret void + +; COMMON: add sp, sp, #16 +; COMMON-NEXT: b callee_stack0 +} + +define tailcc void @caller_to8_from0() { +; COMMON-LABEL: caller_to8_from0: +; COMMON: sub sp, sp, #32 + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call tailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack8 +} + +define tailcc void @caller_to8_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to8_from8: +; COMMON: sub sp, sp, #16 + +; Key point is that the "%a" should go where at SP on entry. + tail call tailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack8 +} + +define tailcc void @caller_to16_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to16_from8: +; COMMON: sub sp, sp, #16 + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. If it tries to go below incoming-SP then the +; callee will not deallocate the space, even in tailcc. + tail call tailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) + +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! 
+; COMMON-NEXT: b callee_stack16 + ret void +} + + +define tailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { +; COMMON-LABEL: caller_to8_from24: +; COMMON: sub sp, sp, #16 + +; Key point is that the "%a" should go where at #16 above SP on entry. + tail call tailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #32]! +; COMMON-NEXT: b callee_stack8 +} + + +define tailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { +; COMMON-LABEL: caller_to16_from16: +; COMMON: sub sp, sp, #16 + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call tailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) + ret void + +; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack16 +} + +define tailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" { +; COMMON-LABEL: disable_tail_calls: +; COMMON-NEXT: // %bb. + + tail call tailcc void @callee_stack0() + ret void + +; COMMON: bl callee_stack0 +; COMMON: ret +} + +; Weakly-referenced extern functions cannot be tail-called, as AAELF does +; not define the behaviour of branch instructions to undefined weak symbols. +define tailcc void @caller_weak() { +; COMMON-LABEL: caller_weak: +; COMMON: bl callee_weak + tail call void @callee_weak() + ret void +} + +declare { [2 x float] } @get_vec2() + +define { [3 x float] } @test_add_elem() { +; SDAG-LABEL: test_add_elem: +; SDAG: bl get_vec2 +; SDAG: fmov s2, #1.0 +; SDAG: ret +; GISEL-LABEL: test_add_elem: +; GISEL: str x30, [sp, #-16]! +; GISEL: bl get_vec2 +; GISEL: fmov s2, #1.0 +; GISEL: ldr x30, [sp], #16 +; GISEL: ret + + %call = tail call { [2 x float] } @get_vec2() + %arr = extractvalue { [2 x float] } %call, 0 + %arr.0 = extractvalue [2 x float] %arr, 0 + %arr.1 = extractvalue [2 x float] %arr, 1 + + %res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0 + %res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1 + %res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2 + ret { [3 x float] } %res.012 +} + +declare double @get_double() +define { double, [2 x double] } @test_mismatched_insert() { +; COMMON-LABEL: test_mismatched_insert: +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: ret + + %val0 = call double @get_double() + %val1 = call double @get_double() + %val2 = tail call double @get_double() + + %res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0 + %res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0 + %res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1 + + ret { double, [2 x double] } %res.012 +} Index: llvm/test/CodeGen/ARM/swifttailcc.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/swifttailcc.ll @@ -0,0 +1,8 @@ +; RUN: llc -mtriple thumbv7k-apple-watchos %s -o - | FileCheck %s + +define float @verify_aapcs_vfp(float %in) { +; CHECK: vadd.f32 s0, s0, s0 + + %res = fadd float %in, %in + ret float %res +} Index: llvm/test/CodeGen/X86/swifttail-return.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/swifttail-return.ll @@ -0,0 +1,607 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: 
llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown -O0 | FileCheck --check-prefix=CHECK-O0 %s + +@var = dso_local global i32 0 + +; Test how llvm handles return type of {i16, i8}. The return value will be +; passed in %eax and %dl. +define i16 @test(i32 %key) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq gen@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: # kill: def $ax killed $ax def $eax +; CHECK-NEXT: movsbl %dl, %ecx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-O0-LABEL: test: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: callq gen@PLT +; CHECK-O0-NEXT: cwtl +; CHECK-O0-NEXT: movsbl %dl, %ecx +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swifttailcc { i16, i8 } @gen(i32 %0) + %v3 = extractvalue { i16, i8 } %call, 0 + %v1 = sext i16 %v3 to i32 + %v5 = extractvalue { i16, i8 } %call, 1 + %v2 = sext i8 %v5 to i32 + %add = add nsw i32 %v1, %v2 + %conv = trunc i32 %add to i16 + ret i16 %conv +} + +declare swifttailcc { i16, i8 } @gen(i32) + +; If we can't pass every return value in registers, we will pass everything +; in memory. The caller provides space for the return value and passes +; the address in %rax. The first input argument will be in %rdi.
+define dso_local i32 @test2(i32 %key) #0 { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: callq gen2@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-O0-LABEL: test2: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: subq $40, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-O0-NEXT: callq gen2@PLT +; CHECK-O0-NEXT: subq $8, %rsp +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: addl %edi, %eax +; CHECK-O0-NEXT: addl %esi, %eax +; CHECK-O0-NEXT: addl %edx, %eax +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: addq $40, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swifttailcc { i32, i32, i32, i32, i32 } @gen2(i32 %0) + + %v3 = extractvalue { i32, i32, i32, i32, i32 } %call, 0 + %v5 = extractvalue { i32, i32, i32, i32, i32 } %call, 1 + %v6 = extractvalue { i32, i32, i32, i32, i32 } %call, 2 + %v7 = extractvalue { i32, i32, i32, i32, i32 } %call, 3 + %v8 = extractvalue { i32, i32, i32, i32, i32 } %call, 4 + + %add = add nsw i32 %v3, %v5 + %add1 = add nsw i32 %add, %v6 + %add2 = add nsw i32 %add1, %v7 + %add3 = add nsw i32 %add2, %v8 + ret i32 %add3 +} + +; The address of the return value is passed in %rax. +; On return, we don't keep the address in %rax. +define swifttailcc { i32, i32, i32, i32, i32 } @gen2(i32 %key) { +; CHECK-LABEL: gen2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, 16(%rax) +; CHECK-NEXT: movl %edi, 12(%rax) +; CHECK-NEXT: movl %edi, 8(%rax) +; CHECK-NEXT: movl %edi, 4(%rax) +; CHECK-NEXT: movl %edi, (%rax) +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: gen2: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl %edi, 16(%rax) +; CHECK-O0-NEXT: movl %edi, 12(%rax) +; CHECK-O0-NEXT: movl %edi, 8(%rax) +; CHECK-O0-NEXT: movl %edi, 4(%rax) +; CHECK-O0-NEXT: movl %edi, (%rax) +; CHECK-O0-NEXT: retq $8 + %Y = insertvalue { i32, i32, i32, i32, i32 } undef, i32 %key, 0 + %Z = insertvalue { i32, i32, i32, i32, i32 } %Y, i32 %key, 1 + %Z2 = insertvalue { i32, i32, i32, i32, i32 } %Z, i32 %key, 2 + %Z3 = insertvalue { i32, i32, i32, i32, i32 } %Z2, i32 %key, 3 + %Z4 = insertvalue { i32, i32, i32, i32, i32 } %Z3, i32 %key, 4 + ret { i32, i32, i32, i32, i32 } %Z4 +} + +; The return value {i32, i32, i32, i32} will be returned via registers %eax, +; %edx, %ecx, %r8d. 
+define dso_local i32 @test3(i32 %key) #0 { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq gen3@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-O0-LABEL: test3: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: callq gen3@PLT +; CHECK-O0-NEXT: addl %edx, %eax +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: addl %r8d, %eax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swifttailcc { i32, i32, i32, i32 } @gen3(i32 %0) + + %v3 = extractvalue { i32, i32, i32, i32 } %call, 0 + %v5 = extractvalue { i32, i32, i32, i32 } %call, 1 + %v6 = extractvalue { i32, i32, i32, i32 } %call, 2 + %v7 = extractvalue { i32, i32, i32, i32 } %call, 3 + + %add = add nsw i32 %v3, %v5 + %add1 = add nsw i32 %add, %v6 + %add2 = add nsw i32 %add1, %v7 + ret i32 %add2 +} + +declare swifttailcc { i32, i32, i32, i32 } @gen3(i32 %key) + +; The return value {float, float, float, float} will be returned via registers +; %xmm0, %xmm1, %xmm2, %xmm3. +define dso_local float @test4(float %key) #0 { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq gen4@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: addss %xmm2, %xmm0 +; CHECK-NEXT: addss %xmm3, %xmm0 +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-O0-LABEL: test4: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: callq gen4@PLT +; CHECK-O0-NEXT: addss %xmm1, %xmm0 +; CHECK-O0-NEXT: addss %xmm2, %xmm0 +; CHECK-O0-NEXT: addss %xmm3, %xmm0 +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq +entry: + %key.addr = alloca float, align 4 + store float %key, float* %key.addr, align 4 + %0 = load float, float* %key.addr, align 4 + %call = call swifttailcc { float, float, float, float } @gen4(float %0) + + %v3 = extractvalue { float, float, float, float } %call, 0 + %v5 = extractvalue { float, float, float, float } %call, 1 + %v6 = extractvalue { float, float, float, float } %call, 2 + %v7 = extractvalue { float, float, float, float } %call, 3 + + %add = fadd float %v3, %v5 + %add1 = fadd float %add, %v6 + %add2 = fadd float %add1, %v7 + ret float %add2 +} + +declare swifttailcc { float, float, float, float } @gen4(float %key) + +define dso_local void @consume_i1_ret() { +; CHECK-LABEL: consume_i1_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq produce_i1_ret@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: andl $1, %eax 
+; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: movzbl %r8b, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; CHECK-O0-LABEL: consume_i1_ret: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: callq produce_i1_ret@PLT +; CHECK-O0-NEXT: andb $1, %al +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: andb $1, %dl +; CHECK-O0-NEXT: movzbl %dl, %eax +; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: andb $1, %cl +; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: andb $1, %r8b +; CHECK-O0-NEXT: movzbl %r8b, %eax +; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq + %call = call swifttailcc { i1, i1, i1, i1 } @produce_i1_ret() + %v3 = extractvalue { i1, i1, i1, i1 } %call, 0 + %v5 = extractvalue { i1, i1, i1, i1 } %call, 1 + %v6 = extractvalue { i1, i1, i1, i1 } %call, 2 + %v7 = extractvalue { i1, i1, i1, i1 } %call, 3 + %val = zext i1 %v3 to i32 + store volatile i32 %val, i32* @var + %val2 = zext i1 %v5 to i32 + store volatile i32 %val2, i32* @var + %val3 = zext i1 %v6 to i32 + store volatile i32 %val3, i32* @var + %val4 = zext i1 %v7 to i32 + store i32 %val4, i32* @var + ret void +} + +declare swifttailcc { i1, i1, i1, i1 } @produce_i1_ret() + +define swifttailcc void @foo(i64* sret(i64) %agg.result, i64 %val) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, (%rax) +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: foo: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq %rdi, (%rax) +; CHECK-O0-NEXT: retq $8 + store i64 %val, i64* %agg.result + ret void +} + +define swifttailcc double @test5() #0 { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq gen5@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: addsd %xmm2, %xmm0 +; CHECK-NEXT: addsd %xmm3, %xmm0 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: test5: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: callq gen5@PLT +; CHECK-O0-NEXT: subq $8, %rsp +; CHECK-O0-NEXT: addsd %xmm1, %xmm0 +; CHECK-O0-NEXT: addsd %xmm2, %xmm0 +; CHECK-O0-NEXT: addsd %xmm3, %xmm0 +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq $8 +entry: + %call = call swifttailcc { double, double, double, double } @gen5() + + %v3 = extractvalue { double, double, double, double } %call, 0 + %v5 = extractvalue { double, double, double, double } %call, 1 + %v6 = extractvalue { double, double, double, double } %call, 2 + %v7 = extractvalue { double, double, double, double } %call, 3 + + %add = fadd double %v3, %v5 + %add1 = fadd double %add, %v6 + %add2 = fadd double %add1, %v7 + ret double %add2 +} + +declare swifttailcc { double, double, double, double } @gen5() + + +define swifttailcc { double, i64 } @test6() #0 { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq gen6@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: addsd %xmm2, %xmm0 +; CHECK-NEXT: addsd 
%xmm3, %xmm0 +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: test6: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: callq gen6@PLT +; CHECK-O0-NEXT: subq $8, %rsp +; CHECK-O0-NEXT: addsd %xmm1, %xmm0 +; CHECK-O0-NEXT: addsd %xmm2, %xmm0 +; CHECK-O0-NEXT: addsd %xmm3, %xmm0 +; CHECK-O0-NEXT: addq %rdx, %rax +; CHECK-O0-NEXT: addq %rcx, %rax +; CHECK-O0-NEXT: addq %r8, %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq $8 +entry: + %call = call swifttailcc { double, double, double, double, i64, i64, i64, i64 } @gen6() + + %v3 = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 0 + %v5 = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 1 + %v6 = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 2 + %v7 = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 3 + %v3.i = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 4 + %v5.i = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 5 + %v6.i = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 6 + %v7.i = extractvalue { double, double, double, double, i64, i64, i64, i64 } %call, 7 + + %add = fadd double %v3, %v5 + %add1 = fadd double %add, %v6 + %add2 = fadd double %add1, %v7 + + %add.i = add nsw i64 %v3.i, %v5.i + %add1.i = add nsw i64 %add.i, %v6.i + %add2.i = add nsw i64 %add1.i, %v7.i + + %Y = insertvalue { double, i64 } undef, double %add2, 0 + %Z = insertvalue { double, i64 } %Y, i64 %add2.i, 1 + ret { double, i64} %Z +} + +declare swifttailcc { double, double, double, double, i64, i64, i64, i64 } @gen6() + +define swifttailcc { i32, i32, i32, i32 } @gen7(i32 %key) { +; CHECK-LABEL: gen7: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: gen7: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl %edi, %r8d +; CHECK-O0-NEXT: movl %r8d, %eax +; CHECK-O0-NEXT: movl %r8d, %edx +; CHECK-O0-NEXT: movl %r8d, %ecx +; CHECK-O0-NEXT: retq $8 + %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 + %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 + %v2 = insertvalue { i32, i32, i32, i32 } %v1, i32 %key, 2 + %v3 = insertvalue { i32, i32, i32, i32 } %v2, i32 %key, 3 + ret { i32, i32, i32, i32 } %v3 +} + +define swifttailcc { i64, i64, i64, i64 } @gen8(i64 %key) { +; CHECK-LABEL: gen8: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: movq %rdi, %r8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: gen8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx +; CHECK-O0-NEXT: retq $8 + %v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0 + %v1 = insertvalue { i64, i64, i64, i64 } %v0, i64 %key, 1 + %v2 = insertvalue { i64, i64, i64, i64 } %v1, i64 %key, 2 + %v3 = insertvalue { i64, i64, i64, i64 } %v2, i64 %key, 3 + ret { i64, i64, i64, i64 } %v3 +} + +define swifttailcc { i8, i8, i8, i8 } @gen9(i8 %key) { +; CHECK-LABEL: gen9: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, 
%edx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %r8d +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: gen9: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb %dil, %r8b +; CHECK-O0-NEXT: movb %r8b, %al +; CHECK-O0-NEXT: movb %r8b, %dl +; CHECK-O0-NEXT: movb %r8b, %cl +; CHECK-O0-NEXT: retq $8 + %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 + %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 + %v2 = insertvalue { i8, i8, i8, i8 } %v1, i8 %key, 2 + %v3 = insertvalue { i8, i8, i8, i8 } %v2, i8 %key, 3 + ret { i8, i8, i8, i8 } %v3 +} +define swifttailcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) { +; CHECK-LABEL: gen10: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm0, %xmm2 +; CHECK-NEXT: movaps %xmm0, %xmm3 +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: movq %rdi, %r8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: gen10: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movaps %xmm0, %xmm3 +; CHECK-O0-NEXT: movaps %xmm3, %xmm0 +; CHECK-O0-NEXT: movaps %xmm3, %xmm1 +; CHECK-O0-NEXT: movaps %xmm3, %xmm2 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx +; CHECK-O0-NEXT: retq $8 + %v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0 + %v1 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v0, double %keyd, 1 + %v2 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v1, double %keyd, 2 + %v3 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v2, double %keyd, 3 + %v4 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v3, i64 %keyi, 4 + %v5 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v4, i64 %keyi, 5 + %v6 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v5, i64 %keyi, 6 + %v7 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v6, i64 %keyi, 7 + ret { double, double, double, double, i64, i64, i64, i64 } %v7 +} + + +define swifttailcc <4 x float> @test11() #0 { +; CHECK-LABEL: test11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq gen11@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: addps %xmm2, %xmm0 +; CHECK-NEXT: addps %xmm3, %xmm0 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: test11: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: callq gen11@PLT +; CHECK-O0-NEXT: subq $8, %rsp +; CHECK-O0-NEXT: addps %xmm1, %xmm0 +; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: addps %xmm3, %xmm0 +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq $8 +entry: + %call = call swifttailcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11() + + %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 0 + %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 1 + %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 2 + %v7 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 3 + + %add = fadd <4 x float> %v3, %v5 + %add1 = fadd <4 x float> %add, %v6 + %add2 = fadd <4 x float> %add1, %v7 + ret <4 x float> %add2 +} + +declare 
swifttailcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11() + +define swifttailcc { <4 x float>, float } @test12() #0 { +; CHECK-LABEL: test12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq gen12@PLT +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: addps %xmm2, %xmm0 +; CHECK-NEXT: movaps %xmm3, %xmm1 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq $8 +; +; CHECK-O0-LABEL: test12: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: subq $40, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O0-NEXT: callq gen12@PLT +; CHECK-O0-NEXT: subq $8, %rsp +; CHECK-O0-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-O0-NEXT: movaps %xmm3, %xmm1 +; CHECK-O0-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload +; CHECK-O0-NEXT: addps %xmm3, %xmm0 +; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: addq $40, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 +; CHECK-O0-NEXT: retq $8 +entry: + %call = call swifttailcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12() + + %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 0 + %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 1 + %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 2 + %v8 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 3 + + %add = fadd <4 x float> %v3, %v5 + %add1 = fadd <4 x float> %add, %v6 + %res.0 = insertvalue { <4 x float>, float } undef, <4 x float> %add1, 0 + %res = insertvalue { <4 x float>, float } %res.0, float %v8, 1 + ret { <4 x float>, float } %res +} + +declare swifttailcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12() Index: llvm/test/CodeGen/X86/tailcall-swifttailcc.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/tailcall-swifttailcc.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32 + +; With -tailcallopt, CodeGen guarantees a tail call optimization +; for all of these. 
+ +declare dso_local swifttailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) + +define dso_local swifttailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +; X64-LABEL: tailcaller: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: popq %rax +; X64-NEXT: jmp tailcallee # TAILCALL +; +; X32-LABEL: tailcaller: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $24, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: addl $16, %esp +; X32-NEXT: jmp tailcallee # TAILCALL +entry: + %tmp11 = tail call swifttailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare dso_local swifttailcc i8* @alias_callee() + +define swifttailcc noalias i8* @noalias_caller() nounwind { +; X64-LABEL: noalias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp alias_callee # TAILCALL +; +; X32-LABEL: noalias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp alias_callee # TAILCALL + %p = tail call swifttailcc i8* @alias_callee() + ret i8* %p +} + +declare dso_local swifttailcc noalias i8* @noalias_callee() + +define dso_local swifttailcc i8* @alias_caller() nounwind { +; X64-LABEL: alias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp noalias_callee # TAILCALL +; +; X32-LABEL: alias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp noalias_callee # TAILCALL + %p = tail call swifttailcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare dso_local swifttailcc i32 @i32_callee() + +define dso_local swifttailcc i32 @ret_undef() nounwind { +; X64-LABEL: ret_undef: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp i32_callee # TAILCALL +; +; X32-LABEL: ret_undef: +; X32: # %bb.0: +; X32-NEXT: jmp i32_callee # TAILCALL + %p = tail call swifttailcc i32 @i32_callee() + ret i32 undef +} + +declare dso_local swifttailcc void @does_not_return() + +define dso_local swifttailcc i32 @noret() nounwind { +; X64-LABEL: noret: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp does_not_return # TAILCALL +; +; X32-LABEL: noret: +; X32: # %bb.0: +; X32-NEXT: jmp does_not_return # TAILCALL + tail call swifttailcc void @does_not_return() + unreachable +} + +define dso_local swifttailcc void @void_test(i32, i32, i32, i32) { +; X64-LABEL: void_test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp void_test # TAILCALL +; +; X32-LABEL: void_test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 24 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp void_test # TAILCALL + entry: 
+ tail call swifttailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) + ret void +} + +define dso_local swifttailcc i1 @i1test(i32, i32, i32, i32) { +; X64-LABEL: i1test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp i1test # TAILCALL +; +; X32-LABEL: i1test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 24 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp i1test # TAILCALL + entry: + %4 = tail call swifttailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret i1 %4 +}