Index: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h @@ -460,6 +460,10 @@ llvm_unreachable("target does not provide no preserved mask"); } + /// Return true if all bits that are set in mask \p mask0 are also set in + /// \p mask1. + bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; + /// Return all the call-preserved register masks defined for this target. virtual ArrayRef<const uint32_t *> getRegMasks() const = 0; virtual ArrayRef<const char *> getRegMaskNames() const = 0; Index: llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp +++ llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp @@ -388,6 +388,15 @@ return false; } +bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, + const uint32_t *mask1) const { + unsigned N = (getNumRegs()+31) / 32; + for (unsigned I = 0; I < N; ++I) + if ((mask0[I] & mask1[I]) != mask0[I]) + return false; + return true; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2816,13 +2816,6 @@ CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. 
- if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; - // Byval parameters hand the function a pointer directly into the stack area // we want to reuse during a tail call. Working around this *is* possible (see // X86) but less efficient and uglier in LowerCall. @@ -2882,6 +2875,13 @@ CCAssignFnForCall(CalleeCC, isVarArg), CCAssignFnForCall(CallerCC, isVarArg))) return false; + // The callee has to preserve all registers the caller needs to preserve. + if (!CCMatch) { + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } // Nothing more to check if the callee is taking no arguments if (Outs.empty()) Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -2101,14 +2101,6 @@ MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; - - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. - if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; assert(Subtarget->supportsTailCall()); @@ -2152,6 +2144,13 @@ CCAssignFnForNode(CalleeCC, true, isVarArg), CCAssignFnForNode(CallerCC, true, isVarArg))) return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ if (CalleeCC != CallerCC) { + const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -3818,13 +3818,6 @@ if (IsCalleeWin64 != IsCallerWin64) return false; - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. - if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; - if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; @@ -3888,6 +3881,13 @@ if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86, RetCC_X86)) return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ if (!CCMatch) { + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } unsigned StackArgsSize = 0; Index: llvm/trunk/test/CodeGen/AArch64/tailcall-ccmismatch.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/tailcall-ccmismatch.ll +++ llvm/trunk/test/CodeGen/AArch64/tailcall-ccmismatch.ll @@ -0,0 +1,24 @@ +; RUN: llc -o - %s | FileCheck %s +target triple="aarch64--" + +declare void @somefunc() +define preserve_mostcc void @test_ccmismatch_notail() { +; Ensure that no tail call is used here, as the called function somefunc does +; not preserve enough registers for preserve_mostcc. +; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b somefunc +; CHECK: bl somefunc + tail call void @somefunc() + ret void +} + +declare preserve_mostcc void @some_preserve_most_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here, because some_preserve_most_func preserves +; all registers necessary for test_ccmismatch_tail. +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl some_preserve_most_func +; CHECK: b some_preserve_most_func + tail call preserve_mostcc void @some_preserve_most_func() + ret void +} Index: llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll +++ llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll @@ -126,5 +126,27 @@ ret void } +declare void @somefunc() +define cxx_fast_tlscc void @test_ccmismatch_notail() { +; A tail call is not possible here because somefunc does not preserve enough +; registers. 
+; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b _somefunc +; CHECK: bl _somefunc + tail call void @somefunc() + ret void +} + +declare cxx_fast_tlscc void @some_fast_tls_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here because some_fast_tls_func preserves all +; necessary registers (and more). +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl _some_fast_tls_func +; CHECK: b _some_fast_tls_func + tail call cxx_fast_tlscc void @some_fast_tls_func() + ret void +} + attributes #0 = { nounwind "no-frame-pointer-elim"="true" } attributes #1 = { nounwind }