Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1477,16 +1477,11 @@
     /// Check whether the call is eligible for tail call optimization. Targets
     /// that want to do tail call optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                           Type *RetTy,
-                                           const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                           const SmallVectorImpl<SDValue> &OutVals,
-                                           const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+        bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
 
     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                     SDValue Chain, bool IsTailCall,
                                     bool Is64Bit, int FPDiff,
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4144,10 +4144,9 @@
 
   if (isTailCall && !IsMustTail) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, SR != NotStructReturn,
-                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
-                    Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
+        OutVals, Ins, DAG);
 
     // Sibcalls are automatically detected tailcalls which do not require
     // ABI changes.
@@ -4822,9 +4821,8 @@
 /// Check whether the call is eligible for tail call optimization. Targets
 /// that want to do tail call optimization should implement this function.
 bool X86TargetLowering::IsEligibleForTailCallOptimization(
-    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-    bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
-    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+    bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
     const SmallVectorImpl<SDValue> &OutVals,
     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
   if (!mayTailCallThisCC(CalleeCC))
     return false;
@@ -4868,9 +4866,17 @@
   if (RegInfo->hasStackRealignment(MF))
     return false;
 
-  // Also avoid sibcall optimization if either caller or callee uses struct
-  // return semantics.
-  if (isCalleeStructRet || isCallerStructRet)
+  // Also avoid sibcall optimization if we're an sret return fn and the callee
+  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
+  // insufficient.
+  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+    // For a compatible tail call the callee must return our sret pointer. So it
+    // needs to be (a) an sret function itself and (b) we pass our sret as its
+    // sret. Condition #b is harder to determine.
+    return false;
+  } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
+    // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call,
+    // as our caller doesn't expect that.
    return false;
 
   // Do not sibcall optimize vararg calls unless all arguments are passed via
Index: llvm/test/CodeGen/X86/sibcall.ll
===================================================================
--- llvm/test/CodeGen/X86/sibcall.ll
+++ llvm/test/CodeGen/X86/sibcall.ll
@@ -657,47 +657,6 @@
   ret void
 }
 
-define fastcc void @t21_sret_to_sret_alloca(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind {
-; X86-LABEL: t21_sret_to_sret_alloca:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $24, %esp
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    calll t21_f_sret
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    addl $24, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
-;
-; X64-LABEL: t21_sret_to_sret_alloca:
-; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $16, %rsp
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    movq %rsp, %rdi
-; X64-NEXT:    callq t21_f_sret
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    addq $16, %rsp
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
-;
-; X32-LABEL: t21_sret_to_sret_alloca:
-; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    subl $16, %esp
-; X32-NEXT:    movq %rdi, %rbx
-; X32-NEXT:    movl %esp, %edi
-; X32-NEXT:    callq t21_f_sret
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    addl $16, %esp
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
-  %a = alloca %struct.foo, align 8
-  tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %a) nounwind
-  ret void
-}
-
 define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind {
 ; X86-LABEL: t21_sret_to_sret_more_args:
 ; X86:       # %bb.0:
@@ -1016,8 +975,8 @@
   ret void
 }
 
-
 define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind {
+; i686 not tailcallable, as sret is callee-pop here.
 ; X86-LABEL: t22_non_sret_to_sret:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $12, %esp
@@ -1029,17 +988,11 @@
 ;
 ; X64-LABEL: t22_non_sret_to_sret:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    callq t22_f_sret@PLT
-; X64-NEXT:    popq %rax
-; X64-NEXT:    retq
+; X64-NEXT:    jmp t22_f_sret@PLT # TAILCALL
 ;
 ; X32-LABEL: t22_non_sret_to_sret:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rax
-; X32-NEXT:    callq t22_f_sret@PLT
-; X32-NEXT:    popq %rax
-; X32-NEXT:    retq
+; X32-NEXT:    jmp t22_f_sret@PLT # TAILCALL
   tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind
   ret void
 }
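
Note (reviewer sketch, not part of the patch; function names are hypothetical): the IR below illustrates the i686 case the new IsCalleeStackStructRet check rejects. On i686, an sret callee pops the hidden pointer argument on return (ret $4), so lowering this call to a bare jmp would apply a stack adjustment that the caller of @g does not expect. On x86-64 nothing is callee-pop here, so the sibcall is fine (cf. t22_non_sret_to_sret above).

  %struct.foo = type { [4 x i32] }

  declare void @f_sret(%struct.foo* noalias sret(%struct.foo))

  ; @g is not itself an sret function, so its caller performs no cleanup.
  ; i686: must remain "calll f_sret" plus an explicit stack fixup.
  ; x86-64: may become "jmp f_sret" (a sibcall).
  define void @g(%struct.foo* %p) nounwind {
    tail call void @f_sret(%struct.foo* noalias sret(%struct.foo) %p)
    ret void
  }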