Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3965,7 +3965,7 @@ MVT PtrTy = getPointerTy(DAG.getDataLayout()); Register Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); - FuncInfo->setSRetReturnReg(Reg); + FuncInfo->setSRetReturn(InVals[I], Reg); SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); break; @@ -4871,11 +4871,19 @@ // Also avoid sibcall optimization if we're an sret return fn and the callee // is incompatible. See comment in LowerReturn about why hasStructRetAttr is // insufficient. - if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) { - // For a compatible tail call the callee must return our sret pointer. So it - // needs to be (a) an sret function itself and (b) we pass our sret as its - // sret. Condition #b is harder to determine. - return false; + if (auto SRetParm = + MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnValue()) { + if (!IsCalleeStackStructRet) + // Callee is not stack struct ret. + return false; + + if (CalleeCC == CallingConv::Swift || CalleeCC == CallingConv::SwiftTail) + // Callee is Swift, which does not have the sret requirement. + return false; + + if (SRetParm != OutVals[0]) + // We're not passing on our incoming sret pointer. + return false; } else if (Subtarget.is32Bit() && IsCalleeStackStructRet) // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call, // as our caller doesn't expect that. 
Index: llvm/lib/Target/X86/X86MachineFunctionInfo.h =================================================================== --- llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" namespace llvm { @@ -63,7 +64,8 @@ /// SRetReturnReg - Some subtargets require that sret lowering includes /// returning the value of the returned struct in a register. This field /// holds the virtual register into which the sret argument is passed. - Register SRetReturnReg; + Register SRetReturnReg = 0; + SDValue SRetReturnValue = SDValue(); /// GlobalBaseReg - keeps track of the virtual register initialized for /// use as the global base register. This is used for PIC in some PIC @@ -158,8 +160,13 @@ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; } void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;} + const SDValue &getSRetReturnValue() { return SRetReturnValue; } Register getSRetReturnReg() const { return SRetReturnReg; } - void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; } + + void setSRetReturn(const SDValue N, Register Reg) { + SRetReturnValue = N; + SRetReturnReg = Reg; + } Register getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; } Index: llvm/test/CodeGen/X86/sibcall.ll =================================================================== --- llvm/test/CodeGen/X86/sibcall.ll +++ llvm/test/CodeGen/X86/sibcall.ll @@ -464,21 +464,11 @@ ; ; X64-LABEL: t15: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: callq f -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X64-NEXT: jmp f # TAILCALL ; ; X32-LABEL: t15: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movq %rdi, %rbx -; X32-NEXT: callq f -; 
X32-NEXT: movl %ebx, %eax -; X32-NEXT: popq %rbx -; X32-NEXT: retq +; X32-NEXT: jmp f # TAILCALL tail call fastcc void @f(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind ret void } @@ -627,32 +617,15 @@ define fastcc void @t21_sret_to_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind { ; X86-LABEL: t21_sret_to_sret: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: calll t21_f_sret -; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NEXT: jmp t21_f_sret # TAILCALL ; ; X64-LABEL: t21_sret_to_sret: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: callq t21_f_sret -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X64-NEXT: jmp t21_f_sret # TAILCALL ; ; X32-LABEL: t21_sret_to_sret: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movq %rdi, %rbx -; X32-NEXT: callq t21_f_sret -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popq %rbx -; X32-NEXT: retq +; X32-NEXT: jmp t21_f_sret # TAILCALL tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind ret void } @@ -660,70 +633,35 @@ define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind { ; X86-LABEL: t21_sret_to_sret_more_args: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll f_sret@PLT -; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NEXT: jmp f_sret@PLT # TAILCALL ; ; X64-LABEL: t21_sret_to_sret_more_args: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: callq f_sret@PLT -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X64-NEXT: jmp f_sret@PLT # TAILCALL ; ; X32-LABEL: 
t21_sret_to_sret_more_args: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movq %rdi, %rbx -; X32-NEXT: callq f_sret@PLT -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popq %rbx -; X32-NEXT: retq +; X32-NEXT: jmp f_sret@PLT # TAILCALL tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind ret void } +; this can be tailcalled -- there's only one sret parm, but it is the second one! define fastcc void @t21_sret_to_sret_second_arg_sret(%struct.foo* noalias %agg.result, %struct.foo* noalias sret(%struct.foo) %ret) nounwind { ; X86-LABEL: t21_sret_to_sret_second_arg_sret: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: calll t21_f_sret -; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NEXT: jmp t21_f_sret # TAILCALL ; ; X64-LABEL: t21_sret_to_sret_second_arg_sret: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: callq t21_f_sret -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X64-NEXT: jmp t21_f_sret # TAILCALL ; ; X32-LABEL: t21_sret_to_sret_second_arg_sret: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movq %rsi, %rbx ; X32-NEXT: movq %rsi, %rdi -; X32-NEXT: callq t21_f_sret -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popq %rbx -; X32-NEXT: retq +; X32-NEXT: jmp t21_f_sret # TAILCALL tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %ret) nounwind ret void } @@ -745,32 +683,21 @@ ; ; X64-LABEL: t21_sret_to_sret_more_args2: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx ; X64-NEXT: movl %esi, %eax -; X64-NEXT: movq %rdi, %rbx ; X64-NEXT: movl %edx, %esi ; X64-NEXT: movl %eax, %edx -; X64-NEXT: callq f_sret@PLT -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X64-NEXT: jmp f_sret@PLT ; ; X32-LABEL: t21_sret_to_sret_more_args2: ; X32: # %bb.0: -; 
X32-NEXT: pushq %rbx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movq %rdi, %rbx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edx -; X32-NEXT: callq f_sret@PLT -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popq %rbx -; X32-NEXT: retq +; X32-NEXT: jmp f_sret@PLT tail call fastcc void @f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %b, i32 %a) nounwind ret void } - define fastcc void @t21_sret_to_sret_args_mismatch(%struct.foo* noalias sret(%struct.foo) %agg.result, %struct.foo* noalias %ret) nounwind { ; X86-LABEL: t21_sret_to_sret_args_mismatch: ; X86: # %bb.0: