Index: clang/test/CodeGenCXX/pr51000.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/pr51000.cpp @@ -0,0 +1,125 @@ +// RUN: %clang -S %s -o - -O2 -Xclang -triple=x86_64-linux | FileCheck %s --check-prefix=X86 --check-prefix=X86_64 +// RUN: %clang -S %s -o - -O2 -Xclang -triple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=X86 --check-prefix=X86_32 +// RUN: %clang -S %s -o - -O2 -Xclang -triple=x86_64-win64 | FileCheck %s --check-prefix=X86 --check-prefix=X86_64 +// RUN: %clang -S %s -o - -O2 -Xclang -triple=x86_64-win32 | FileCheck %s --check-prefix=X86_WIN + +inline void *operator new(decltype(sizeof(0)), void *p) noexcept { + return p; +} +inline void operator delete(void *p, decltype(sizeof(0))) noexcept { +} + +namespace One { // pr 51000 + +struct T { + int x; + T(int) + noexcept; + ~T(); +}; + +T factory(int) noexcept; + +alignas(T) char buffer[sizeof(T)]; + +void placement_new() { + // tailcallable ctor + ::new ((void *)buffer) T(42); +} +// X86-LABEL: _ZN3One13placement_newEv: +// X86: jmp _ZN3One1TC1Ei # TAILCALL +// X86_WIN-LABEL: "?placement_new@One@@YAXXZ": +// X86_WIN: jmp "??0T@One@@QEAA@H@Z" # TAILCALL + +void placement_call() { + // tailcallable factory + ::new ((void *)buffer) T(factory(42)); +} +// X86-LABEL: _ZN3One14placement_callEv: +// X86: jmp _ZN3One7factoryEi # TAILCALL +// X86_WIN-LABEL: "?placement_call@One@@YAXXZ": +// X86_WIN: jmp "?factory@One@@YA?AUT@1@H@Z" # TAILCALL + +} // namespace One + +namespace Two { + +struct A { // return in register + int m; +}; + +A foo(); + +A baz() { + // tailcallable + return foo(); +} +// X86-LABEL: _ZN3Two3bazEv: +// X86: jmp _ZN3Two3fooEv # TAILCALL +// X86_WIN-LABEL: "?baz@Two@@YA?AUA@1@XZ": +// X86_WIN: "?foo@Two@@YA?AUA@1@XZ" # TAILCALL + +void bar() { + // tailcallable + foo(); +} +// X86-LABEL: _ZN3Two3barEv: +// X86: jmp _ZN3Two3fooEv # TAILCALL +// X86_WIN-LABEL: "?bar@Two@@YAXXZ": +// X86_WIN: "?foo@Two@@YA?AUA@1@XZ" # 
TAILCALL + +} // namespace Two + +namespace Three { + +struct A { // return via pointer + int m[16]; +}; + +A foo(); + +A baz() { + // Although theoretically tailcallable, we're unable to prove that foo's sret + // argument value is our incoming sret parameter value, and the ABI requires + // baz to return that pointer value. IMHO that's a mis-optimization in the ABI, + // as usually the location of the returned object is a stack slot, which is + // easily rematerializable. + // FIXME: See the bug report for more information/ideas. + return foo(); +} +// X86-LABEL: _ZN5Three3bazEv: +// X86: pushq %rbx +// X86: movq %rdi, %rbx +// X86: callq _ZN5Three3fooEv +// X86_64: movq %rbx, %rax +// X86_32: movl %ebx, %eax +// X86: popq %rbx +// X86: retq +// X86_WIN-LABEL: "?baz@Three@@YA?AUA@1@XZ": +// X86_WIN: pushq %rsi +// X86_WIN: subq $32, %rsp +// X86_WIN: movq %rcx, %rsi +// X86_WIN: callq "?foo@Three@@YA?AUA@1@XZ" +// X86_WIN: movq %rsi, %rax +// X86_WIN: addq $32, %rsp +// X86_WIN: popq %rsi +// X86_WIN: retq + +void bar() { + // NOT tailcallable + foo(); +} +// X86-LABEL: _ZN5Three3barEv: +// X86_64: subq ${{[0-9]+}}, %rsp +// X86_32: subl ${{[0-9]+}}, %esp +// X86: callq _ZN5Three3fooEv +// X86_64: addq ${{[0-9]+}}, %rsp +// X86_32: addl ${{[0-9]+}}, %esp +// X86: retq +// X86_WIN-LABEL: "?bar@Three@@YAXXZ": +// X86_WIN: subq ${{[0-9]+}}, %rsp +// X86_WIN: callq "?foo@Three@@YA?AUA@1@XZ" +// X86_WIN: addq ${{[0-9]+}}, %rsp +// X86_WIN: retq + +} // namespace Three Index: llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.h +++ llvm/lib/Target/X86/X86ISelLowering.h @@ -1480,8 +1480,6 @@ bool IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, Index: llvm/lib/Target/X86/X86ISelLowering.cpp 
=================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4122,10 +4122,8 @@ if (isTailCall && !IsMustTail) { // Check if it's really possible to do a tail call. - isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, SR != NotStructReturn, - MF.getFunction().hasStructRetAttr(), CLI.RetTy, - Outs, OutVals, Ins, DAG); + isTailCall = IsEligibleForTailCallOptimization( + Callee, CallConv, isVarArg, CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -4800,8 +4798,7 @@ /// Check whether the call is eligible for tail call optimization. Targets /// that want to do tail call optimization should implement this function. bool X86TargetLowering::IsEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { @@ -4846,10 +4843,16 @@ if (RegInfo->hasStackRealignment(MF)) return false; - // Also avoid sibcall optimization if either caller or callee uses struct - // return semantics. - if (isCalleeStructRet || isCallerStructRet) + // Also avoid sibcall optimization if we're an sret return fn and the callee + // is incompatible. See comment in LowerReturn about why hasStructRetAttr is + // insufficient. + if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) { + // For a compatible tail call the callee must return our sret pointer. So it + // needs to (a) be an sret function itself and (b) receive our sret as its + // sret. Condition #b is not easy to determine at this point. + // FIXME: See pr51000 for more information. 
return false; + } // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. Index: llvm/test/CodeGen/X86/sibcall.ll =================================================================== --- llvm/test/CodeGen/X86/sibcall.ll +++ llvm/test/CodeGen/X86/sibcall.ll @@ -1016,30 +1016,18 @@ ret void } - define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind { ; X86-LABEL: t22_non_sret_to_sret: ; X86: # %bb.0: -; X86-NEXT: subl $12, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll t22_f_sret@PLT -; X86-NEXT: addl $8, %esp -; X86-NEXT: retl +; X86-NEXT: jmp t22_f_sret@PLT # TAILCALL ; ; X64-LABEL: t22_non_sret_to_sret: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: callq t22_f_sret@PLT -; X64-NEXT: popq %rax -; X64-NEXT: retq +; X64-NEXT: jmp t22_f_sret@PLT # TAILCALL ; ; X32-LABEL: t22_non_sret_to_sret: ; X32: # %bb.0: -; X32-NEXT: pushq %rax -; X32-NEXT: callq t22_f_sret@PLT -; X32-NEXT: popq %rax -; X32-NEXT: retq +; X32-NEXT: jmp t22_f_sret@PLT # TAILCALL tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind ret void }