Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4397,13 +4397,18 @@ static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC) { - // Tail or Sibling call optimization (TCO/SCO) needs callee and caller to - // have the same calling convention. - if (CallerCC != CalleeCC) + // Tail calls are possible with fastcc and ccc. + auto isTailCallableCC = [] (CallingConv::ID CC){ + return CC == CallingConv::C || CC == CallingConv::Fast; + }; + if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) return false; - // Tail or Sibling calls can be done with fastcc/ccc. - return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C); + // We can safely tail call both fastcc and ccc callees from a c calling + // convention caller. If the caller is fastcc, we may have less stack space + // than a non-fastcc caller with the same signature so disable tail-calls in + // that case. + return CallerCC == CallingConv::C || CallerCC == CalleeCC; } bool @@ -4434,10 +4439,28 @@ // Callee contains any byval parameter is not supported, too. // Note: This is a quick work around, because in some cases, e.g. // caller's stack size > callee's stack size, we are still able to apply - // sibling call optimization. See: https://reviews.llvm.org/D23441#513574 + // sibling call optimization. For example, gcc is able to do SCO for caller1 + // in the following example, but not for caller2. 
+ // struct test { + // long int a; + // char ary[56]; + // } gTest; + // __attribute__((noinline)) int callee(struct test v, struct test *b) { + // b->a = v.a; + // return 0; + // } + // void caller1(struct test a, struct test c, struct test *b) { + // callee(gTest, b); } + // void caller2(struct test *b) { callee(gTest, b); } if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) return false; + // If callee and caller use different calling conventions, we cannot pass + // parameters on stack since offsets for the parameter area may be different. + if (Caller.getCallingConv() != CalleeCC && + needStackSlotPassParameters(Subtarget, Outs)) + return false; + // No TCO/SCO on indirect call because Caller have to restore its TOC if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) Index: llvm/trunk/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll +++ llvm/trunk/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll @@ -42,10 +42,10 @@ if.then6: ; preds = %if.end4 %call7 = tail call fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c) br label %return -; No duplication here because the calling convention mismatch means we won't tail-call +; tail calling a fastcc function from a ccc function is supported. 
; CHECK_LABEL: if.then13: -; CHECK: tail call fastcc signext i32 @call3 -; CHECK-NEXT: br +; CHECK: %[[T2:[a-zA-Z0-9]+]] = tail call fastcc signext i32 @call3 +; CHECK-NEXT: ret i32 %[[T2]] return: ; preds = %if.end4, %if.then6, %if.then2, %if.then %retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %c, %if.end4 ] Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll @@ -41,6 +41,15 @@ ; CHECK-SCO: b callee_64_64_copy } +define internal fastcc void @callee_64_64_copy_fastcc([8 x i64] %a, [8 x i64] %b) #0 { ret void } +define void @caller_64_64_copy_ccc([8 x i64] %a, [8 x i64] %b) #1 { + tail call fastcc void @callee_64_64_copy_fastcc([8 x i64] %a, [8 x i64] %b) + ret void +; If caller and callee use different calling conventions, we cannot apply TCO when parameters are passed on the stack. +; CHECK-SCO-LABEL: caller_64_64_copy_ccc: +; CHECK-SCO: bl callee_64_64_copy_fastcc +} + define void @caller_64_64_reorder_copy([8 x i64] %a, [8 x i64] %b) #1 { tail call void @callee_64_64_copy([8 x i64] %b, [8 x i64] %a) ret void