Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -1085,6 +1085,10 @@
     /// essentially v16i8 vector version of VINSERTH.
     SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
 
+    /// Return whether the call instruction can potentially be optimized to a
+    /// tail call. This will cause the optimizers to attempt to move, or
+    /// duplicate, return instructions to help enable tail call optimizations.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
   }; // end class PPCTargetLowering
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4387,6 +4387,20 @@
   return true;
 }
 
+// Returns true if TCO is possible between the caller's and callee's
+// calling conventions.
+static bool
+areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
+                                    CallingConv::ID CalleeCC) {
+  // Tail or Sibling call optimization (TCO/SCO) needs callee and caller to
+  // have the same calling convention.
+  if (CallerCC != CalleeCC)
+    return false;
+
+  // Tail or Sibling calls can be done with fastcc/ccc.
+  return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C);
+}
+
 bool
 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                     SDValue Callee,
@@ -4403,15 +4417,9 @@
   // Variadic argument functions are not supported.
   if (isVarArg) return false;
 
-  MachineFunction &MF = DAG.getMachineFunction();
-  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
-
-  // Tail or Sibling call optimization (TCO/SCO) needs callee and caller has
-  // the same calling convention
-  if (CallerCC != CalleeCC) return false;
-
-  // SCO support C calling convention
-  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
+  auto *Caller = DAG.getMachineFunction().getFunction();
+  // Check that the calling conventions are compatible for TCO.
+  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(), CalleeCC))
     return false;
 
   // Caller contains any byval parameter is not supported.
@@ -4433,7 +4441,7 @@
   // If the caller and callee potentially have different TOC bases then we
   // cannot tail call since we need to restore the TOC pointer after the call.
   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!callsShareTOCBase(MF.getFunction(), Callee, getTargetMachine()))
+  if (!callsShareTOCBase(Caller, Callee, getTargetMachine()))
     return false;
 
   // TCO allows altering callee ABI, so we don't have to check further.
@@ -4445,7 +4453,7 @@
   // If callee use the same argument list that caller is using, then we can
   // apply SCO on this case. If it is not, then we need to check if callee needs
   // stack for passing arguments.
-  if (!hasSameArgumentList(MF.getFunction(), CS) &&
+  if (!hasSameArgumentList(Caller, CS) &&
       needStackSlotPassParameters(Subtarget, Outs)) {
     return false;
   }
@@ -13773,3 +13781,39 @@
 
   return SDValue();
 }
+
+bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
+  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
+    return false;
+
+  // If not a tail call then no need to proceed.
+  if (!CI->isTailCall())
+    return false;
+
+  // If tail calls are disabled for the caller then we are done.
+  const Function *Caller = CI->getParent()->getParent();
+  auto Attr = Caller->getFnAttribute("disable-tail-calls");
+  if (Attr.getValueAsString() == "true")
+    return false;
+
+  // If sibling calls have been disabled and tail-calls aren't guaranteed
+  // there is no reason to duplicate.
+  auto &TM = getTargetMachine();
+  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
+    return false;
+
+  // Can't tail call a function called indirectly, or if it has variadic args.
+  const Function *CalledFunc = CI->getCalledFunction();
+  if (!CalledFunc || CalledFunc->isVarArg())
+    return false;
+
+  // Make sure the callee and caller calling conventions are eligible for TCO.
+  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
+                                           CI->getCallingConv()))
+    return false;
+
+  // If the function is local then we have a good chance at tail-calling it;
+  // an external callee may use a different TOC base, ruling out a tail call.
+  return TM.shouldAssumeDSOLocal(*Caller->getParent(), CalledFunc);
+}
Index: test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
@@ -0,0 +1,63 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64-unknown-gnu-linux < %s | FileCheck %s
+
+; Function Attrs: noinline norecurse nounwind readnone
+define hidden signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  ret i32 %add1
+}
+
+; Function Attrs: nounwind
+define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; The return should get duplicated here to enable a tail-call opportunity.
+; CHECK-LABEL: if.then:
+; CHECK-NEXT: %[[T1:[a-zA-Z0-9]+]] = tail call signext i32 @call1
+; CHECK-NEXT: ret i32 %[[T1]]
+
+if.end:                                           ; preds = %entry
+  %cmp1 = icmp slt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2:                                         ; preds = %if.end
+  %call3 = tail call signext i32 @call2(i32 signext %a, i32 signext %b, i32 signext %c) #3
+  br label %return
+; No duplication here since we cannot tail-call an external function anyway.
+; CHECK-LABEL: if.then2:
+; CHECK-NEXT: tail call signext i32 @call2
+; CHECK-NEXT: br
+
+if.end4:                                          ; preds = %if.end
+  %cmp5 = icmp sgt i32 %b, %c
+  br i1 %cmp5, label %if.then6, label %return
+
+if.then6:                                         ; preds = %if.end4
+  %call7 = tail call fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; No duplication here because the calling convention mismatch means we won't tail-call
+; CHECK-LABEL: if.then6:
+; CHECK-NEXT: tail call fastcc signext i32 @call3
+; CHECK-NEXT: br
+
+return:                                           ; preds = %if.end4, %if.then6, %if.then2, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %c, %if.end4 ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @call2(i32 signext, i32 signext, i32 signext) local_unnamed_addr #2
+
+; Function Attrs: noinline norecurse nounwind readnone
+define internal fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c) unnamed_addr #0 {
+entry:
+  %mul = mul nsw i32 %b, %a
+  %mul1 = mul nsw i32 %mul, %c
+  ret i32 %mul1
+}