diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1188,6 +1188,11 @@
     CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
                                   bool IsVarArg) const;
 
+    // Return whether the call instruction can potentially be optimized to a
+    // tail call. This will cause the optimizers to attempt to move, or
+    // duplicate return instructions to help enable tail call optimizations.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
@@ -1464,10 +1469,6 @@
     /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
     SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
 
-    // Return whether the call instruction can potentially be optimized to a
-    // tail call. This will cause the optimizers to attempt to move, or
-    // duplicate return instructions to help enable tail call optimizations.
-    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
     bool hasBitPreservingFPLogic(EVT VT) const override;
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1117,17 +1117,36 @@
 }
 
 bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
+  // Can't tail call when we always use indirect calls.
+  if (ST->useLongCalls())
+    return false;
+
   // Subtargets using PC-Relative addressing supported.
   if (ST->isUsingPCRelativeCalls())
     return true;
 
+  auto *C = dyn_cast<CallInst>(CB);
+  if (!C)
+    return false;
+
+  if (ST->is64BitELFABI())
+    return TLI->mayBeEmittedAsTailCall(C);
+
+  // The checks below mimic IsEligibleForTailCallOptimization.
+  if (!ST->getTargetMachine().Options.GuaranteedTailCallOpt)
+    return false;
+
   const Function *Callee = CB->getCalledFunction();
   // Indirect calls and variadic argument functions not supported.
   if (!Callee || Callee->isVarArg())
     return false;
 
-  const Function *Caller = CB->getCaller();
-  // Support if we can share TOC base.
-  return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
-                                                     Callee);
+  // shouldBeMustTail already confirmed: CallerCC == CalleeCC, and that CB does
+  // not have any ABI-impacting function attributes including ByVal. Then
+  // non-PIC/GOT tail calls are supported.
+  if (CB->getCallingConv() == CallingConv::Fast &&
+      ST->getTargetMachine().getRelocationModel() != Reloc::PIC_)
+    return true;
+
+  return false;
 }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll
@@ -3,7 +3,15 @@
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 --code-model=medium \
-; RUN:   | FileCheck %s --check-prefix=CHECK-PCREL
+; RUN:   | FileCheck %s --check-prefix=CHECK-MUSTTAIL
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 --code-model=medium -mattr=+longcall \
+; RUN:   | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN:   -mtriple=powerpc-- | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
+; RUN:   -mtriple=ppc32-- | FileCheck %s
+; REQUIRES: asserts
 
 define void @f() #0 {
 entry:
@@ -50,11 +58,11 @@
 ; CHECK-NEXT: call fastcc void %[[pv2]](i8* null)
 
 ; Verify that ppc target using PC-Relative addressing in the resume part resume call is marked with musttail.
-; CHECK-PCREL-LABEL: @f.resume(
-; CHECK-PCREL: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
-; CHECK-PCREL-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
-; CHECK-PCREL-NEXT: musttail call fastcc void %[[pv2]](i8* null)
-; CHECK-PCREL-NEXT: ret void
+; CHECK-MUSTTAIL-LABEL: @f.resume(
+; CHECK-MUSTTAIL: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-MUSTTAIL-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-MUSTTAIL-NEXT: musttail call fastcc void %[[pv2]](i8* null)
+; CHECK-MUSTTAIL-NEXT: ret void
 
 declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
 declare i1 @llvm.coro.alloc(token) #2