diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17760,6 +17760,7 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   TargetLowering::CallLoweringInfo CLI(DAG);
   EVT RetVT = Op.getValueType();
+  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
   SDValue Callee =
       DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
   bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
@@ -17774,11 +17775,23 @@
     Entry.IsZExt = !Entry.IsSExt;
     Args.push_back(Entry);
   }
+
+  // Request a tail call only when this node is in tail-call position and the
+  // callee's return type matches the enclosing function's return type (or the
+  // enclosing function returns void); otherwise emit an ordinary call on the
+  // entry chain.
+  SDValue InChain = DAG.getEntryNode();
+  SDValue TCChain = InChain;
+  const Function &F = DAG.getMachineFunction().getFunction();
+  bool IsTailCall =
+      TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+  if (IsTailCall)
+    InChain = TCChain;
   CLI.setDebugLoc(SDLoc(Op))
-      .setChain(DAG.getEntryNode())
-      .setLibCallee(CallingConv::C, RetVT.getTypeForEVT(*DAG.getContext()),
-                    Callee, std::move(Args))
-      .setTailCall(true)
+      .setChain(InChain)
+      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+      .setTailCall(IsTailCall)
       .setSExtResult(SignExtend)
       .setZExtResult(!SignExtend)
       .setIsPostTypeLegalization(true);
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
@@ -0,0 +1,53 @@
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s
+
+; The intrinsic results feed further arithmetic inside a loop, so the calls are
+; not in tail-call position and must be emitted as ordinary calls (bl).
+
+define void @cos_f64(double* %arg) {
+; CHECK-LABEL: cos_f64
+; CHECK: bl {{\.?}}__xl_cos
+bb:
+  %i = bitcast double* %arg to i8*
+  %i1 = getelementptr i8, i8* %i, i64 undef
+  br label %bb2
+
+bb2:
+  %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+  %i4 = bitcast i8* %i3 to double*
+  store double undef, double* %i4, align 8
+  %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+  %i6 = bitcast i8* %i5 to double*
+  %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+  %i8 = fmul afn double undef, 0x401921FB54442D28
+  %i9 = tail call afn double @llvm.cos.f64(double %i8) #2
+  %i10 = fmul afn double %i7, %i9
+  store double %i10, double* %i6, align 8
+  br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LABEL: log_f64
+; CHECK: bl {{\.?}}__xl_log
+bb:
+  %i = bitcast double* %arg to i8*
+  %i1 = getelementptr i8, i8* %i, i64 undef
+  br label %bb2
+
+bb2:
+  %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+  %i4 = bitcast i8* %i3 to double*
+  store double undef, double* %i4, align 8
+  %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+  %i6 = bitcast i8* %i5 to double*
+  %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+  %i8 = fmul afn double undef, 0x401921FB54442D28
+  %i9 = tail call afn double @llvm.log.f64(double %i8) #2
+  %i10 = fmul afn double %i7, %i9
+  store double %i10, double* %i6, align 8
+  br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
@@ -0,0 +1,53 @@
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s
+
+; The intrinsic results feed further arithmetic inside a loop, so the calls are
+; not in tail-call position and must be emitted as ordinary calls (bl).
+
+define void @cos_f64(double* %arg) {
+; CHECK-LABEL: cos_f64
+; CHECK: bl {{\.?}}__xl_cos_finite
+bb:
+  %i = bitcast double* %arg to i8*
+  %i1 = getelementptr i8, i8* %i, i64 undef
+  br label %bb2
+
+bb2:
+  %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+  %i4 = bitcast i8* %i3 to double*
+  store double undef, double* %i4, align 8
+  %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+  %i6 = bitcast i8* %i5 to double*
+  %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+  %i8 = fmul fast double undef, 0x401921FB54442D28
+  %i9 = tail call fast double @llvm.cos.f64(double %i8) #2
+  %i10 = fmul fast double %i7, %i9
+  store double %i10, double* %i6, align 8
+  br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LABEL: log_f64
+; CHECK: bl {{\.?}}__xl_log_finite
+bb:
+  %i = bitcast double* %arg to i8*
+  %i1 = getelementptr i8, i8* %i, i64 undef
+  br label %bb2
+
+bb2:
+  %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+  %i4 = bitcast i8* %i3 to double*
+  store double undef, double* %i4, align 8
+  %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+  %i6 = bitcast i8* %i5 to double*
+  %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+  %i8 = fmul fast double undef, 0x401921FB54442D28
+  %i9 = tail call fast double @llvm.log.f64(double %i8) #2
+  %i10 = fmul fast double %i7, %i9
+  store double %i10, double* %i6, align 8
+  br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)