Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1541,13 +1541,14 @@
     // 3) A function does not use the TOC pointer R2 but does have calls.
     //    In this case st_other=1 since we do not know whether or not any
     //    of the callees clobber R2. This case is dealt with in this else if
-    //    block.
+    //    block. Tail calls are considered calls, so st_other should also be
+    //    set to 1 in that case.
     // 4) The function does not use the TOC pointer but R2 is used inside
     //    the function. In this case st_other=1 once again.
     // 5) This function uses inline asm. We mark R2 as reserved if the function
     //    has inline asm so we have to assume that it may be used.
-    if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
-        (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
+    if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() ||
+        MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
       PPCTargetStreamer *TS =
           static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
       if (TS)
Index: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1692,8 +1692,14 @@
   if (RetOpcode == PPC::TCRETURNdi) {
     MBBI = MBB.getLastNonDebugInstr();
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    if (JumpTarget.isGlobal())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    else if (JumpTarget.isSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+        addExternalSymbol(JumpTarget.getSymbolName());
+    else
+      llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
     MBBI = MBB.getLastNonDebugInstr();
     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1705,8 +1711,14 @@
   } else if (RetOpcode == PPC::TCRETURNdi8) {
     MBBI = MBB.getLastNonDebugInstr();
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    if (JumpTarget.isGlobal())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    else if (JumpTarget.isSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addExternalSymbol(JumpTarget.getSymbolName());
+    else
+      llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
     MBBI = MBB.getLastNonDebugInstr();
     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4685,6 +4685,12 @@
 
 static bool
 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
+  // If the call site does not have a valid instruction pointer we don't have
+  // enough information to determine if we have the same argument list.
+  // We return false just to be safe.
+  if (!CS.getInstruction())
+    return false;
+
   if (CS.arg_size() != CallerFn->arg_size())
     return false;
 
@@ -4742,16 +4748,6 @@
                                     SelectionDAG& DAG) const {
   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
 
-  // FIXME: Tail calls are currently disabled when using PC Relative addressing.
-  // The issue is that PC Relative is only partially implemented and so there
-  // is currently a mix of functions that require the TOC and functions that do
-  // not require it. If we have A calls B calls C and both A and B require the
-  // TOC and C does not and is marked as clobbering R2 then it is not safe for
-  // B to tail call C. Since we do not have the information of whether or not
-  // a funciton needs to use the TOC here in this function we need to be
-  // conservatively safe and disable all tail calls for now.
-  if (Subtarget.isUsingPCRelativeCalls()) return false;
-
   if (DisableSCO && !TailCallOpt) return false;
 
   // Variadic argument functions are not supported.
@@ -4791,16 +4787,20 @@
       needStackSlotPassParameters(Subtarget, Outs))
     return false;
 
-  // No TCO/SCO on indirect call because Caller have to restore its TOC
-  if (!isFunctionGlobalAddress(Callee) &&
-      !isa<ExternalSymbolSDNode>(Callee))
-    return false;
-
-  // If the caller and callee potentially have different TOC bases then we
-  // cannot tail call since we need to restore the TOC pointer after the call.
-  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
-    return false;
+  // With a full implementation of PCRelative Calls callers no longer use the
+  // TOC and so no longer need to restore the TOC pointer. In this case it
+  // does not really matter if the callee is an indirect call or if it shares
+  // the TOC base with the caller.
+  if (!Subtarget.isUsingPCRelativeCalls()) {
+    // No TCO/SCO on indirect calls because the caller has to restore its TOC.
+    if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
+      return false;
+    // If the caller and callee potentially have different TOC bases then we
+    // cannot tail call since we need to restore the TOC pointer after the call.
+    // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
+    if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
+      return false;
+  }
 
   // TCO allows altering callee ABI, so we don't have to check further.
   if (CalleeCC == CallingConv::Fast && TailCallOpt)
@@ -5506,15 +5506,19 @@
 
   // Emit tail call.
   if (CFlags.IsTailCall) {
-    assert(((Callee.getOpcode() == ISD::Register &&
-             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
-            Callee.getOpcode() == ISD::TargetExternalSymbol ||
-            Callee.getOpcode() == ISD::TargetGlobalAddress ||
-            isa<ConstantSDNode>(Callee)) &&
-           "Expecting a global address, external symbol, absolute value or "
-           "register");
-    assert(CallOpc == PPCISD::TC_RETURN &&
-           "Unexpected call opcode for a tail call.");
+    // Indirect tail calls when using PC Relative calls do not have the same
+    // constraints.
+    if (!CFlags.IsIndirect || !Subtarget.isUsingPCRelativeCalls()) {
+      assert(((Callee.getOpcode() == ISD::Register &&
+               cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+              Callee.getOpcode() == ISD::TargetExternalSymbol ||
+              Callee.getOpcode() == ISD::TargetGlobalAddress ||
+              isa<ConstantSDNode>(Callee)) &&
+             "Expecting a global address, external symbol, absolute value or "
+             "register");
+      assert(CallOpc == PPCISD::TC_RETURN &&
+             "Unexpected call opcode for a tail call.");
+    }
     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
   }
@@ -5571,17 +5575,28 @@
       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
         ++NumSiblingCalls;
 
-      assert(isa<GlobalAddressSDNode>(Callee) &&
+      // PC Relative calls no longer guarantee that the callee is a Global
+      // Address Node. The callee could be an indirect tail call in which
+      // case the SDValue for callee could be a load (to load the address
+      // of a function pointer) or it may be a register copy (to move the
+      // address of the callee from a function parameter into a virtual
+      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
+      assert((Subtarget.isUsingPCRelativeCalls() ||
+              isa<GlobalAddressSDNode>(Callee)) &&
              "Callee should be an llvm::Function object.");
-      LLVM_DEBUG(
-          const GlobalValue *GV =
-              cast<GlobalAddressSDNode>(Callee)->getGlobal();
-          const unsigned Width =
-              80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
-          dbgs() << "TCO caller: "
-                 << left_justify(DAG.getMachineFunction().getName(), Width)
-                 << ", callee linkage: " << GV->getVisibility() << ", "
-                 << GV->getLinkage() << "\n");
+#ifndef NDEBUG
+      if (isa<GlobalAddressSDNode>(Callee)) {
+        LLVM_DEBUG(
+            const GlobalValue *GV =
+                cast<GlobalAddressSDNode>(Callee)->getGlobal();
+            const unsigned Width =
+                80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
+            dbgs() << "TCO caller: "
+                   << left_justify(DAG.getMachineFunction().getName(), Width)
+                   << ", callee linkage: " << GV->getVisibility() << ", "
+                   << GV->getLinkage() << "\n");
+      }
+#endif // NDEBUG
     }
   }
 
Index: llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -86,14 +86,23 @@
     RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
 
   const MachineInstr *MI = MO.getParent();
-
-  if (MI->getOpcode() == PPC::BL8_NOTOC)
-    RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
-
   const MachineFunction *MF = MI->getMF();
   const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
   const TargetMachine &TM = Printer.TM;
+
+  unsigned MIOpcode = MI->getOpcode();
+  if (Subtarget->isUsingPCRelativeCalls()) {
+    if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
+        MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
+        MIOpcode == PPC::BL8_NOTOC) {
+      RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
+    }
+  } else {
+    assert(MIOpcode != PPC::BL8_NOTOC &&
+           "BL8_NOTOC is only valid when using PC Relative Calls.");
+  }
+
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
   // If -msecure-plt -fPIC, add 32768 to symbol.
   if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
Index: llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -215,20 +215,13 @@
 
 define dso_local void @ReadFuncPtr() local_unnamed_addr  {
 ; CHECK-LABEL: ReadFuncPtr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK:         .localentry ReadFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
-; CHECK-NEXT:    ld r3, 0(r3)
-; CHECK-NEXT:    mtctr r3
-; CHECK-NEXT:    bctrl
-; CHECK-NEXT:    addi r1, r1, 32
-; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    mtlr r0
-; CHECK-NEXT:    blr
+; CHECK-NEXT:    ld r12, 0(r3)
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
 entry:
   %0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8
   tail call void %0()
Index: llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -enable-ppc-quad-precision -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+; These tests check the behaviour of PC Relative tail calls. When using
+; PC Relative we are able to do more tail calling than we have done in
+; the past as we no longer need to restore the TOC pointer into R2 after
+; most calls.
+
+@Func = external local_unnamed_addr global i32 (...)*, align 8
+@FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8
+
+; No calls in this function but we assign the function pointers.
+define dso_local void @AssignFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: AssignFuncPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:    pld r4, Function@got@pcrel(0), 1
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    pstd r4, FuncLocal@PCREL(0), 1
+; CHECK-NEXT:    blr
+entry:
+  store i32 (...)* @Function, i32 (...)** @Func, align 8
+  store i32 (...)* @Function, i32 (...)** @FuncLocal, align 8
+  ret void
+}
+
+declare signext i32 @Function(...)
+
+define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: TailCallLocalFuncPtr:
+; CHECK:         .localentry TailCallLocalFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r12, FuncLocal@PCREL(0), 1
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @FuncLocal to i32 ()**), align 8
+  %call = tail call signext i32 %0()
+  ret void
+}
+
+define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: TailCallExtrnFuncPtr:
+; CHECK:         .localentry TailCallExtrnFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:    ld r12, 0(r3)
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @Func to i32 ()**), align 8
+  %call = tail call signext i32 %0()
+  ret void
+}
+
+define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr {
+; CHECK-LABEL: TailCallParamFuncPtr:
+; CHECK:         .localentry TailCallParamFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
+  %call = tail call signext i32 %callee.knr.cast()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailIndirectCall:
+; CHECK:         .localentry NoTailIndirectCall, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
+  %call = tail call signext i32 %callee.knr.cast()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallDirect() local_unnamed_addr {
+; CHECK-LABEL: TailCallDirect:
+; CHECK:         .localentry TailCallDirect, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    b Function@notoc
+; CHECK-NEXT:    #TC_RETURNd8 Function@notoc 0
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallDirect:
+; CHECK:         .localentry NoTailCallDirect, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl Function@notoc
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr {
+; CHECK-LABEL: TailCallDirectLocal:
+; CHECK:         .localentry TailCallDirectLocal, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    b LocalFunction@notoc
+; CHECK-NEXT:    #TC_RETURNd8 LocalFunction@notoc 0
+entry:
+  %call = tail call fastcc signext i32 @LocalFunction()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallDirectLocal:
+; CHECK:         .localentry NoTailCallDirectLocal, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl LocalFunction@notoc
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call fastcc signext i32 @LocalFunction()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
+; CHECK-LABEL: TailCallAbs:
+; CHECK:         .localentry TailCallAbs, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallAbs:
+; CHECK:         .localentry NoTailCallAbs, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+; Function Attrs: noinline
+; This function should be tail called and not inlined.
+define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 {
+; CHECK-LABEL: LocalFunction:
+; CHECK:         .localentry LocalFunction, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li r3, 42
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 asm "li $0, 42", "=&r"()
+  ret i32 %0
+}
+
+attributes #0 = { noinline }
+