diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -106,6 +106,11 @@
   return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS;
 }
 
+void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
+  // Swap the 32-bit halves on LE so the prefix lands at the lower address.
+  write64(loc, config->isLE ? (insn << 32) | (insn >> 32) : insn);
+}
+
 static bool addOptional(StringRef name, uint64_t value,
                         std::vector<Defined *> &defined) {
   Symbol *sym = symtab->find(name);
@@ -376,15 +381,6 @@
   return read32(config->isLE ? loc : loc - 2);
 }
 
-// The prefixed instruction is always a 4 byte prefix followed by a 4 byte
-// instruction. Therefore, the prefix is always in lower memory than the
-// instruction (regardless of endianness).
-// As a result, we need to shift the pieces around on little endian machines.
-static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
-  insn = config->isLE ? insn << 32 | insn >> 32 : insn;
-  write64(loc, insn);
-}
-
 static uint64_t readPrefixedInstruction(const uint8_t *loc) {
   uint64_t fullInstr = read64(loc);
   return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;
@@ -1048,17 +1044,15 @@
   if (s.isInPlt())
     return true;
 
-  // FIXME: Remove the fatal error once the call protocol is implemented.
-  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
-    fatal("unimplemented feature: local function call with the reltype"
-          " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup");
-
   // This check looks at the st_other bits of the callee with relocation
   // R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee
   // clobbers the TOC and we need an R2 save stub.
   if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1)
     return true;
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return true;
+
   // If a symbol is a weak undefined and we are compiling an executable
   // it doesn't need a range-extending thunk since it can't be called.
   if (s.isUndefWeak() && !config->shared)
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -213,6 +213,11 @@
 // the .toc section.
 bool isPPC64SmallCodeModelTocReloc(RelType type);
 
+// Write the prefixed instruction `insn` (4-byte prefix in the high word,
+// 4-byte instruction in the low word) to `loc`. The prefix is always placed in
+// lower memory than the instruction, regardless of endianness.
+void writePrefixedInstruction(uint8_t *loc, uint64_t insn);
+
 void addPPC64SaveRestore();
 uint64_t getPPC64TocBase();
 uint64_t getAArch64Page(uint64_t expr);
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -293,6 +293,18 @@
   void addSymbols(ThunkSection &isec) override;
 };
 
+// PPC64 R12 Setup Stub
+// Used when a caller that does not maintain a TOC pointer makes a local call
+// (R_PPC64_REL24_NOTOC) to a callee that requires one: the stub loads the
+// callee's global entry point into r12, without saving r2, and jumps to it.
+class PPC64R12SetupStub final : public Thunk {
+public:
+  PPC64R12SetupStub(Symbol &dest) : Thunk(dest, 0) {}
+  uint32_t size() override { return 16; } // 8-byte paddi + mtctr + bctr
+  void writeTo(uint8_t *buf) override;
+  void addSymbols(ThunkSection &isec) override;
+};
+
 // A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
 // alignment. This gives a possible 26 bits of 'reach'. If the call offset is
 // larger then that we need to emit a long-branch thunk. The target address
@@ -851,6 +863,23 @@
   s->needsTocRestore = true;
 }
 
+void PPC64R12SetupStub::writeTo(uint8_t *buf) {
+  // Displacement from the thunk symbol to the callee, encoded PC-relatively.
+  const int64_t disp = destination.getVA() - getThunkTargetSym()->getVA();
+  if (!isInt<34>(disp))
+    fatal("offset must fit in 34 bits to encode in the instruction");
+  // paddi r12, 0, func@pcrel, 1: imm bits 33:16 in prefix, 15:0 in suffix.
+  const uint64_t imm = (((disp >> 16) & 0x3ffff) << 32) | (disp & 0xffff);
+  writePrefixedInstruction(buf, PADDI_R12_NO_DISP | imm);
+  write32(buf + 8, MTCTR_R12); // mtctr r12
+  write32(buf + 12, BCTR);     // bctr
+}
+
+void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
+  StringRef stubName = saver.save("__gep_setup_" + destination.getName());
+  addSymbol(stubName, STT_FUNC, 0, isec); // STT_FUNC symbol naming the stub
+}
+
 void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
   int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
                    getPPC64TocBase();
@@ -974,7 +1003,8 @@
 }
 
 static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
-  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24) &&
+  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24 ||
+          type == R_PPC64_REL24_NOTOC) &&
          "unexpected relocation type for thunk");
   if (s.isInPlt())
     return make<PPC64PltCallStub>(s);
@@ -984,6 +1014,9 @@
   if ((s.stOther >> 5) == 1)
     return make<PPC64R2SaveStub>(s);
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return make<PPC64R12SetupStub>(s);
+
   if (config->picThunk)
     return make<PPC64PILongBranchThunk>(s, a);
 
diff --git a/lld/test/ELF/ppc64-pcrel-call-to-toc.s b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
@@ -0,0 +1,67 @@
+# REQUIRES: ppc
+# RUN: echo 'SECTIONS { \
+# RUN:   .text_func   0x10010000 : { *(.text_func) } \
+# RUN:   .text_callee 0x10020000 : { *(.text_callee) } \
+# RUN:   .text_caller 0x10030000 : { *(.text_caller) } \
+# RUN:   }' > %t.script
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+## When a function that does not use a TOC calls a function that does, an r12
+## setup stub is inserted.
+
+# SYMBOL:      1: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 callee
+# SYMBOL-NEXT: 2: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 3 caller
+# SYMBOL-NEXT: 3: 0000000010010000 0 NOTYPE LOCAL DEFAULT 1 func
+# SYMBOL:      6: 000000001003000c 16 FUNC LOCAL DEFAULT 3 __gep_setup_callee
+
+# CHECK-LABEL: <func>:
+# CHECK-NEXT:  blr
+
+# CHECK-LABEL: <callee>:
+# CHECK:       bl 0x10010000
+# CHECK-NEXT:  addis 4, 2, -1
+# CHECK-NEXT:  lwz 4, 32744(4)
+# CHECK-NEXT:  blr
+
+# CHECK-LABEL: <caller>:
+# CHECK-NEXT:  bl 0x1003000c
+# CHECK-NEXT:  blr
+
+# CHECK-LABEL: <__gep_setup_callee>:
+# CHECK-NEXT:  paddi 12, 0, -65548, 1
+# CHECK-NEXT:  mtctr 12
+# CHECK-NEXT:  bctr
+
+.section .text_func, "ax", %progbits
+func: # trivial leaf routine; callee reaches it with a plain bl
+  blr
+
+.section .text_callee, "ax", %progbits
+callee:
+.Lfunc_gep1: # global entry point: derive r2 (TOC pointer) from r12
+  addis 2, 12, .TOC.-.Lfunc_gep1@ha
+  addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1: # local entry point: skips the r2 setup above
+  .localentry callee, .Lfunc_lep1-.Lfunc_gep1 # yields st_other 0x60
+  bl func
+  addis 4, 2, global@toc@ha # TOC-relative access, so r2 must be valid here
+  lwz 4, global@toc@l(4)
+  blr
+
+.section .text_caller, "ax", %progbits
+caller:
+  .localentry caller, 1 # yields st_other 0x20: caller does not use a TOC
+  bl callee@notoc       # no-TOC call; the linker redirects it to the stub
+  blr
+global:
+  .long	0
+  .size	global, 4
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -48,6 +48,12 @@
   return make_error<StringError>(Err, object_error::parse_failed);
 }
 
+enum PPCInstrMasks : uint64_t {
+  PADDI_R12_NO_DISP = 0x0610000039800000, // paddi r12, 0, 0, 1 (prefixed)
+  MTCTR_R12 = 0x7D8903A6,                 // mtctr r12
+  BCTR = 0x4E800420,                      // bctr
+};
+
 template <class ELFT> class ELFFile;
 
 template <class ELFT>