diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -40,28 +40,37 @@
 };
 
 enum DFormOpcd {
-  LBZ = 34,
+  /* These are defined by the PPC enum in PPCLegacyToPCRelMap.def (included
+     below) so they can be used to map legacy to pc-relative instructions.
+  LBZ = 34
+  LHZ = 40
+  LWZ = 32
+  LD = 58
+  STB = 38
+  STH = 44
+  STW = 36
+  STD = 62
+  */
   LBZU = 35,
-  LHZ = 40,
   LHZU = 41,
   LHAU = 43,
-  LWZ = 32,
   LWZU = 33,
   LFSU = 49,
-  LD = 58,
   LFDU = 51,
-  STB = 38,
   STBU = 39,
-  STH = 44,
   STHU = 45,
-  STW = 36,
   STWU = 37,
   STFSU = 53,
   STFDU = 55,
-  STD = 62,
   ADDI = 14
 };
 
+// Extracts the 'PO' (primary opcode) field: the top 6 bits of the encoding.
+static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
+
+#define PPC_LEGACY_TO_PREFIXED_LINKER
+#include "llvm/Target/PPCLegacyToPCRelMap.def"
+
 uint64_t elf::getPPC64TocBase() {
   // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
   // TOC starts where the first of these sections starts. We always create a
@@ -326,13 +335,11 @@
 static uint16_t highest(uint64_t v) { return v >> 48; }
 static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; }
 
-// Extracts the 'PO' field of an instruction encoding.
-static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }
-
 static bool isDQFormInstruction(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
     return false;
+  case 6: // Power10 paired loads/stores (lxvp, stxvp).
   case 56:
     // The only instruction with a primary opcode of 56 is `lq`.
     return true;
@@ -475,6 +482,44 @@
     relocateNoSym(loc, R_PPC64_TOC16_LO, val);
     break;
   }
+  case R_PPC64_GOT_PCREL34: {
+    uint64_t insn = readPrefixedInstruction(loc);
+
+    // Clear the first 8 bits of the prefix and the first 6 bits of the
+    // instruction (the primary opcode).
+    insn &= ~0xFF000000FC000000lu;
+
+    // Replace the cleared bits with the values for PADDI (0x600000038000000);
+    insn |= 0x600000038000000lu;
+    writePrefixedInstruction(loc, insn);
+    relocate(loc, rel, val);
+    break;
+  }
+  case R_PPC64_PCREL_OPT: {
+    // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
+    // be relaxed. The eligibility for the relaxation needs to be determined
+    // on that relocation since this one does not relocate a symbol.
+    uint64_t insn = readPrefixedInstruction(loc);
+    uint32_t accessInsn = read32(loc + rel.addend);
+    uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
+
+    // This error is not necessary for correctness but is emitted for now
+    // to ensure we don't miss these opportunities in real code. It can be
+    // removed at a later date.
+    if (pcRelInsn == -1lu) {
+      error("unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
+            Twine::utohexstr(accessInsn));
+      break;
+    }
+
+    // Convert the PADDI to the prefixed version of accessInsn and convert
+    // accessInsn to a nop.
+    uint64_t dispOnly = insn & 0x0003ffff0000ffff;
+    uint64_t finalInsn = dispOnly | pcRelInsn;
+    writePrefixedInstruction(loc, finalInsn);
+    write32(loc + rel.addend, 0x60000000); // nop accessInsn.
+    break;
+  }
   default:
     llvm_unreachable("unexpected relocation type");
   }
@@ -668,6 +713,7 @@
   case R_PPC64_TOC16_LO:
     return R_GOTREL;
   case R_PPC64_GOT_PCREL34:
+  case R_PPC64_PCREL_OPT:
     return R_GOT_PC;
   case R_PPC64_TOC16_HA:
   case R_PPC64_TOC16_LO_DS:
@@ -1085,6 +1131,8 @@
 
 RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr expr) const {
+  if (type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT)
+    return R_RELAX_GOT_PC;
   if (expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
   if (expr == R_RELAX_TLS_LD_TO_LE)
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -999,6 +999,7 @@
 void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   assert(flags & SHF_ALLOC);
   const unsigned bits = config->wordsize * 8;
+  uint64_t lastPPCRelaxedRelocOff = -1lu;
 
   for (const Relocation &rel : relocations) {
     if (rel.expr == R_NONE)
@@ -1017,9 +1018,20 @@
 
     switch (expr) {
     case R_RELAX_GOT_PC:
-    case R_RELAX_GOT_PC_NOPIC:
+    case R_RELAX_GOT_PC_NOPIC: {
+      // The R_PPC64_PCREL_OPT relocation must appear immediately after
+      // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
+      // We can only relax R_PPC64_PCREL_OPT if we have also relaxed
+      // the associated R_PPC64_GOT_PCREL34 since only the latter has an
+      // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
+      // and only relax the other if the saved offset matches.
+      if (type == R_PPC64_GOT_PCREL34)
+        lastPPCRelaxedRelocOff = offset;
+      if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
+        break;
       target->relaxGot(bufLoc, rel, targetVA);
       break;
+    }
     case R_PPC64_RELAX_TOC:
       // rel.sym refers to the STT_SECTION symbol associated to the .toc input
       // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC
diff --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
@@ -0,0 +1,130 @@
+	.section	".text"
+	.comm	storeVal_vector,8,8
+	.comm	useVal_vector,8,8
+	.globl storeVal_longlong
+	.globl useAddr_longlong
+	.globl useVal_longlong
+	.globl storeVal_sshort
+	.globl useAddr_sshort
+	.globl useVal_sshort
+	.globl storeVal_sint
+	.globl useAddr_sint
+	.globl useVal_sint
+	.globl storeVal_double
+	.globl useAddr_double
+	.globl useVal_double
+	.globl storeVal_float
+	.globl useAddr_float
+	.globl useVal_float
+	.globl storeVal_uint
+	.globl useAddr_uint
+	.globl useVal_uint
+	.globl storeVal_ushort
+	.globl useAddr_ushort
+	.globl useVal_ushort
+	.globl storeVal
+	.globl useAddr
+	.globl useVal
+	.section	".data"
+	.align 3
+	.type	storeVal_longlong, @object
+	.size	storeVal_longlong, 8
+storeVal_longlong:
+	.quad	18
+	.type	useAddr_longlong, @object
+	.size	useAddr_longlong, 8
+useAddr_longlong:
+	.quad	17
+	.type	useVal_longlong, @object
+	.size	useVal_longlong, 8
+useVal_longlong:
+	.quad	16
+	.type	storeVal_sshort, @object
+	.size	storeVal_sshort, 2
+storeVal_sshort:
+	.short	-15
+	.type	useAddr_sshort, @object
+	.size	useAddr_sshort, 2
+useAddr_sshort:
+	.short	-14
+	.type	useVal_sshort, @object
+	.size	useVal_sshort, 2
+useVal_sshort:
+	.short	-13
+	.zero	2
+	.type	storeVal_sint, @object
+	.size	storeVal_sint, 4
+storeVal_sint:
+	.long	-12
+	.type	useAddr_sint, @object
+	.size	useAddr_sint, 4
+useAddr_sint:
+	.long	-11
+	.type	useVal_sint, @object
+	.size	useVal_sint, 4
+useVal_sint:
+	.long	-10
+	.zero	4
+	.type	storeVal_double, @object
+	.size	storeVal_double, 8
+storeVal_double:
+	.long	858993459
+	.long	1076966195
+	.type	useAddr_double, @object
+	.size	useAddr_double, 8
+useAddr_double:
+	.long	-1717986918
+	.long	-1070589543
+	.type	useVal_double, @object
+	.size	useVal_double, 8
+useVal_double:
+	.long	0
+	.long	1076756480
+	.type	storeVal_float, @object
+	.size	storeVal_float, 4
+storeVal_float:
+	.long	1045220557
+	.type	useAddr_float, @object
+	.size	useAddr_float, 4
+useAddr_float:
+	.long	-1050568294
+	.type	useVal_float, @object
+	.size	useVal_float, 4
+useVal_float:
+	.long	1095761920
+	.type	storeVal_uint, @object
+	.size	storeVal_uint, 4
+storeVal_uint:
+	.long	12
+	.type	useAddr_uint, @object
+	.size	useAddr_uint, 4
+useAddr_uint:
+	.long	11
+	.type	useVal_uint, @object
+	.size	useVal_uint, 4
+useVal_uint:
+	.long	10
+	.type	storeVal_ushort, @object
+	.size	storeVal_ushort, 2
+storeVal_ushort:
+	.short	1
+	.type	useAddr_ushort, @object
+	.size	useAddr_ushort, 2
+useAddr_ushort:
+	.short	10
+	.type	useVal_ushort, @object
+	.size	useVal_ushort, 2
+useVal_ushort:
+	.short	5
+	.type	storeVal, @object
+	.size	storeVal, 1
+storeVal:
+	.byte	-1
+	.type	useAddr, @object
+	.size	useAddr, 1
+useAddr:
+	.byte	10
+	.type	useVal, @object
+	.size	useVal, 1
+useVal:
+	.byte	5
diff --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
@@ -0,0 +1,300 @@
+# REQUIRES: ppc
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+
+# CHECK-S-LABEL: <check_LBZ_STB>:
+# CHECK-S-NEXT: plbz 10
+# CHECK-S-NEXT: paddi 9
+# CHECK-S-NEXT: li 3, 0
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: rldicl 9, 9, 9, 60
+# CHECK-S-NEXT: add 9, 9, 10
+# CHECK-S-NEXT: pstb 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LBZ_STB>:
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: li 3, 0
+# CHECK-D-NEXT: lbz 10, 0(8)
+# CHECK-D-NEXT: rldicl 9, 9, 9, 60
+# CHECK-D-NEXT: add 9, 9, 10
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: stb 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LBZ_STB:
+  pld 8,useVal@got@pcrel(0),1
+.Lpcrel1:
+  pld 9,useAddr@got@pcrel(0),1
+  li 3,0
+  .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+  lbz 10,0(8)
+  rldicl 9,9,9,60
+  add 9,9,10
+  pld 10,storeVal@got@pcrel(0),1
+.Lpcrel2:
+  .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+  stb 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHZ_STH>:
+# CHECK-S-NEXT: plhz 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: psth 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LHZ_STH>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lhz 3, 0(9)
+# CHECK-D-NEXT: nop
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: sth 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LHZ_STH:
+  pld 9,useVal_ushort@got@pcrel(0),1
+.Lpcrel3:
+  .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+  lhz 3,0(9)
+  pld 9,storeVal_ushort@got@pcrel(0),1
+.Lpcrel4:
+  .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+  sth 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWZ_STW>:
+# CHECK-S-NEXT: plwz 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstw 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LWZ_STW>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lwz 3, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stw 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LWZ_STW:
+  pld 9,useVal_uint@got@pcrel(0),1
+.Lpcrel5:
+  .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+  lwz 3,0(9)
+  pld 9,storeVal_uint@got@pcrel(0),1
+.Lpcrel6:
+  .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+  stw 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFS_STFS>:
+# CHECK-S-NEXT: plfs 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstfs 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LFS_STFS>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lfs 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stfs 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LFS_STFS:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel7:
+  .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+  lfs 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel8:
+  .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+  stfs 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFD_STFD>:
+# CHECK-S-NEXT: plfd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstfd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LFD_STFD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lfd 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stfd 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LFD_STFD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel9:
+  .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+  lfd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel10:
+  .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+  stfd 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWA_STW>:
+# CHECK-S-NEXT: mr 9, 3
+# CHECK-S-NEXT: plwa 3
+# CHECK-S-NEXT: pstw 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LWA_STW>:
+# CHECK-D-NEXT: mr 9, 3
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: lwa 3, 0(8)
+# CHECK-D-NEXT: stw 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LWA_STW:
+  mr 9,3
+  pld 8,useVal_sint@got@pcrel(0),1
+.Lpcrel11:
+  pld 10,storeVal_sint@got@pcrel(0),1
+.Lpcrel12:
+  .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
+  lwa 3,0(8)
+  .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
+  stw 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHA_STH>:
+# CHECK-S-NEXT: mr 9, 3
+# CHECK-S-NEXT: plha 3
+# CHECK-S-NEXT: psth 9
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LHA_STH>:
+# CHECK-D-NEXT: mr 9, 3
+# CHECK-D-NEXT: pld 8
+# CHECK-D-NEXT: pld 10
+# CHECK-D-NEXT: lha 3, 0(8)
+# CHECK-D-NEXT: sth 9, 0(10)
+# CHECK-D-NEXT: blr
+check_LHA_STH:
+  mr 9,3
+  pld 8,useVal_sshort@got@pcrel(0),1
+.Lpcrel13:
+  pld 10,storeVal_sshort@got@pcrel(0),1
+.Lpcrel14:
+  .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
+  lha 3,0(8)
+  .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
+  sth 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LD_STD>:
+# CHECK-S-NEXT: pld 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstd 3
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LD_STD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: ld 3, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: std 3, 0(9)
+# CHECK-D-NEXT: blr
+check_LD_STD:
+  pld 9,useVal_longlong@got@pcrel(0),1
+.Lpcrel15:
+  .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
+  ld 3,0(9)
+  pld 9,storeVal_longlong@got@pcrel(0),1
+.Lpcrel16:
+  .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
+  std 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXV_STXV>:
+# CHECK-S-NEXT: plxv 34
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxv 34
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXV_STXV>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxv 34, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxv 34, 0(9)
+# CHECK-D-NEXT: blr
+check_LXV_STXV:
+  pld 9,useVal_vector@got@pcrel(0),1
+.Lpcrel17:
+  .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
+  lxv 34,0(9)
+  pld 9,storeVal_vector@got@pcrel(0),1
+.Lpcrel18:
+  .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
+  stxv 34,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-S-NEXT: plxssp 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxssp 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxssp 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxssp 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LXSSP_STXSSP:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel19:
+  .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
+  lxssp 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel20:
+  .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
+  stxssp 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSD_STXSD>:
+# CHECK-S-NEXT: plxsd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: pstxsd 1
+# CHECK-S-NEXT: nop
+# CHECK-S-NEXT: blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD>:
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: lxsd 1, 0(9)
+# CHECK-D-NEXT: pld 9
+# CHECK-D-NEXT: stxsd 1, 0(9)
+# CHECK-D-NEXT: blr
+check_LXSD_STXSD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel21:
+  .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
+  lxsd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel22:
+  .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8)
+  stxsd 1,0(9)
+  blr
diff --git a/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Target/PPCLegacyToPCRelMap.def
@@ -0,0 +1,257 @@
+/* This file defines a mapping between legacy instructions and their
+   PC-relative prefixed versions. It has two uses:
+   1. In the compiler, to tell the compiler for which instructions
+      it is allowed to emit the R_PPC64_PCREL_OPT relocation.
+   2. In LLD, to provide the encoding for the PC-relative prefixed
+      version when given the legacy instruction encoding.
+
+   The way this file differentiates between the two is with two macros:
+   PPC_LEGACY_TO_PREFIXED_COMPILER
+   PPC_LEGACY_TO_PREFIXED_LINKER
+
+   Since the linker is just looking at instruction encodings, there is
+   an inevitable amount of bit manipulation here that can be difficult to
+   follow. While a significant amount of effort has gone into documenting
+   the various values and bit positions, the definitive source for instruction
+   encodings is ISA3.1. The reader is encouraged to reference the ISA that can
+   be found at:
+
+   https://ibm.ent.box.com/s/hhjfw0x0lrbtyzmiaffnbxh2fuo0fog0
+*/
+#if (!defined PPC_LEGACY_TO_PREFIXED_COMPILER) &&                              \
+    (!defined PPC_LEGACY_TO_PREFIXED_LINKER)
+#error "Need to define PPC_LEGACY_TO_PREFIXED_{COMPILER|LINKER}"
+#endif
+#ifdef PPC_LEGACY_TO_PREFIXED_LINKER
+// For the linker, the enumerators are primary opcodes for the most part.
+// There are some instructions that share primary opcodes. For those, we
+// set the extended opcode field that differentiates them as the most
+// significant bits. For example, LWA and LD share a primary opcode. What
+// differentiates them is the extended opcode field (two least significant
+// bits of the encoding). LWA is 0b10 and LD is 0b00. So in order for the
+// two instructions to have a unique enumerator value and be mapped
+// to the correct prefixed instructions, we shift those values to the
+// most significant bits and or them with the primary opcode.
+enum PPC : uint64_t {
+  // Loads
+  PREFIX_MLS = 0x0610000000000000lu, // Occupies only the upper 32 bits.
+  PREFIX_8LS = 0x0410000000000000lu, // Occupies only the upper 32 bits.
+  LBZ = 34,
+  PLBZpc = PREFIX_MLS, // Prefix only.
+  LBZ8 = 34,
+  PLBZ8pc = PREFIX_MLS, // Prefix only.
+  LHZ = 40,
+  PLHZpc = PREFIX_MLS, // Prefix only.
+  LHZ8 = 40,
+  PLHZ8pc = PREFIX_MLS, // Prefix only.
+  LWZ = 32,
+  PLWZpc = PREFIX_MLS, // Prefix only.
+  LWZ8 = 32,
+  PLWZ8pc = PREFIX_MLS, // Prefix only.
+  LHA = 42,
+  PLHApc = PREFIX_MLS, // Prefix only.
+  LHA8 = 42,
+  PLHA8pc = PREFIX_MLS,               // Prefix only.
+  LWA = 58 | 0x80000000,              // (Encoding & 0x3) << 30.
+  PLWApc = PREFIX_8LS | 0xA4000000lu, // Prefix | Primary opc.
+  LD = 58 | 0x0,                      // (Encoding & 0x3) << 30.
+  PLDpc = PREFIX_8LS | 0xE4000000lu,  // Prefix | Primary opc.
+  LFS = 48,
+  PLFSpc = PREFIX_MLS,                  // Prefix only.
+  LXSSP = 57 | 0xC0000000,              // (Encoding & 0x3) << 30.
+  PLXSSPpc = PREFIX_8LS | 0xAC000000lu, // Prefix | Primary opc.
+  LFD = 50,
+  PLFDpc = PREFIX_MLS,                 // Prefix only.
+  LXSD = 57 | 0x80000000,              // (Encoding & 0x3) << 30.
+  PLXSDpc = PREFIX_8LS | 0xA8000000lu, // Prefix | Primary opc.
+  LXV = 61 | 0x20000000,               // (Encoding & 0x7) << 29.
+  PLXVpc = PREFIX_8LS | 0xC8000000lu,  // Prefix | Primary opc.
+  LXVP = 6 | 0x0,                      // (Encoding & 0xF) << 28.
+  PLXVPpc = PREFIX_8LS | 0xE8000000lu, // Prefix | Primary opc.
+  DFLOADf32 = 48, // Same value as LFS.
+  DFLOADf64 = 50, // Same value as LFD.
+
+  // Stores
+  STB = 38,
+  PSTBpc = PREFIX_MLS, // Prefix only.
+  STB8 = 38,
+  PSTB8pc = PREFIX_MLS, // Prefix only.
+  STH = 44,
+  PSTHpc = PREFIX_MLS, // Prefix only.
+  STH8 = 44,
+  PSTH8pc = PREFIX_MLS, // Prefix only.
+  STW = 36,
+  PSTWpc = PREFIX_MLS, // Prefix only.
+  STW8 = 36,
+  PSTW8pc = PREFIX_MLS, // Prefix only.
+  STD = 62,
+  PSTDpc = PREFIX_8LS | 0xF4000000lu, // Prefix | Primary opc.
+  STFS = 52,
+  PSTFSpc = PREFIX_MLS,                  // Prefix only.
+  STXSSP = 61 | 0xC0000000,              // (Encoding & 0x3) << 30.
+  PSTXSSPpc = PREFIX_8LS | 0xBC000000lu, // Prefix | Primary opc.
+  STFD = 54,
+  PSTFDpc = PREFIX_MLS,                 // Prefix only.
+  STXSD = 61 | 0x80000000,              // (Encoding & 0x3) << 30.
+  PSTXSDpc = PREFIX_8LS | 0xB8000000lu, // Prefix | Primary opc.
+  STXV = 61 | 0xA0000000,               // (Encoding & 0x7) << 29.
+  PSTXVpc = PREFIX_8LS | 0xD8000000lu,  // Prefix | Primary opc.
+  STXVP = 6 | 0x10000000,               // (Encoding & 0xF) << 28.
+  PSTXVPpc = PREFIX_8LS | 0xF8000000lu, // Prefix | Primary opc.
+  DFSTOREf32 = 52, // Same value as STFS.
+  DFSTOREf64 = 54  // Same value as STFD.
+};
+
+enum Mask : uint64_t { // Bit positions below count from the MSB.
+  OPC_AND_RST = 0xFFE00000, // Primary opc (0-5) and R[ST] (6-10).
+  ONLY_RST = 0x3E00000,     // [RS]T (6-10).
+  ST_STX28_TO5 = 0x3E00000, // S/T (6-10); [S/T]X moves 28->5. Equals ONLY_RST.
+};
+static const uint64_t InstrMasks[][2] = {
+    // Loads: {adjusted legacy opcode, mask of encoding bits to keep}.
+    {PPC::LBZ, OPC_AND_RST},
+    {PPC::LHZ, OPC_AND_RST},
+    {PPC::LWZ, OPC_AND_RST},
+    {PPC::LHA, OPC_AND_RST},
+    {PPC::LWA, ONLY_RST},
+    {PPC::LD, ONLY_RST},
+    {PPC::LFS, OPC_AND_RST},
+    {PPC::LXSSP, ONLY_RST},
+    {PPC::LFD, OPC_AND_RST},
+    {PPC::LXSD, ONLY_RST},
+    {PPC::LXV, ST_STX28_TO5},
+    {PPC::LXVP, ONLY_RST},
+    // Stores: same column layout as the loads above.
+    {PPC::STB, OPC_AND_RST},
+    {PPC::STH, OPC_AND_RST},
+    {PPC::STW, OPC_AND_RST},
+    {PPC::STD, ONLY_RST},
+    {PPC::STFS, OPC_AND_RST},
+    {PPC::STXSSP, ONLY_RST},
+    {PPC::STFD, OPC_AND_RST},
+    {PPC::STXSD, ONLY_RST},
+    {PPC::STXV, ST_STX28_TO5},
+    {PPC::STXVP, ONLY_RST}};
+#endif
+
+/*******************************************************************************
+ * The interface between the compiler and the linker is the following table.
+ * It contains the mapping between the legacy instructions and their
+ * pc-relative forms. When support for a new such instruction is added to the
+ * compiler, it needs to be added to this table if the compiler will use this
+ * instruction as a target of the R_PPC64_PCREL_OPT relocation. If this is not
+ * done, the linker will not be able to optimize the instruction.
+ ******************************************************************************/
+static const uint64_t Map[][2] = {
+    // Loads: {legacy opcode, PC-relative prefixed opcode}.
+    {PPC::LBZ, PPC::PLBZpc},
+    {PPC::LBZ8, PPC::PLBZ8pc},
+    {PPC::LHZ, PPC::PLHZpc},
+    {PPC::LHZ8, PPC::PLHZ8pc},
+    {PPC::LWZ, PPC::PLWZpc},
+    {PPC::LWZ8, PPC::PLWZ8pc},
+    {PPC::LHA, PPC::PLHApc},
+    {PPC::LHA8, PPC::PLHA8pc},
+    {PPC::LWA, PPC::PLWApc},
+    {PPC::LD, PPC::PLDpc},
+    {PPC::LFS, PPC::PLFSpc},
+    {PPC::LXSSP, PPC::PLXSSPpc},
+    {PPC::LFD, PPC::PLFDpc},
+    {PPC::LXSD, PPC::PLXSDpc},
+    {PPC::LXV, PPC::PLXVpc},
+// FIXME: compiler support for paired memory operations will be added soon.
+//    {PPC::LXVP, PPC::PLXVPpc},
+    {PPC::DFLOADf32, PPC::PLFSpc},
+    {PPC::DFLOADf64, PPC::PLFDpc},
+
+    // Stores: {legacy opcode, PC-relative prefixed opcode}.
+    {PPC::STB, PPC::PSTBpc},
+    {PPC::STB8, PPC::PSTB8pc},
+    {PPC::STH, PPC::PSTHpc},
+    {PPC::STH8, PPC::PSTH8pc},
+    {PPC::STW, PPC::PSTWpc},
+    {PPC::STW8, PPC::PSTW8pc},
+    {PPC::STD, PPC::PSTDpc},
+    {PPC::STFS, PPC::PSTFSpc},
+    {PPC::STXSSP, PPC::PSTXSSPpc},
+    {PPC::STFD, PPC::PSTFDpc},
+    {PPC::STXSD, PPC::PSTXSDpc},
+    {PPC::STXV, PPC::PSTXVpc},
+// FIXME: compiler support for paired memory operations will be added soon.
+//    {PPC::STXVP, PPC::PSTXVPpc},
+    {PPC::DFSTOREf32, PPC::PSTFSpc},
+    {PPC::DFSTOREf64, PPC::PSTFDpc}};
+
+static unsigned getInstrMapIdx(unsigned Opc) {
+  unsigned Idx = 0, End = llvm::array_lengthof(Map);
+  while (Idx != End && Opc != Map[Idx][0])
+    ++Idx;
+  return Idx != End ? Idx : -1u; // -1u means Opc has no row in Map.
+}
+
+#ifdef PPC_LEGACY_TO_PREFIXED_COMPILER
+// Compiler side: an opcode qualifies iff it has a row in the map above.
+static bool hasPCRelativeForm(unsigned Opc) {
+  return !(getInstrMapIdx(Opc) == -1u);
+}
+
+#else
+// For the linker, we need to be able to replace a legacy instruction with a
+// PC-relative one. Returns the InstrMasks row for Opc, or -1u if it has none.
+static unsigned getInstrMaskIdx(unsigned Opc) {
+  for (unsigned i = 0; i < llvm::array_lengthof(InstrMasks); i++)
+    if (Opc == InstrMasks[i][0])
+      return i;
+  return -1u;
+}
+
+// Returns the opcode from the PPC enumeration above, accounting for adjustments
+// for instructions that share primary opcodes.
+static uint64_t getAdjustedOpc(unsigned Encoding) {
+  uint64_t Opc = getPrimaryOpCode(Encoding);
+
+  // If the primary opcode is shared between multiple instructions, we need to
+  // fix it up to match the actual instruction we are after.
+
+  // For DQ-Form vector instrs, the two least significant bits are 01 and the
+  // field that differentiates them is 3 bits wide.
+  if (Opc == 61 && (Encoding & 0x3) == 0x1) // LXV/STXV.
+    Opc |= (Encoding & 0x7) << 29;
+
+  // For DS-Form instrs, there are 4 different primary opcodes and the two
+  // least significant bits differentiate instrs that share a PO (STD, STDU).
+  else if (Opc == 58 || Opc == 57 || Opc == 61 || Opc == 62)
+    Opc |= (Encoding & 0x3) << 30;
+
+  // Paired loads and stores from ISA3.1 use the 4 least significant bits to
+  // differentiate.
+  else if (Opc == 6)
+    Opc |= (Encoding & 0xf) << 28;
+  return Opc;
+}
+
+// Given the encoding of a legacy instruction, returns its prefixed PC-relative
+// form with all the displacement bits cleared. The caller is to or this with
+// the displacement bits. Returns -1lu if the instruction has no such form.
+static uint64_t getPCRelativeForm(unsigned Encoding) {
+  uint64_t Opc = getAdjustedOpc(Encoding);
+  unsigned InstrIdx = getInstrMapIdx(Opc);
+  unsigned MaskIdx = getInstrMaskIdx(Opc);
+  if (InstrIdx == -1u || MaskIdx == -1u)
+    return -1lu;
+
+  // The prefixed instruction is computed by masking out bits from the original
+  // instruction and then or-ing that with the prefixed instruction set bits.
+  uint64_t PrefixedInstr = (uint64_t)Encoding & InstrMasks[MaskIdx][1];
+  PrefixedInstr |= Map[InstrIdx][1];
+
+  // Only LXV/STXV move the [S/T]X bit from 28 to 5. Test the opcode, not the
+  // mask: ST_STX28_TO5 has the same value as ONLY_RST, so comparing masks would
+  // also copy a displacement bit of DS-form instrs into the result.
+  if (Opc == PPC::LXV || Opc == PPC::STXV)
+    PrefixedInstr |= (uint64_t)(Encoding & 0x8) << 23;
+  return PrefixedInstr;
+}
+#endif
+#undef PPC_LEGACY_TO_PREFIXED_COMPILER
+#undef PPC_LEGACY_TO_PREFIXED_LINKER
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -44,446 +44,408 @@
 
 namespace {
 
-static bool hasPCRelativeForm(MachineInstr &Use) {
-  switch (Use.getOpcode()) {
-  default:
-    return false;
-  case PPC::LBZ:
-  case PPC::LBZ8:
-  case PPC::LHA:
-  case PPC::LHA8:
-  case PPC::LHZ:
-  case PPC::LHZ8:
-  case PPC::LWZ:
-  case PPC::LWZ8:
-  case PPC::STB:
-  case PPC::STB8:
-  case PPC::STH:
-  case PPC::STH8:
-  case PPC::STW:
-  case PPC::STW8:
-  case PPC::LD:
-  case PPC::STD:
-  case PPC::LWA:
-  case PPC::LXSD:
-  case PPC::LXSSP:
-  case PPC::LXV:
-  case PPC::STXSD:
-  case PPC::STXSSP:
-  case PPC::STXV:
-  case PPC::LFD:
-  case PPC::LFS:
-  case PPC::STFD:
-  case PPC::STFS:
-  case PPC::DFLOADf32:
-  case PPC::DFLOADf64:
-  case PPC::DFSTOREf32:
-  case PPC::DFSTOREf64:
-    return true;
+#define PPC_LEGACY_TO_PREFIXED_COMPILER
+#include "llvm/Target/PPCLegacyToPCRelMap.def"
+
+class PPCPreEmitPeephole : public MachineFunctionPass {
+public:
+  static char ID;
+  PPCPreEmitPeephole() : MachineFunctionPass(ID) {
+    initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
   }
-}
-
-  class PPCPreEmitPeephole : public MachineFunctionPass {
-  public:
-    static char ID;
-    PPCPreEmitPeephole() : MachineFunctionPass(ID) {
-      initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
 
-    MachineFunctionProperties getRequiredProperties() const override {
-      return MachineFunctionProperties().set(
-          MachineFunctionProperties::Property::NoVRegs);
-    }
-
-    // This function removes any redundant load immediates. It has two level
-    // loops - The outer loop finds the load immediates BBI that could be used
-    // to replace following redundancy. The inner loop scans instructions that
-    // after BBI to find redundancy and update kill/dead flags accordingly. If
-    // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
-    // that modify the def register of BBI would break the scanning.
-    // DeadOrKillToUnset is a pointer to the previous operand that had the
-    // kill/dead flag set. It keeps track of the def register of BBI, the use
-    // registers of AfterBBIs and the def registers of AfterBBIs.
-    bool removeRedundantLIs(MachineBasicBlock &MBB,
-                            const TargetRegisterInfo *TRI) {
-      LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
-                 MBB.dump(); dbgs() << "\n");
-
-      DenseSet<MachineInstr *> InstrsToErase;
-      for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
-        // Skip load immediate that is marked to be erased later because it
-        // cannot be used to replace any other instructions.
-        if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
-          continue;
-        // Skip non-load immediate.
-        unsigned Opc = BBI->getOpcode();
-        if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
-            Opc != PPC::LIS8)
-          continue;
-        // Skip load immediate, where the operand is a relocation (e.g., $r3 =
-        // LI target-flags(ppc-lo) %const.0).
-        if (!BBI->getOperand(1).isImm())
-          continue;
-        assert(BBI->getOperand(0).isReg() &&
-               "Expected a register for the first operand");
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
 
-        LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
 
-        Register Reg = BBI->getOperand(0).getReg();
-        int64_t Imm = BBI->getOperand(1).getImm();
-        MachineOperand *DeadOrKillToUnset = nullptr;
-        if (BBI->getOperand(0).isDead()) {
-          DeadOrKillToUnset = &BBI->getOperand(0);
-          LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
-                            << " from load immediate " << *BBI
-                            << " is a unsetting candidate\n");
+  // This function removes any redundant load immediates. It has two nested
+  // loops: the outer loop finds a load immediate BBI that can stand in for
+  // later redundant ones. The inner loop scans the instructions that come
+  // after BBI to find redundancy and update kill/dead flags accordingly. If
+  // AfterBBI is the same as BBI, it is redundant; otherwise any instruction
+  // that modifies the def register of BBI ends the scan.
+  // DeadOrKillToUnset is a pointer to the previous operand that had the
+  // kill/dead flag set. It keeps track of the def register of BBI, the use
+  // registers of AfterBBIs and the def registers of AfterBBIs.
+  bool removeRedundantLIs(MachineBasicBlock &MBB,
+                          const TargetRegisterInfo *TRI) {
+    LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
+               MBB.dump(); dbgs() << "\n");
+
+    DenseSet<MachineInstr *> InstrsToErase;
+    for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+      // Skip load immediate that is marked to be erased later because it
+      // cannot be used to replace any other instructions.
+      if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+        continue;
+      // Skip non-load immediate.
+      unsigned Opc = BBI->getOpcode();
+      if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
+          Opc != PPC::LIS8)
+        continue;
+      // Skip load immediate, where the operand is a relocation (e.g., $r3 =
+      // LI target-flags(ppc-lo) %const.0).
+      if (!BBI->getOperand(1).isImm())
+        continue;
+      assert(BBI->getOperand(0).isReg() &&
+             "Expected a register for the first operand");
+
+      LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
+
+      Register Reg = BBI->getOperand(0).getReg();
+      int64_t Imm = BBI->getOperand(1).getImm();
+      MachineOperand *DeadOrKillToUnset = nullptr;
+      if (BBI->getOperand(0).isDead()) {
+        DeadOrKillToUnset = &BBI->getOperand(0);
+        LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
+                          << " from load immediate " << *BBI
+                          << " is a unsetting candidate\n");
+      }
+      // This loop scans instructions after BBI to see if there is any
+      // redundant load immediate.
+      for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
+           ++AfterBBI) {
+        // Track the operand that kills Reg. We would unset the kill flag of
+        // the operand if there is a following redundant load immediate.
+        int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+
+        // We can't just clear implicit kills, so if we encounter one, stop
+        // looking further.
+        if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
+          LLVM_DEBUG(dbgs()
+                     << "Encountered an implicit kill, cannot proceed: ");
+          LLVM_DEBUG(AfterBBI->dump());
+          break;
         }
-        // This loop scans instructions after BBI to see if there is any
-        // redundant load immediate.
-        for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
-             ++AfterBBI) {
-          // Track the operand that kill Reg. We would unset the kill flag of
-          // the operand if there is a following redundant load immediate.
-          int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
-
-          // We can't just clear implicit kills, so if we encounter one, stop
-          // looking further.
-          if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
-            LLVM_DEBUG(dbgs()
-                       << "Encountered an implicit kill, cannot proceed: ");
-            LLVM_DEBUG(AfterBBI->dump());
-            break;
-          }
-
-          if (KillIdx != -1) {
-            assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
-            DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
-            LLVM_DEBUG(dbgs()
-                       << " Kill flag of " << *DeadOrKillToUnset << " from "
-                       << *AfterBBI << " is a unsetting candidate\n");
-          }
 
-          if (!AfterBBI->modifiesRegister(Reg, TRI))
-            continue;
-          // Finish scanning because Reg is overwritten by a non-load
-          // instruction.
-          if (AfterBBI->getOpcode() != Opc)
-            break;
-          assert(AfterBBI->getOperand(0).isReg() &&
-                 "Expected a register for the first operand");
-          // Finish scanning because Reg is overwritten by a relocation or a
-          // different value.
-          if (!AfterBBI->getOperand(1).isImm() ||
-              AfterBBI->getOperand(1).getImm() != Imm)
-            break;
+        if (KillIdx != -1) {
+          assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
+          DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
+          LLVM_DEBUG(dbgs()
+                     << " Kill flag of " << *DeadOrKillToUnset << " from "
+                     << *AfterBBI << " is a unsetting candidate\n");
+        }
 
-          // It loads same immediate value to the same Reg, which is redundant.
-          // We would unset kill flag in previous Reg usage to extend live range
-          // of Reg first, then remove the redundancy.
-          if (DeadOrKillToUnset) {
-            LLVM_DEBUG(dbgs()
-                       << " Unset dead/kill flag of " << *DeadOrKillToUnset
-                       << " from " << *DeadOrKillToUnset->getParent());
-            if (DeadOrKillToUnset->isDef())
-              DeadOrKillToUnset->setIsDead(false);
-            else
-              DeadOrKillToUnset->setIsKill(false);
-          }
-          DeadOrKillToUnset =
-              AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
-          if (DeadOrKillToUnset)
-            LLVM_DEBUG(dbgs()
-                       << " Dead flag of " << *DeadOrKillToUnset << " from "
-                       << *AfterBBI << " is a unsetting candidate\n");
-          InstrsToErase.insert(&*AfterBBI);
-          LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
-                     AfterBBI->dump());
+        if (!AfterBBI->modifiesRegister(Reg, TRI))
+          continue;
+        // Finish scanning because Reg is overwritten by a non-load
+        // instruction.
+        if (AfterBBI->getOpcode() != Opc)
+          break;
+        assert(AfterBBI->getOperand(0).isReg() &&
+               "Expected a register for the first operand");
+        // Finish scanning because Reg is overwritten by a relocation or a
+        // different value.
+        if (!AfterBBI->getOperand(1).isImm() ||
+            AfterBBI->getOperand(1).getImm() != Imm)
+          break;
+
+        // It loads same immediate value to the same Reg, which is redundant.
+        // We would unset kill flag in previous Reg usage to extend live range
+        // of Reg first, then remove the redundancy.
+        if (DeadOrKillToUnset) {
+          LLVM_DEBUG(dbgs() << " Unset dead/kill flag of " << *DeadOrKillToUnset
+                            << " from " << *DeadOrKillToUnset->getParent());
+          if (DeadOrKillToUnset->isDef())
+            DeadOrKillToUnset->setIsDead(false);
+          else
+            DeadOrKillToUnset->setIsKill(false);
         }
+        DeadOrKillToUnset =
+            AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
+        if (DeadOrKillToUnset)
+          LLVM_DEBUG(dbgs()
+                     << " Dead flag of " << *DeadOrKillToUnset << " from "
+                     << *AfterBBI << " is a unsetting candidate\n");
+        InstrsToErase.insert(&*AfterBBI);
+        LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
+                   AfterBBI->dump());
       }
+    }
 
-      for (MachineInstr *MI : InstrsToErase) {
-        MI->eraseFromParent();
-      }
-      NumRemovedInPreEmit += InstrsToErase.size();
-      return !InstrsToErase.empty();
+    for (MachineInstr *MI : InstrsToErase) {
+      MI->eraseFromParent();
     }
+    NumRemovedInPreEmit += InstrsToErase.size();
+    return !InstrsToErase.empty();
+  }
 
-    // Check if this instruction is a PLDpc that is part of a GOT indirect
-    // access.
-    bool isGOTPLDpc(MachineInstr &Instr) {
-      if (Instr.getOpcode() != PPC::PLDpc)
-        return false;
+  // Check if this instruction is a PLDpc that is part of a GOT indirect
+  // access.
+  bool isGOTPLDpc(MachineInstr &Instr) {
+    if (Instr.getOpcode() != PPC::PLDpc)
+      return false;
 
-      // The result must be a register.
-      const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
-      if (!LoadedAddressReg.isReg())
-        return false;
+    // The result must be a register.
+    const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
+    if (!LoadedAddressReg.isReg())
+      return false;
 
-      // Make sure that this is a global symbol.
-      const MachineOperand &SymbolOp = Instr.getOperand(1);
-      if (!SymbolOp.isGlobal())
-        return false;
+    // Make sure that this is a global symbol.
+    const MachineOperand &SymbolOp = Instr.getOperand(1);
+    if (!SymbolOp.isGlobal())
+      return false;
 
-      // Finally return true only if the GOT flag is present.
-      return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
-    }
+    // Finally return true only if the GOT flag is present.
+    return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+  }
 
-    bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
-      MachineFunction *MF = MBB.getParent();
-      // Add this linker opt only if we are using PC Relative memops.
-      if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
-        return false;
-
-      // Struct to keep track of one def/use pair for a GOT indirect access.
-      struct GOTDefUsePair {
-        MachineBasicBlock::iterator DefInst;
-        MachineBasicBlock::iterator UseInst;
-        Register DefReg;
-        Register UseReg;
-        bool StillValid;
-      };
-      // Vector of def/ues pairs in this basic block.
-      SmallVector<GOTDefUsePair, 4> CandPairs;
-      SmallVector<GOTDefUsePair, 4> ValidPairs;
-      bool MadeChange = false;
-
-      // Run through all of the instructions in the basic block and try to
-      // collect potential pairs of GOT indirect access instructions.
-      for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
-        // Look for the initial GOT indirect load.
-        if (isGOTPLDpc(*BBI)) {
-          GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
-                                    BBI->getOperand(0).getReg(),
-                                    PPC::NoRegister, true};
-          CandPairs.push_back(CurrentPair);
-          continue;
-        }
+  bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+    MachineFunction *MF = MBB.getParent();
+    // Add this linker opt only if we are using PC Relative memops.
+    if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+      return false;
+
+    // Struct to keep track of one def/use pair for a GOT indirect access.
+    struct GOTDefUsePair {
+      MachineBasicBlock::iterator DefInst;
+      MachineBasicBlock::iterator UseInst;
+      Register DefReg;
+      Register UseReg;
+      bool StillValid;
+    };
+    // Vector of def/use pairs in this basic block.
+    SmallVector<GOTDefUsePair, 4> CandPairs;
+    SmallVector<GOTDefUsePair, 4> ValidPairs;
+    bool MadeChange = false;
+
+    // Run through all of the instructions in the basic block and try to
+    // collect potential pairs of GOT indirect access instructions.
+    for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+      // Look for the initial GOT indirect load.
+      if (isGOTPLDpc(*BBI)) {
+        GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+                                  BBI->getOperand(0).getReg(), PPC::NoRegister,
+                                  true};
+        CandPairs.push_back(CurrentPair);
+        continue;
+      }
 
-        // We haven't encountered any new PLD instructions, nothing to check.
-        if (CandPairs.empty())
+      // We haven't encountered any new PLD instructions, nothing to check.
+      if (CandPairs.empty())
+        continue;
+
+      // Run through the candidate pairs and see if any of the registers
+      // defined in the PLD instructions are used by this instruction.
+      // Note: the size of CandPairs can change in the loop.
+      for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
+        GOTDefUsePair &Pair = CandPairs[Idx];
+        // The instruction does not use or modify this PLD's def reg,
+        // ignore it.
+        if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+            !BBI->modifiesRegister(Pair.DefReg, TRI))
           continue;
 
-        // Run through the candidate pairs and see if any of the registers
-        // defined in the PLD instructions are used by this instruction.
-        // Note: the size of CandPairs can change in the loop.
-        for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
-          GOTDefUsePair &Pair = CandPairs[Idx];
-          // The instruction does not use or modify this PLD's def reg,
-          // ignore it.
-          if (!BBI->readsRegister(Pair.DefReg, TRI) &&
-              !BBI->modifiesRegister(Pair.DefReg, TRI))
-            continue;
-
-          // The use needs to be used in the address compuation and not
-          // as the register being stored for a store.
-          const MachineOperand *UseOp =
-              hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
-
-          // Check for a valid use.
-          if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
-              UseOp->isUse() && UseOp->isKill()) {
-            Pair.UseInst = BBI;
-            Pair.UseReg = BBI->getOperand(0).getReg();
-            ValidPairs.push_back(Pair);
-          }
-          CandPairs.erase(CandPairs.begin() + Idx);
+        // The use needs to be used in the address computation and not
+        // as the register being stored for a store.
+        const MachineOperand *UseOp =
+            hasPCRelativeForm(BBI->getOpcode()) ? &BBI->getOperand(2) : nullptr;
+
+        // Check for a valid use.
+        if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+            UseOp->isUse() && UseOp->isKill()) {
+          Pair.UseInst = BBI;
+          Pair.UseReg = BBI->getOperand(0).getReg();
+          ValidPairs.push_back(Pair);
         }
+        CandPairs.erase(CandPairs.begin() + Idx);
       }
+    }
 
-      // Go through all of the pairs and check for any more valid uses.
-      for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
-        // We shouldn't be here if we don't have a valid pair.
-        assert(Pair->UseInst.isValid() && Pair->StillValid &&
-               "Kept an invalid def/use pair for GOT PCRel opt");
-        // We have found a potential pair. Search through the instructions
-        // between the def and the use to see if it is valid to mark this as a
-        // linker opt.
-        MachineBasicBlock::iterator BBI = Pair->DefInst;
-        ++BBI;
-        for (; BBI != Pair->UseInst; ++BBI) {
-          if (BBI->readsRegister(Pair->UseReg, TRI) ||
-              BBI->modifiesRegister(Pair->UseReg, TRI)) {
-            Pair->StillValid = false;
-            break;
-          }
+    // Go through all of the pairs and check for any more valid uses.
+    for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
+      // We shouldn't be here if we don't have a valid pair.
+      assert(Pair->UseInst.isValid() && Pair->StillValid &&
+             "Kept an invalid def/use pair for GOT PCRel opt");
+      // We have found a potential pair. Search through the instructions
+      // between the def and the use to see if it is valid to mark this as a
+      // linker opt.
+      MachineBasicBlock::iterator BBI = Pair->DefInst;
+      ++BBI;
+      for (; BBI != Pair->UseInst; ++BBI) {
+        if (BBI->readsRegister(Pair->UseReg, TRI) ||
+            BBI->modifiesRegister(Pair->UseReg, TRI)) {
+          Pair->StillValid = false;
+          break;
         }
-
-        if (!Pair->StillValid)
-          continue;
-
-        // The load/store instruction that uses the address from the PLD will
-        // either use a register (for a store) or define a register (for the
-        // load). That register will be added as an implicit def to the PLD
-        // and as an implicit use on the second memory op. This is a precaution
-        // to prevent future passes from using that register between the two
-        // instructions.
-        MachineOperand ImplDef =
-            MachineOperand::CreateReg(Pair->UseReg, true, true);
-        MachineOperand ImplUse =
-            MachineOperand::CreateReg(Pair->UseReg, false, true);
-        Pair->DefInst->addOperand(ImplDef);
-        Pair->UseInst->addOperand(ImplUse);
-
-        // Create the symbol.
-        MCContext &Context = MF->getContext();
-        MCSymbol *Symbol =
-            Context.createTempSymbol(Twine("pcrel"), false, false);
-        MachineOperand PCRelLabel =
-            MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
-        Pair->DefInst->addOperand(*MF, PCRelLabel);
-        Pair->UseInst->addOperand(*MF, PCRelLabel);
-        MadeChange |= true;
       }
-      return MadeChange;
+
+      if (!Pair->StillValid)
+        continue;
+
+      // The load/store instruction that uses the address from the PLD will
+      // either use a register (for a store) or define a register (for the
+      // load). That register will be added as an implicit def to the PLD
+      // and as an implicit use on the second memory op. This is a precaution
+      // to prevent future passes from using that register between the two
+      // instructions.
+      MachineOperand ImplDef =
+          MachineOperand::CreateReg(Pair->UseReg, true, true);
+      MachineOperand ImplUse =
+          MachineOperand::CreateReg(Pair->UseReg, false, true);
+      Pair->DefInst->addOperand(ImplDef);
+      Pair->UseInst->addOperand(ImplUse);
+
+      // Create the symbol.
+      MCContext &Context = MF->getContext();
+      MCSymbol *Symbol = Context.createTempSymbol(Twine("pcrel"), false, false);
+      MachineOperand PCRelLabel =
+          MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+      Pair->DefInst->addOperand(*MF, PCRelLabel);
+      Pair->UseInst->addOperand(*MF, PCRelLabel);
+      MadeChange |= true;
     }
+    return MadeChange;
+  }
 
-    bool runOnMachineFunction(MachineFunction &MF) override {
-      if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
-        // Remove UNENCODED_NOP even when this pass is disabled.
-        // This needs to be done unconditionally so we don't emit zeros
-        // in the instruction stream.
-        SmallVector<MachineInstr *, 4> InstrsToErase;
-        for (MachineBasicBlock &MBB : MF)
-          for (MachineInstr &MI : MBB)
-            if (MI.getOpcode() == PPC::UNENCODED_NOP)
-              InstrsToErase.push_back(&MI);
-        for (MachineInstr *MI : InstrsToErase)
-          MI->eraseFromParent();
-        return false;
-      }
-      bool Changed = false;
-      const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
-      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
+      // Remove UNENCODED_NOP even when this pass is disabled.
+      // This needs to be done unconditionally so we don't emit zeros
+      // in the instruction stream.
       SmallVector<MachineInstr *, 4> InstrsToErase;
-      for (MachineBasicBlock &MBB : MF) {
-        Changed |= removeRedundantLIs(MBB, TRI);
-        Changed |= addLinkerOpt(MBB, TRI);
-        for (MachineInstr &MI : MBB) {
-          unsigned Opc = MI.getOpcode();
-          if (Opc == PPC::UNENCODED_NOP) {
+      for (MachineBasicBlock &MBB : MF)
+        for (MachineInstr &MI : MBB)
+          if (MI.getOpcode() == PPC::UNENCODED_NOP)
+            InstrsToErase.push_back(&MI);
+      for (MachineInstr *MI : InstrsToErase)
+        MI->eraseFromParent();
+      return false;
+    }
+    bool Changed = false;
+    const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    SmallVector<MachineInstr *, 4> InstrsToErase;
+    for (MachineBasicBlock &MBB : MF) {
+      Changed |= removeRedundantLIs(MBB, TRI);
+      Changed |= addLinkerOpt(MBB, TRI);
+      for (MachineInstr &MI : MBB) {
+        unsigned Opc = MI.getOpcode();
+        if (Opc == PPC::UNENCODED_NOP) {
+          InstrsToErase.push_back(&MI);
+          continue;
+        }
+        // Detect self copies - these can result from running AADB.
+        if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+          const MCInstrDesc &MCID = TII->get(Opc);
+          if (MCID.getNumOperands() == 3 &&
+              MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+              MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+            NumberOfSelfCopies++;
+            LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+            LLVM_DEBUG(MI.dump());
             InstrsToErase.push_back(&MI);
             continue;
-          }
-          // Detect self copies - these can result from running AADB.
-          if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
-            const MCInstrDesc &MCID = TII->get(Opc);
-            if (MCID.getNumOperands() == 3 &&
-                MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
-                MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
-              NumberOfSelfCopies++;
-              LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
-              LLVM_DEBUG(MI.dump());
-              InstrsToErase.push_back(&MI);
-              continue;
-            }
-            else if (MCID.getNumOperands() == 2 &&
+          } else if (MCID.getNumOperands() == 2 &&
                      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
-              NumberOfSelfCopies++;
-              LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
-              LLVM_DEBUG(MI.dump());
-              InstrsToErase.push_back(&MI);
-              continue;
-            }
-          }
-          MachineInstr *DefMIToErase = nullptr;
-          if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
-            Changed = true;
-            NumRRConvertedInPreEmit++;
-            LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
-            LLVM_DEBUG(MI.dump());
-            if (DefMIToErase) {
-              InstrsToErase.push_back(DefMIToErase);
-            }
-          }
-          if (TII->foldFrameOffset(MI)) {
-            Changed = true;
-            NumFrameOffFoldInPreEmit++;
-            LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+            NumberOfSelfCopies++;
+            LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
             LLVM_DEBUG(MI.dump());
+            InstrsToErase.push_back(&MI);
+            continue;
           }
         }
-
-        // Eliminate conditional branch based on a constant CR bit by
-        // CRSET or CRUNSET. We eliminate the conditional branch or
-        // convert it into an unconditional branch. Also, if the CR bit
-        // is not used by other instructions, we eliminate CRSET as well.
-        auto I = MBB.getFirstInstrTerminator();
-        if (I == MBB.instr_end())
-          continue;
-        MachineInstr *Br = &*I;
-        if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
-          continue;
-        MachineInstr *CRSetMI = nullptr;
-        Register CRBit = Br->getOperand(0).getReg();
-        unsigned CRReg = getCRFromCRBit(CRBit);
-        bool SeenUse = false;
-        MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
-        for (It++; It != Er; It++) {
-          if (It->modifiesRegister(CRBit, TRI)) {
-            if ((It->getOpcode() == PPC::CRUNSET ||
-                 It->getOpcode() == PPC::CRSET) &&
-                It->getOperand(0).getReg() == CRBit)
-              CRSetMI = &*It;
-            break;
+        MachineInstr *DefMIToErase = nullptr;
+        if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
+          Changed = true;
+          NumRRConvertedInPreEmit++;
+          LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
+          LLVM_DEBUG(MI.dump());
+          if (DefMIToErase) {
+            InstrsToErase.push_back(DefMIToErase);
           }
-          if (It->readsRegister(CRBit, TRI))
-            SeenUse = true;
         }
-        if (!CRSetMI) continue;
-
-        unsigned CRSetOp = CRSetMI->getOpcode();
-        if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
-            (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
-          // Remove this branch since it cannot be taken.
-          InstrsToErase.push_back(Br);
-          MBB.removeSuccessor(Br->getOperand(1).getMBB());
-        }
-        else {
-          // This conditional branch is always taken. So, remove all branches
-          // and insert an unconditional branch to the destination of this.
-          MachineBasicBlock::iterator It = Br, Er = MBB.end();
-          for (; It != Er; It++) {
-            if (It->isDebugInstr()) continue;
-            assert(It->isTerminator() && "Non-terminator after a terminator");
-            InstrsToErase.push_back(&*It);
-          }
-          if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
-            ArrayRef<MachineOperand> NoCond;
-            TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
-                              NoCond, Br->getDebugLoc());
-          }
-          for (auto &Succ : MBB.successors())
-            if (Succ != Br->getOperand(1).getMBB()) {
-              MBB.removeSuccessor(Succ);
-              break;
-            }
+        if (TII->foldFrameOffset(MI)) {
+          Changed = true;
+          NumFrameOffFoldInPreEmit++;
+          LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+          LLVM_DEBUG(MI.dump());
         }
+      }
 
-        // If the CRBit is not used by another instruction, we can eliminate
-        // CRSET/CRUNSET instruction.
-        if (!SeenUse) {
-          // We need to check use of the CRBit in successors.
-          for (auto &SuccMBB : MBB.successors())
-            if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
-              SeenUse = true;
-              break;
-            }
-          if (!SeenUse)
-            InstrsToErase.push_back(CRSetMI);
+      // Eliminate conditional branch based on a constant CR bit by
+      // CRSET or CRUNSET. We eliminate the conditional branch or
+      // convert it into an unconditional branch. Also, if the CR bit
+      // is not used by other instructions, we eliminate CRSET as well.
+      auto I = MBB.getFirstInstrTerminator();
+      if (I == MBB.instr_end())
+        continue;
+      MachineInstr *Br = &*I;
+      if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+        continue;
+      MachineInstr *CRSetMI = nullptr;
+      Register CRBit = Br->getOperand(0).getReg();
+      unsigned CRReg = getCRFromCRBit(CRBit);
+      bool SeenUse = false;
+      MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+      for (It++; It != Er; It++) {
+        if (It->modifiesRegister(CRBit, TRI)) {
+          if ((It->getOpcode() == PPC::CRUNSET ||
+               It->getOpcode() == PPC::CRSET) &&
+              It->getOperand(0).getReg() == CRBit)
+            CRSetMI = &*It;
+          break;
         }
+        if (It->readsRegister(CRBit, TRI))
+          SeenUse = true;
       }
-      for (MachineInstr *MI : InstrsToErase) {
-        LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
-        LLVM_DEBUG(MI->dump());
-        MI->eraseFromParent();
-        NumRemovedInPreEmit++;
+      if (!CRSetMI)
+        continue;
+
+      unsigned CRSetOp = CRSetMI->getOpcode();
+      if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+          (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
+        // Remove this branch since it cannot be taken.
+        InstrsToErase.push_back(Br);
+        MBB.removeSuccessor(Br->getOperand(1).getMBB());
+      } else {
+        // This conditional branch is always taken. So, remove all branches
+        // and insert an unconditional branch to the destination of this.
+        MachineBasicBlock::iterator It = Br, Er = MBB.end();
+        for (; It != Er; It++) {
+          if (It->isDebugInstr())
+            continue;
+          assert(It->isTerminator() && "Non-terminator after a terminator");
+          InstrsToErase.push_back(&*It);
+        }
+        if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+          ArrayRef<MachineOperand> NoCond;
+          TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, NoCond,
+                            Br->getDebugLoc());
+        }
+        for (auto &Succ : MBB.successors())
+          if (Succ != Br->getOperand(1).getMBB()) {
+            MBB.removeSuccessor(Succ);
+            break;
+          }
+      }
+
+      // If the CRBit is not used by another instruction, we can eliminate
+      // CRSET/CRUNSET instruction.
+      if (!SeenUse) {
+        // We need to check use of the CRBit in successors.
+        for (auto &SuccMBB : MBB.successors())
+          if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+            SeenUse = true;
+            break;
+          }
+        if (!SeenUse)
+          InstrsToErase.push_back(CRSetMI);
       }
-      return Changed;
     }
+    for (MachineInstr *MI : InstrsToErase) {
+      LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
+      LLVM_DEBUG(MI->dump());
+      MI->eraseFromParent();
+      NumRemovedInPreEmit++;
+    }
+    return Changed;
+  }
   };
 }