Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -928,6 +928,11 @@
     const MCExpr *Exp =
         MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
                                 MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
+    if (!MO.isJTI() && MO.getOffset())
+      Exp = MCBinaryExpr::createAdd(Exp,
+                                    MCConstantExpr::create(MO.getOffset(),
+                                                           OutContext),
+                                    OutContext);
     TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6576,6 +6576,12 @@
     // immediate operand, add it now.
     if (ReplaceFlags) {
       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+        // ADDI y, x, GA{off1}
+        // LFD z, off2(y)
+        // ==>
+        // LFD z, GA{off1+off2}(x)
+        Offset += GA->getOffset();
+
         SDLoc dl(GA);
         const GlobalValue *GV = GA->getGlobal();
         // We can't perform this optimization for data whose alignment
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1198,6 +1198,7 @@
     SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
                                  DAGCombinerInfo &DCI) const;
 
+    SDValue combineADDOnTOCEntry(SDNode *N, SelectionDAG &DAG) const;
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it
     /// (2) keeping the result of comparison in GPR has performance benefit.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15486,10 +15486,62 @@
   return SDValue();
 }
 
+SDValue PPCTargetLowering::combineADDOnTOCEntry(SDNode *N,
+                                                SelectionDAG &DAG) const {
+  // The addend in the TOC relocation isn't supported by all platforms.
+  if (!Subtarget.isELFv2ABI())
+    return SDValue();
+
+  // Combine the code sequence:
+  //   x = TOC_ENTRY<Global{offset1}>
+  //   y = add x, offset2
+  // into:
+  //   y = TOC_ENTRY<Global{offset1 + offset2}>
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op1);
+  MemIntrinsicSDNode *TocEntry = dyn_cast<MemIntrinsicSDNode>(Op0);
+  if (!Offset || !TocEntry || TocEntry->getOpcode() != PPCISD::TOC_ENTRY)
+    return SDValue();
+
+  // Only combine when the TOC_ENTRY operand is a global address.
+  SDValue GA = TocEntry->getOperand(0);
+  GlobalAddressSDNode *Addr = dyn_cast<GlobalAddressSDNode>(GA);
+  if (!Addr)
+    return SDValue();
+
+  // If the global is accessed as got-indirect, a load is needed to fetch
+  // the address of the global from the TOC entry, so it is unsafe to fold
+  // the offset into the global.
+  if (isAccessedAsGotIndirect(GA))
+    return SDValue();
+
+  // This combine will require the linker to use an additional TOC entry to
+  // compute the address. Therefore, do nothing for offsets that fit in a
+  // 16-bit signed value, as those already fit into the displacement field of
+  // LDtocL. Offsets that do not fit in a 32-bit signed value are still not
+  // reachable by this method. So only combine when !isInt<16> && isInt<32>.
+  int64_t Addend = Addr->getOffset() + Offset->getSExtValue();
+  if (isInt<16>(Addend) || !isInt<32>(Addend))
+    return SDValue();
+
+  // Create a new global address carrying the addend and a TOC entry for it.
+  assert(Addr->getValueType(0) == MVT::i64 && "The address must be i64");
+  SDValue NewAddr = DAG.getTargetGlobalAddress(Addr->getGlobal(),
+                                               SDLoc(Addr),
+                                               MVT::i64,
+                                               Addend,
+                                               Addr->getTargetFlags());
+  return getTOCEntry(DAG, SDLoc(TocEntry), NewAddr);
+}
+
 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
     return Value;
 
+  if (auto Value = combineADDOnTOCEntry(N, DCI.DAG))
+    return Value;
+
   return SDValue();
 }
 
Index: llvm/test/CodeGen/PowerPC/toc-float.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/toc-float.ll
+++ llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck -check-prefix=CHECK-P9 %s
 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 <%s | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-late-peephole=false <%s | FileCheck -check-prefix=CHECK-P8-NOPEEPHOLE %s
 
 ; As the constant could be represented as float, a float is
 ; loaded from constant pool.
@@ -81,17 +82,35 @@
 
 ; Access an element with an offset that doesn't fit in the displacement field of LFD. 
 ; CHECK-P9-LABEL: doubleLargeConstantArray
-; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK-P9: li [[REG2:[0-9]+]], 0 
-; CHECK-P9: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
-; CHECK-P9: ori [[REG4:[0-9]+]], [[REG2]], 32768 
-; CHECK-P9: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] 
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG1]])
 ; CHECK-P8-LABEL: doubleLargeConstantArray
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG1]])
+; CHECK-P8-NOPEEPHOLE-LABEL: doubleLargeConstantArray
+; CHECK-P8-NOPEEPHOLE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8-NOPEEPHOLE: addi [[REG3:[0-9]+]], [[REG1]], [[VAR]]@toc@l+[[ADDEND]]
+; CHECK-P8-NOPEEPHOLE: lfdx {{[0-9]+}}, 0, [[REG3]]
+}
+
+@arr2 = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8
+
+define double @doubleLargeConstantArray2()  {
+  %1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr2, i64 0, i64 0), align 8
+  %2 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr2, i64 0, i64 8095), align 8
+  %3 = fadd double %1, %2
+  ret double %3
+
+; CHECK-P8-LABEL: doubleLargeConstantArray2
 ; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK-P8: li [[REG2:[0-9]+]], 0 
-; CHECK-P8: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
-; CHECK-P8: ori [[REG4:[0-9]+]], [[REG2]], 32768 
-; CHECK-P8: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] 
+; CHECK-P8: addis [[REG2:[0-9]+]], 2, [[VAR]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG2]])
+; CHECK-P9-LABEL: doubleLargeConstantArray2
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: addis [[REG2:[0-9]+]], 2, [[VAR]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG2]])
 }
 
 @vec_arr = global [10 x <4 x i32>] zeroinitializer, align 16