Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -928,6 +928,14 @@
     const MCExpr *Exp =
         MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
                                 MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
+    // Carry a non-zero operand offset over as a constant addend on the TOC_LO
+    // expression. NOTE(review): JTI operands are skipped, presumably because
+    // getOffset() is not valid for them - confirm.
+    if (!MO.isJTI() && MO.getOffset())
+      Exp = MCBinaryExpr::createAdd(Exp,
+                                    MCConstantExpr::create(MO.getOffset(),
+                                                           OutContext),
+                                    OutContext);
     TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6576,6 +6576,12 @@
       // immediate operand, add it now.
       if (ReplaceFlags) {
         if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+          // ADDI y, x, GA{off1}
+          // LFD z, off2(y)
+          // ==>
+          // LFD z, GA{off1+off2}(x)
+          Offset += GA->getOffset();
+
           SDLoc dl(GA);
           const GlobalValue *GV = GA->getGlobal();
           // We can't perform this optimization for data whose alignment
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1198,6 +1198,8 @@
     SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
                                  DAGCombinerInfo &DCI) const;
 
+    /// Fold a constant added to the TOC entry of a global into that entry.
+    SDValue combineADDOnTOCEntry(SDNode *N, SelectionDAG &DAG) const;
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it
     /// (2) keeping the result of comparison in GPR has performance benefit.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15486,10 +15486,66 @@
   return SDValue();
 }
 
+/// Fold a constant added to the TOC entry of a global into the addend of the
+/// global address itself, producing a single TOC entry. Returns an empty
+/// SDValue when \p N does not match the pattern or the fold is rejected by
+/// one of the checks below.
+SDValue PPCTargetLowering::combineADDOnTOCEntry(SDNode *N,
+                                                SelectionDAG &DAG) const {
+  // The addend in the TOC relocation isn't supported by all platforms.
+  if (!Subtarget.isELFv2ABI())
+    return SDValue();
+
+  // Combine the code seq:
+  //   x = TOC_ENTRY<GA + offset1>
+  //   y = add x, offset2
+  // to
+  //   y = TOC_ENTRY<GA + offset1 + offset2>
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op1);
+  MemIntrinsicSDNode *TocEntry = dyn_cast<MemIntrinsicSDNode>(Op0);
+  if (!Offset || !TocEntry || TocEntry->getOpcode() != PPCISD::TOC_ENTRY)
+    return SDValue();
+
+  // Only combine the add TOC_ENTRY for globals.
+  SDValue GA = TocEntry->getOperand(0);
+  GlobalAddressSDNode *Addr = dyn_cast<GlobalAddressSDNode>(GA);
+  if (!Addr)
+    return SDValue();
+
+  // If the global is accessed as got-indirect, a load is needed to
+  // load the address of the global from TOC entry. It is unsafe to fold the
+  // offset into globals.
+  if (isAccessedAsGotIndirect(GA))
+    return SDValue();
+
+  // This combine will require the linker to use an additional TOC entry to
+  // compute the address. Therefore, do nothing for offsets that fit in a
+  // 16-bit signed value, as they already fit the displacement field of LDtocL.
+  // Offsets larger than a 32-bit signed value will still not be reachable
+  // by this method. So we only combine addends needing 17 to 32 signed bits.
+  int64_t Addend = Addr->getOffset() + Offset->getSExtValue();
+  if (isInt<16>(Addend) || !isInt<32>(Addend))
+    return SDValue();
+
+  // Creating new global with offset, and new TOC with the new global.
+  assert(Addr->getValueType(0) == MVT::i64 && "The address must be i64");
+  SDValue NewAddr = DAG.getTargetGlobalAddress(Addr->getGlobal(),
+                                               SDLoc(Addr),
+                                               MVT::i64,
+                                               Addend,
+                                               Addr->getTargetFlags());
+  return getTOCEntry(DAG, SDLoc(TocEntry), NewAddr);
+}
+
 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
     return Value;
 
+  if (auto Value = combineADDOnTOCEntry(N, DCI.DAG))
+    return Value;
+
   return SDValue();
 }
 
Index: llvm/test/CodeGen/PowerPC/toc-float.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/toc-float.ll
+++ llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck -check-prefix=CHECK-P9 %s
 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 <%s | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-late-peephole=false <%s | FileCheck -check-prefix=CHECK-P8-NOPEEPHOLE %s
 
 ; As the constant could be represented as float, a float is
 ; loaded from constant pool.
@@ -81,17 +82,35 @@
 
 ; Access an element with an offset that doesn't fit in the displacement field of LFD.
 ; CHECK-P9-LABEL: doubleLargeConstantArray
-; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK-P9: li [[REG2:[0-9]+]], 0
-; CHECK-P9: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
-; CHECK-P9: ori [[REG4:[0-9]+]], [[REG2]], 32768
-; CHECK-P9: lfdx {{[0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG1]])
 ; CHECK-P8-LABEL: doubleLargeConstantArray
+; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG1]])
+; CHECK-P8-NOPEEPHOLE-LABEL: doubleLargeConstantArray
+; CHECK-P8-NOPEEPHOLE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8-NOPEEPHOLE: addi [[REG3:[0-9]+]], [[REG1]], [[VAR]]@toc@l+[[ADDEND]]
+; CHECK-P8-NOPEEPHOLE: lfdx {{[0-9]+}}, 0, [[REG3]]
+}
+
+@arr2 = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8
+
+define double @doubleLargeConstantArray2() {
+  %1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr2, i64 0, i64 0), align 8
+  %2 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr2, i64 0, i64 8095), align 8
+  %3 = fadd double %1, %2
+  ret double %3
+
+; CHECK-P8-LABEL: doubleLargeConstantArray2
 ; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK-P8: li [[REG2:[0-9]+]], 0
-; CHECK-P8: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
-; CHECK-P8: ori [[REG4:[0-9]+]], [[REG2]], 32768
-; CHECK-P8: lfdx {{[0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-P8: addis [[REG2:[0-9]+]], 2, [[VAR]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG2]])
+; CHECK-P9-LABEL: doubleLargeConstantArray2
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-P9: addis [[REG2:[0-9]+]], 2, [[VAR]]@toc@ha+[[ADDEND:[0-9]+]]
+; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[ADDEND]]([[REG2]])
 }
 
 @vec_arr = global [10 x <4 x i32>] zeroinitializer, align 16