diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -397,6 +397,11 @@ // The list of all input sections. extern std::vector inputSections; +// The set of TOC entries (.toc + addend) for which we should not apply +// toc-indirect to toc-relative relaxation. const Symbol * refers to the +// STT_SECTION symbol associated with the .toc input section. +extern llvm::DenseSet> ppc64noTocRelax; + } // namespace elf std::string toString(const elf::InputSectionBase *); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -45,6 +45,7 @@ namespace elf { std::vector inputSections; +DenseSet> ppc64noTocRelax; template static ArrayRef getSectionContents(ObjFile &file, @@ -970,7 +971,13 @@ target->relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_TOC: - if (!tryRelaxPPC64TocIndirection(rel, bufLoc)) + // rel.sym refers to the STT_SECTION symbol associated with the .toc input + // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC + // entry, there may be an R_PPC64_TOC16_HA not paired with an + // R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation + // opportunities but is safe. + if (ppc64noTocRelax.count({rel.sym, rel.addend}) || + !tryRelaxPPC64TocIndirection(rel, bufLoc)) target->relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1294,17 +1294,6 @@ if (expr == R_NONE) return; - // We can separate the small code model relocations into 2 categories: - // 1) Those that access the compiler generated .toc sections. - // 2) Those that access the linker allocated got entries. - // lld allocates got entries to symbols on demand. Since we don't try to sort - // the got entries in any way, we don't have to track which objects have - // got-based small code model relocs. 
The .toc sections get placed after the - // end of the linker allocated .got section and we do sort those so sections - // addressed with small code model relocations come first. - if (config->emachine == EM_PPC64 && isPPC64SmallCodeModelTocReloc(type)) - sec.file->ppc64SmallCodeModelTocRelocs = true; - if (sym.isGnuIFunc() && !config->zText && config->warnIfuncTextrel) { warn("using ifunc symbols when text relocations are allowed may produce " "a binary that will segfault, if the object file is linked with " @@ -1318,6 +1307,25 @@ // Read an addend. int64_t addend = computeAddend(rel, end, sec, expr, sym.isLocal()); + if (config->emachine == EM_PPC64) { + // We can separate the small code model relocations into 2 categories: + // 1) Those that access the compiler generated .toc sections. + // 2) Those that access the linker allocated got entries. + // lld allocates got entries to symbols on demand. Since we don't try to + // sort the got entries in any way, we don't have to track which objects + // have got-based small code model relocs. The .toc sections get placed + // after the end of the linker allocated .got section and we do sort those + // so sections addressed with small code model relocations come first. + if (isPPC64SmallCodeModelTocReloc(type)) + sec.file->ppc64SmallCodeModelTocRelocs = true; + + // An R_PPC64_TOC16_LO against .toc marks the TOC entry (.toc + addend) as + // not relaxable. See the comment in InputSectionBase::relocateAlloc(). + if (type == R_PPC64_TOC16_LO && sym.isSection() && isa(sym) && + cast(sym).section->name == ".toc") + ppc64noTocRelax.insert({&sym, addend}); + } + // Relax relocations. // // If we know that a PLT entry will be resolved within the same ELF module, we diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1933,6 +1933,7 @@ // we can correctly decide if a dynamic relocation is needed. 
This is called // after processSymbolAssignments() because it needs to know whether a // linker-script-defined symbol is absolute. + ppc64noTocRelax.clear(); if (!config->relocatable) { forEachRelSec(scanRelocations); reportUndefinedSymbols(); diff --git a/lld/test/ELF/ppc64-toc-relax2.s b/lld/test/ELF/ppc64-toc-relax2.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/ppc64-toc-relax2.s @@ -0,0 +1,66 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: echo 'addis 5, 2, .LC0@toc@ha; ld 5, .LC0@toc@l(5); foo: \ +# RUN: .section .toc,"aw",@progbits; .LC0: .tc foo[TC], foo' \ +# RUN: | llvm-mc -filetype=obj -triple=powerpc64le - -o %t1.o +# RUN: ld.lld %t.o %t1.o -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK-LABEL: <_start>: +.globl _start +_start: +## Perform toc-indirect to toc-relative relaxation even if there are unrelated instructions in between. +# CHECK-NEXT: addis 3, 2, -2 +# CHECK-NEXT: li 9, 0 +# CHECK-NEXT: addi 3, 3, 32752 +# CHECK-NEXT: lwa 3, 0(3) + addis 3, 2, .LC1@toc@ha # R_PPC64_TOC16_HA + li 9, 0 + ld 3, .LC1@toc@l(3) # R_PPC64_TOC16_LO_DS + lwa 3, 0(3) + +## R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS can interleave. +# CHECK-NEXT: addis 3, 2, -2 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 3, 3, 32752 +# CHECK-NEXT: addi 4, 4, 32756 + addis 3, 2, .LC1@toc@ha + addis 4, 2, .LC2@toc@ha + ld 3, .LC1@toc@l(3) + ld 4, .LC2@toc@l(4) + +## We choose to be conservative: the presence of R_PPC64_TOC16_LO suppresses +## relaxation for the referenced TOC entry (.toc + addend). R_PPC64_TOC16_HA and +## R_PPC64_TOC16_LO_DS pairs referencing that entry are not relaxed either. 
+# CHECK-NEXT: nop +# CHECK-NEXT: addi 3, 2, -32768 +# CHECK-NEXT: li 9, 0 +# CHECK-NEXT: nop +# CHECK-NEXT: ld 4, -32768(2) + addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA + addi 3, 3, .LC0@toc@l # R_PPC64_TOC16_LO + li 9, 0 + addis 4, 2, .LC0@toc@ha # R_PPC64_TOC16_HA + ld 4, .LC0@toc@l(4) # R_PPC64_TOC16_LO_DS + +# CHECK-COUNT-3: blr +AES_encrypt: + blr +AES_decrypt: + blr +BN_free: + blr + +## %t1.o has relaxable relocation pairs referencing its own .toc, which is +## different from %t.o(.toc). The suppression in %t.o does not affect %t1.o even +## if the relocation addends are the same. +# CHECK-NEXT: addis 5, 2, -1 +# CHECK-NEXT: addi 5, 5, -32768 + +.section .toc,"aw",@progbits +.LC0: + .tc AES_encrypt[TC], AES_encrypt +.LC1: + .tc AES_decrypt[TC], AES_decrypt +.LC2: + .tc BN_free[TC], BN_free