Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -103,6 +103,7 @@ void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -509,7 +510,7 @@ break; case R_PPC64_REL24: { uint32_t Mask = 0x03FFFFFC; - checkInt(Loc, Val, 24, Type); + checkInt(Loc, Val, 26, Type); write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask)); break; } @@ -523,9 +524,33 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const { - // If a function is in the plt it needs to be called through - // a call stub. - return Type == R_PPC64_REL24 && S.isInPlt(); + if (Type == R_PPC64_REL24) { + // If a function is in the plt it needs to be called through + // a call stub. + if (S.isInPlt()) + return true; + + // If a symbol is a weak undefined and we are compiling an executable + // it doesn't need a range-extending thunk since it can't be called. + if (S.isUndefWeak() && !Config->Pic) + return false; + + // If the offset exceeds the range of the branch type then it will need + // a range-extending thunk. 
+ if (!inBranchRange(Type, BranchAddr, S.getVA())) { + return true; + } + } + + return false; +} + +bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { + if (Type != R_PPC64_REL24) + llvm_unreachable("Unexepected relocation type used in branch"); + + int64_t Offset = Dst - Src; + return Offset >= llvm::minIntN(26) && Offset <= llvm::maxIntN(26); } RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -71,6 +71,9 @@ uint32_t GotIndex = -1; uint32_t PltIndex = -1; uint32_t GlobalDynIndex = -1; + // If this symbol is the target of a long branch which needs a range extending + // thunk, this index will be set. + uint32_t LongBranchTargetIndex = -1; // This field is a index to the symbol's version definition. uint32_t VerdefIndex = -1; @@ -141,6 +144,7 @@ bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } + bool isLongBranchTarget() const { return LongBranchTargetIndex != -1U; } uint64_t getVA(int64_t Addend = 0) const; @@ -151,6 +155,8 @@ uint64_t getPltVA() const; uint64_t getPltOffset() const; uint64_t getSize() const; + uint64_t getLongBranchGotPltOffset() const; + uint64_t getLongBranchGotPltVA() const; OutputSection *getOutputSection() const; protected: Index: ELF/Symbols.cpp =================================================================== --- ELF/Symbols.cpp +++ ELF/Symbols.cpp @@ -141,6 +141,16 @@ return Target->getPltEntryOffset(PltIndex); } +uint64_t Symbol::getLongBranchGotPltOffset() const { + assert(LongBranchTargetIndex != -1); + return (Target->GotPltHeaderEntriesNum + LongBranchTargetIndex) * + Target->GotPltEntrySize; +} + +uint64_t Symbol::getLongBranchGotPltVA() const { + return InX::GotPlt->getVA() + getLongBranchGotPltOffset(); +} + uint64_t Symbol::getSize() const { if (const auto *DR = dyn_cast(this)) return DR->Size; Index: 
ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -355,12 +355,16 @@ public: GotPltSection(); void addEntry(Symbol &Sym); + void addLongBranch(Symbol &Sym); size_t getSize() const override; void writeTo(uint8_t *Buf) override; bool empty() const override; + void postThunkContents() override; private: std::vector Entries; + int LongBranchTargets = 0; + bool PostThunk = false; }; // The IgotPltSection is a Got associated with the PltSection for GNU Ifunc Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -1080,11 +1080,18 @@ void GotPltSection::addEntry(Symbol &Sym) { assert(Sym.PltIndex == Entries.size()); + assert(LongBranchTargets == 0); Entries.push_back(&Sym); } +void GotPltSection::addLongBranch(Symbol &Sym) { + assert(Sym.LongBranchTargetIndex == -1U); + Sym.LongBranchTargetIndex = (Entries.size() + LongBranchTargets); + ++LongBranchTargets; +} + size_t GotPltSection::getSize() const { - return (Target->GotPltHeaderEntriesNum + Entries.size()) * + return (Target->GotPltHeaderEntriesNum + Entries.size() + LongBranchTargets) * Target->GotPltEntrySize; } @@ -1098,13 +1105,25 @@ } bool GotPltSection::empty() const { - // We need to emit a GOT.PLT even if it's empty if there's a symbol that - // references the _GLOBAL_OFFSET_TABLE_ and the Target defines the symbol - // relative to the .got.plt section. - return Entries.empty() && + // There are a couple of situations where we must emit a GOT.PLT even if it is + // empty. The first is if there is a symbol that references the + // _GLOBAL_OFFSET_TABLE_ and the Target defines the symbol relative to the + // .got.plt section. The second is on PowerPC64 when we are linking + // position-independent code. The GOT.PLT section is used to store a table of + // long branch targets for range-extending thunks. 
Empty sections are cleaned + // up before we emit thunks, so we need to keep this section around in + // this case. + if (Config->EMachine == EM_PPC64 && Config->Pic && !PostThunk) + return false; + + return (Entries.empty() && !LongBranchTargets) && !(ElfSym::GlobalOffsetTable && Target->GotBaseSymInGotPlt); } +void GotPltSection::postThunkContents() { + PostThunk = true; +} + static StringRef getIgotPltName() { // On ARM the IgotPltSection is part of the GotSection. if (Config->EMachine == EM_ARM) Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -209,6 +209,35 @@ void addSymbols(ThunkSection &IS) override; }; +// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte +// alignment. This gives a possible 26 bits of 'reach'. If the callee is further +// than that we need to emit a long-branch thunk. In position independent code +// the long-branch target address is stored in the GotPlt (which on PPC64 is the +// .plt section), with a corresponding relative dynamic relocation. Since only +// local calls might need a long-branch thunk (since non-local calls already +// have plt stubs), the local entry point is used as the branch target. 
+class PPC64PILongBranchThunk final : public Thunk { +public: + PPC64PILongBranchThunk(Symbol &Dest) : Thunk(Dest) { + assert(!Dest.IsPreemptible); + if (!Dest.isLongBranchTarget()) { + InX::GotPlt->addLongBranch(Dest); + + uint32_t LocalEntryOffset = 0; + uint8_t Shift = (Dest.StOther >> 5) & 7; + if (Shift > 1) + LocalEntryOffset = 1 << Shift; + + InX::RelaDyn->addReloc({Target->RelativeRel, InX::GotPlt, + Dest.getLongBranchGotPltOffset(), true, &Dest, + LocalEntryOffset}); + } + } + uint32_t size() override { return 16; } + void writeTo(uint8_t *Buf) override; + void addSymbols(ThunkSection &IS) override; +}; + } // end anonymous namespace Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, @@ -502,17 +531,20 @@ return dyn_cast(DR.Section); } -void PPC64PltCallStub::writeTo(uint8_t *Buf) { - int64_t Off = Destination.getGotPltVA() - getPPC64TocBase(); - // Need to add 0x8000 to offset to account for the low bits being signed. - uint16_t OffHa = (Off + 0x8000) >> 16; - uint16_t OffLo = Off; +static void writePPCLoadAndBranch(uint8_t *Buf, int64_t Offset) { + uint16_t OffHa = (Offset + 0x8000) >> 16; + uint16_t OffLo = Offset; + write32(Buf + 0, 0x3d820000 | OffHa); // addis r12, r2, OffHa + write32(Buf + 4, 0xe98c0000 | OffLo); // ld r12, OffLo(r12) + write32(Buf + 8, 0x7d8903a6); // mtctr r12 + write32(Buf + 12, 0x4e800420); // bctr +} + +void PPC64PltCallStub::writeTo(uint8_t *Buf) { + int64_t Offset = Destination.getGotPltVA() - getPPC64TocBase(); write32(Buf + 0, 0xf8410018); // std r2,24(r1) - write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@to@ha - write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12) - write32(Buf + 12, 0x7d8903a6); // mtctr r12 - write32(Buf + 16, 0x4e800420); // bctr + writePPCLoadAndBranch(Buf + 4, Offset); } void PPC64PltCallStub::addSymbols(ThunkSection &IS) { @@ -521,6 +553,16 @@ S->NeedsTocRestore = true; } +void PPC64PILongBranchThunk::writeTo(uint8_t *Buf) { + int64_t Offset = 
Destination.getLongBranchGotPltVA() - getPPC64TocBase(); + writePPCLoadAndBranch(Buf, Offset); +} + +void PPC64PILongBranchThunk::addSymbols(ThunkSection &IS) { + addSymbol(Saver.save("__long_branch_" + Destination.getName()), STT_FUNC, 0, + IS); +} + Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; @@ -565,8 +607,15 @@ } static Thunk *addThunkPPC64(RelType Type, Symbol &S) { - if (Type == R_PPC64_REL24) - return make(S); + if (Type == R_PPC64_REL24) { + if (S.isInPlt()) + return make(S); + + if (Config->Pic) + return make(S); + else + fatal("Position dependant long_branch thunks not implemented yet!"); + } fatal("unexpected relocation type"); } Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -1683,7 +1683,7 @@ } // createThunks may have added local symbols to the static symbol table - applySynthetic({InX::SymTab}, + applySynthetic({InX::GotPlt, InX::SymTab}, [](SyntheticSection *SS) { SS->postThunkContents(); }); // Fill other section headers. 
The dynamic table is finalized Index: test/ELF/basic-ppc64.s =================================================================== --- test/ELF/basic-ppc64.s +++ test/ELF/basic-ppc64.s @@ -36,8 +36,8 @@ // CHECK-NEXT: ProgramHeaderEntrySize: 56 // CHECK-NEXT: ProgramHeaderCount: 7 // CHECK-NEXT: SectionHeaderEntrySize: 64 -// CHECK-NEXT: SectionHeaderCount: 10 -// CHECK-NEXT: StringTableSectionIndex: 8 +// CHECK-NEXT: SectionHeaderCount: 11 +// CHECK-NEXT: StringTableSectionIndex: 9 // CHECK-NEXT:} // CHECK-NEXT:Sections [ // CHECK-NEXT: Section { @@ -156,7 +156,23 @@ // CHECK-NEXT: } // CHECK-NEXT: Section { // CHECK-NEXT: Index: 6 -// CHECK-NEXT: Name: .comment (38) +// CHECK-NEXT: Name: .plt (38) +// CHECK-NEXT: Type: SHT_NOBITS (0x8) +// CHECK-NEXT: Flags [ (0x3) +// CHECK-NEXT: SHF_ALLOC (0x2) +// CHECK-NEXT: SHF_WRITE (0x1) +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x30000 +// CHECK-NEXT: Offset: 0x20060 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: 7 +// CHECK-NEXT: Name: .comment (43) // CHECK-NEXT: Type: SHT_PROGBITS (0x1) // CHECK-NEXT: Flags [ (0x30) // CHECK-NEXT: SHF_MERGE (0x10) @@ -174,15 +190,15 @@ // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 7 -// CHECK-NEXT: Name: .symtab (47) +// CHECK-NEXT: Index: 8 +// CHECK-NEXT: Name: .symtab (52) // CHECK-NEXT: Type: SHT_SYMTAB (0x2) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 // CHECK-NEXT: Offset: 0x20068 // CHECK-NEXT: Size: 48 -// CHECK-NEXT: Link: 9 +// CHECK-NEXT: Link: 10 // CHECK-NEXT: Info: 2 // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 24 @@ -193,14 +209,14 @@ // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 8 -// CHECK-NEXT: Name: .shstrtab (55) +// CHECK-NEXT: Index: 9 +// CHECK-NEXT: Name: .shstrtab (60) // CHECK-NEXT: Type: 
SHT_STRTAB (0x3) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 // CHECK-NEXT: Offset: 0x20098 -// CHECK-NEXT: Size: 73 +// CHECK-NEXT: Size: 78 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 1 @@ -208,19 +224,19 @@ // CHECK-NEXT: SectionData ( // CHECK-NEXT: 0000: 002E6479 6E73796D 002E6861 7368002E |..dynsym..hash..| // CHECK-NEXT: 0010: 64796E73 7472002E 74657874 002E6479 |dynstr..text..dy| -// CHECK-NEXT: 0020: 6E616D69 63002E63 6F6D6D65 6E74002E |namic..comment..| -// CHECK-NEXT: 0030: 73796D74 6162002E 73687374 72746162 |symtab..shstrtab| -// CHECK-NEXT: 0040: 002E7374 72746162 00 |..strtab.| +// CHECK-NEXT: 0020: 6E616D69 63002E70 6C74002E 636F6D6D |namic..plt..comm| +// CHECK-NEXT: 0030: 656E7400 2E73796D 74616200 2E736873 |ent..symtab..shs| +// CHECK-NEXT: 0040: 74727461 62002E73 74727461 6200 |trtab..strtab.| // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 9 -// CHECK-NEXT: Name: .strtab (65) +// CHECK-NEXT: Index: 10 +// CHECK-NEXT: Name: .strtab (70) // CHECK-NEXT: Type: SHT_STRTAB (0x3) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 -// CHECK-NEXT: Offset: 0x200E1 +// CHECK-NEXT: Offset: 0x200E6 // CHECK-NEXT: Size: 10 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 @@ -275,7 +291,7 @@ // CHECK-NEXT: VirtualAddress: 0x20000 // CHECK-NEXT: PhysicalAddress: 0x20000 // CHECK-NEXT: FileSize: 96 -// CHECK-NEXT: MemSize: 96 +// CHECK-NEXT: MemSize: 65536 // CHECK-NEXT: Flags [ (0x6) // CHECK-NEXT: PF_R (0x4) // CHECK-NEXT: PF_W (0x2) Index: test/ELF/ppc64-rel24-reach.s =================================================================== --- /dev/null +++ test/ELF/ppc64-rel24-reach.s @@ -0,0 +1,93 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t +# RUN: llvm-objdump -d -start-address=0x10000 -stop-address=0x10018 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: 
llvm-objdump -d -start-address=0x200ffa0 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-readelf --relocations %t | FileCheck %s -check-prefix=DYNRELOC + +# caller calls protected function callee. Since callee is protected no plt stub +# is needed. The binary however has been padded out by a bit less than 26 bits +# worth of space, so that the branch range is only just within the distance +# a bl instruction can reach. + + .text + .abiversion 2 + .protected callee + .globl callee + .p2align 4 + .type callee,@function +callee: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry callee, .Lfunc_lep0-.Lfunc_gep0 + addis 4, 2, .LC0@toc@ha + ld 4, .LC0@toc@l(4) + lwz 3, 0(4) + blr + +.space 0x1FFFF80 + + .protected caller + .globl caller + .p2align 4 + .type caller,@function +caller: +.Lfunc_begin1: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + + addis 4, 2, .LC1@toc@ha + ld 4, .LC1@toc@l(4) + lwz 4, 0(4) + add 3, 3, 4 + blr + + .section .toc,"aw",@progbits +.LC0: + .tc a[TC],a +.LC1: + .tc b[TC],b + + + .data + .type a,@object # @a + .globl a + .p2align 2 +a: + .long 11 + .size a, 4 + + .type b,@object + .globl b + .p2align 2 +b: + .long 33 # 0x21 + .size b, 4 + +# Verify address of the callee +# CALLEE_DUMP: callee: +# CALLEE_DUMP: 10000: {{.*}} addis 2, 12, 514 + +# Verify the address of caller, and that we branch back to callee rather +# than use a range-extending thunk. The printed offset is misleading because +# the instruction treats it as a negative 26-bit offset despite being printed +# as a positive offset. 
+# CALLER_DUMP: caller: +# CALLER_DUMP: 200ffa0: {{.*}} addis 2, 12, 2 +# CALLER_DUMP: 200ffb4: 55 00 00 4a bl .+33554516 + +# DYNRELOC: Relocation section '.rela.dyn' at offset 0x2b0 contains 2 entries: +# DYNRELOC-NOT: R_PPC64_RELATIVE Index: test/ELF/ppc64-shared-long_branch.s =================================================================== --- /dev/null +++ test/ELF/ppc64-shared-long_branch.s @@ -0,0 +1,111 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t +# RUN: llvm-objdump -d -start-address=0x10000 -stop-address=0x10018 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: llvm-objdump -d -start-address=0x2010020 -stop-address=0x2010068 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-readelf --sections %t | FileCheck %s -check-prefix=SECTIONS +# RUN: llvm-readelf --relocations %t | FileCheck %s -check-prefix=DYNRELOC + + +# caller calls protected function callee. Since callee is protected no plt stub +# is needed. The binary however has been padded out with space so that the call +# distance is further than a bl instruction can reach. 
+ + .text + .abiversion 2 + .protected callee + .globl callee + .p2align 4 + .type callee,@function +callee: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry callee, .Lfunc_lep0-.Lfunc_gep0 + addis 4, 2, .LC0@toc@ha + ld 4, .LC0@toc@l(4) + lwz 3, 0(4) + blr + +.space 0x2000000 + + .protected caller + .globl caller + .p2align 4 + .type caller,@function +caller: +.Lfunc_begin1: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + + addis 4, 2, .LC1@toc@ha + ld 4, .LC1@toc@l(4) + lwz 4, 0(4) + add 3, 3, 4 + blr + + + .section .toc,"aw",@progbits +.LC0: + .tc a[TC],a +.LC1: + .tc b[TC],b + + + .data + .type a,@object # @a + .globl a + .p2align 2 +a: + .long 11 + .size a, 4 + + .type b,@object + .globl b + .p2align 2 +b: + .long 33 # 0x21 + .size b, 4 + +# Verify address of the callee +# CALLEE_DUMP: callee: +# CALLEE_DUMP: 10000: {{.*}} addis 2, 12, 515 +# CALLEE_DUMP: 10004: {{.*}} addi 2, 2, -3260 +# CALLEE_DUMP: 10008: {{.*}} addis 4, 2, 0 + +# Verify the address of caller, and the call to the long-branch thunk. +# CALLER_DUMP: caller: +# CALLER_DUMP: 2010020: {{.*}} addis 2, 12, 3 +# CALLER_DUMP: 2010034: {{.*}} bl .+36 + +# Verify the thunk's contents. +# CALLER_DUMP: __long_branch_callee: +# CALLER_DUMP: 2010058: {{.*}} addis 12, 2, 0 +# CALLER_DUMP: 201005c: {{.*}} ld 12, 32624(12) +# CALLER_DUMP: 2010060: {{.*}} mtctr 12 +# CALLER_DUMP: 2010064: {{.*}} bctr + +# .got section is at address 0x20300a0 so TOC is 0x20380a0 +# .plt section has a 2 entry header and a single entry for the long branch +# target address, so its size is 24 bytes. 
+# [Nr] Name Type Address Off Size +# SECTIONS: [ 9] .got PROGBITS 00000000020300a0 20300a0 000008 +# SECTIONS: [11] .plt NOBITS 0000000002040000 20300b8 000018 + +# There is a relative dynamic relocation for (.plt + 16 bytes), with a base +# address equal to callee's local entry point (0x10000 + 8). +# DYNRELOC: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 3 entries: +# DYNRELOC: Offset Info Type Symbol's Value Symbol's Name + Addend +# DYNRELOC: 0000000002040010 0000000000000016 R_PPC64_RELATIVE 10008