Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -614,11 +614,9 @@ // point. Section 3.4.1 describes using the 3 most significant bits of the // st_other field to find out how many instructions there are between the // local and global entry point. - uint8_t StOther = (Sym.StOther >> 5) & 7; - if (StOther == 0 || StOther == 1) - return SymVA - P; - - return SymVA - P + (1LL << StOther); + uint32_t LocalEntryOffset = + (Config->EMachine == EM_PPC64) ? getPPC64LocalEntryOffset(Sym) : 0; + return (SymVA + LocalEntryOffset) - P; } case R_PPC_TOC: return getPPC64TocBase() + A; Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -157,6 +157,8 @@ uint64_t getSize() const; uint64_t getLongBranchGotPltOffset() const; uint64_t getLongBranchGotPltVA() const; + uint64_t getLongBranchTableVA() const; + OutputSection *getOutputSection() const; protected: @@ -381,6 +383,11 @@ } void warnUnorderableSymbol(const Symbol *Sym); + +// Functions in the PPC64 V2 abi can have 2 entry points, one global and +// one local. This calculates the offset from the global entry-point to the +// local entry-point. If the function has a single entry point, the offset is 0. 
+uint32_t getPPC64LocalEntryOffset(const Symbol &Func); } // namespace elf std::string toString(const elf::Symbol &B); Index: ELF/Symbols.cpp =================================================================== --- ELF/Symbols.cpp +++ ELF/Symbols.cpp @@ -151,6 +151,11 @@ return InX::GotPlt->getVA() + getLongBranchGotPltOffset(); } +uint64_t Symbol::getLongBranchTableVA() const { + assert(isLongBranchTarget()); + return InX::LongBranchTarget->getVA() + LongBranchTargetIndex * 8; +} + uint64_t Symbol::getSize() const { if (const auto *DR = dyn_cast(this)) return DR->Size; @@ -274,6 +279,19 @@ Warn(": unable to order discarded symbol: "); } +uint32_t lld::elf::getPPC64LocalEntryOffset(const Symbol &Func) { + assert(Config->EMachine == EM_PPC64); + + uint8_t LocalEntryShift = (Func.StOther >> 5); + assert(LocalEntryShift != 7); // Reserved value. + + // 0 or 1 indicates the function has a single entry point. + if (LocalEntryShift == 0 || LocalEntryShift == 1) + return 0; + + return 1 << LocalEntryShift; +} + // Returns a symbol for an error message. std::string lld::toString(const Symbol &B) { if (Config->Demangle) Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -367,6 +367,20 @@ bool PostThunk = false; }; +class LongBranchTargetSection final : public SyntheticSection { +public: + LongBranchTargetSection(); + void addEntry(Symbol &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override; + void postThunkContents(void) override { PostThunk = true; } + +private: + std::vector Entries; + bool PostThunk = false; +}; + // The IgotPltSection is a Got associated with the PltSection for GNU Ifunc // Symbols that will be relocated by Target->IRelativeRel. 
// On most Targets the IgotPltSection will immediately follow the GotPltSection @@ -996,6 +1010,7 @@ static StringTableSection *ShStrTab; static StringTableSection *StrTab; static SymbolTableBaseSection *SymTab; + static LongBranchTargetSection *LongBranchTarget; }; template struct In { Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -1161,6 +1161,29 @@ } } +LongBranchTargetSection::LongBranchTargetSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, ".branch_lt") {} + +void LongBranchTargetSection::addEntry(Symbol &Sym) { + assert(!Sym.isLongBranchTarget()); + Sym.LongBranchTargetIndex = Entries.size(); + Entries.push_back(&Sym); +} + +size_t LongBranchTargetSection::getSize() const { return Entries.size() * 8; } + +void LongBranchTargetSection::writeTo(uint8_t *Buf) { + for (const Symbol *Sym : Entries) { + assert(Sym->getVA()); + write64(Buf, Sym->getVA() + getPPC64LocalEntryOffset(*Sym)); + Buf += 8; + } +} + +bool LongBranchTargetSection::empty() const { + return PostThunk && Entries.size() == 0; +} + StringTableSection::StringTableSection(StringRef Name, bool Dynamic) : SyntheticSection(Dynamic ? 
(uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, Name), Dynamic(Dynamic) { @@ -3038,6 +3061,7 @@ StringTableSection *InX::ShStrTab; StringTableSection *InX::StrTab; SymbolTableBaseSection *InX::SymTab; +LongBranchTargetSection *InX::LongBranchTarget; template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -223,11 +223,7 @@ if (!Dest.isLongBranchTarget()) { InX::GotPlt->addLongBranch(Dest); - uint32_t LocalEntryOffset = 0; - uint8_t Shift = (Dest.StOther >> 5) & 7; - if (Shift > 1) - LocalEntryOffset = 1 << Shift; - + uint32_t LocalEntryOffset = getPPC64LocalEntryOffset(Dest); InX::RelaDyn->addReloc({Target->RelativeRel, InX::GotPlt, Dest.getLongBranchGotPltOffset(), true, &Dest, LocalEntryOffset}); @@ -238,6 +234,22 @@ void addSymbols(ThunkSection &IS) override; }; +// Long branch thunks when linking position dependent code. The branch target +// address (local entry point of the callee) is placed into the '.branch_lt' +// section. No dynamic relocations are needed, since the branch target is a link +// time constant. 
+class PPC64LongBranchThunk final : public Thunk { +public: + PPC64LongBranchThunk(Symbol &Dest) : Thunk(Dest) { + if (!Dest.isLongBranchTarget()) + InX::LongBranchTarget->addEntry(Dest); + } + + uint32_t size() override { return 16; } + void writeTo(uint8_t *Buf) override; + void addSymbols(ThunkSection &IS) override; +}; + } // end anonymous namespace Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, @@ -563,6 +575,16 @@ IS); } +void PPC64LongBranchThunk::writeTo(uint8_t *Buf) { + int64_t Off = Destination.getLongBranchTableVA() - getPPC64TocBase(); + writePPCLoadAndBranch(Buf, Off); +} + +void PPC64LongBranchThunk::addSymbols(ThunkSection & IS) { + addSymbol(Saver.save("__long_branch_" + Destination.getName()), STT_FUNC, 0, + IS); +} + Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; @@ -613,8 +635,7 @@ if (Config->Pic) return make(S); - else - fatal("Position dependant long_branch thunks not implemented yet!"); + return make(S); } fatal("unexpected relocation type"); } Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -364,6 +364,13 @@ Add(InX::Got); } + // If we are linking position-independent code then long branch targets + // go into the GotPlt, otherwise they go into the LongBranchTarget section. + if (Config->EMachine == EM_PPC64 && !Config->Pic) { + InX::LongBranchTarget = make(); + Add(InX::LongBranchTarget); + } + InX::GotPlt = make(); Add(InX::GotPlt); InX::IgotPlt = make(); @@ -1683,8 +1690,9 @@ } // createThunks may have added local symbols to the static symbol table - applySynthetic({InX::GotPlt, InX::SymTab}, - [](SyntheticSection *SS) { SS->postThunkContents(); }); + applySynthetic({InX::LongBranchTarget, InX::GotPlt, InX::SymTab}, + [](SyntheticSection *SS) { if (SS) SS->postThunkContents(); }); + // Fill other section headers. 
The dynamic table is finalized // at the end because some tags like RELSZ depend on result Index: test/ELF/basic64be.s =================================================================== --- test/ELF/basic64be.s +++ test/ELF/basic64be.s @@ -23,7 +23,7 @@ # CHECK-NEXT: Version: 1 # CHECK-NEXT: Entry: 0x10010000 # CHECK-NEXT: ProgramHeaderOffset: 0x40 -# CHECK-NEXT: SectionHeaderOffset: 0x11050 +# CHECK-NEXT: SectionHeaderOffset: 0x20058 # CHECK-NEXT: Flags [ (0x2) # CHECK-NEXT: 0x2 # CHECK-NEXT: ] @@ -31,8 +31,8 @@ # CHECK-NEXT: ProgramHeaderEntrySize: 56 # CHECK-NEXT: ProgramHeaderCount: 4 # CHECK-NEXT: SectionHeaderEntrySize: 64 -# CHECK-NEXT: SectionHeaderCount: 6 -# CHECK-NEXT: StringTableSectionIndex: 4 +# CHECK-NEXT: SectionHeaderCount: 7 +# CHECK-NEXT: StringTableSectionIndex: 5 # CHECK-NEXT: } # CHECK-NEXT: Sections [ # CHECK-NEXT: Section { @@ -70,16 +70,34 @@ # CHECK-NEXT: 0000: 38000001 3860002A 44000002 |8...8`.*D...| # CHECK-NEXT: ) # CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: Name: .branch_lt (7) +# CHECK-NEXT: Type: SHT_PROGBITS (0x1) +# CHECK-NEXT: Flags [ (0x3) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: SHF_WRITE (0x1) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x10020000 +# CHECK-NEXT: Offset: 0x20000 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 8 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: SectionData ( +# CHECK-NEXT: ) +# CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 2 -# CHECK-NEXT: Name: .comment (7) +# CHECK-NEXT: Index: 3 +# CHECK-NEXT: Name: .comment (18) # CHECK-NEXT: Type: SHT_PROGBITS (0x1) # CHECK-NEXT: Flags [ (0x30) # CHECK-NEXT: SHF_MERGE (0x10) # CHECK-NEXT: SHF_STRINGS (0x20) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11000 +# CHECK-NEXT: Offset: 0x20000 # CHECK-NEXT: Size: 8 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 @@ -90,15 +108,15 @@ # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# 
CHECK-NEXT: Index: 3 -# CHECK-NEXT: Name: .symtab (16) +# CHECK-NEXT: Index: 4 +# CHECK-NEXT: Name: .symtab (27) # CHECK-NEXT: Type: SHT_SYMTAB (0x2) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11008 +# CHECK-NEXT: Offset: 0x20008 # CHECK-NEXT: Size: 24 -# CHECK-NEXT: Link: 5 +# CHECK-NEXT: Link: 6 # CHECK-NEXT: Info: 1 # CHECK-NEXT: AddressAlignment: 8 # CHECK-NEXT: EntrySize: 24 @@ -108,32 +126,33 @@ # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 4 -# CHECK-NEXT: Name: .shstrtab (24) +# CHECK-NEXT: Index: 5 +# CHECK-NEXT: Name: .shstrtab (35) # CHECK-NEXT: Type: SHT_STRTAB (0x3) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11020 -# CHECK-NEXT: Size: 42 +# CHECK-NEXT: Offset: 0x20020 +# CHECK-NEXT: Size: 53 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 # CHECK-NEXT: AddressAlignment: 1 # CHECK-NEXT: EntrySize: 0 # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 002E7465 7874002E 636F6D6D 656E7400 |..text..comment.| -# CHECK-NEXT: 0010: 2E73796D 74616200 2E736873 74727461 |.symtab..shstrta| -# CHECK-NEXT: 0020: 62002E73 74727461 6200 |b..strtab.| +# CHECK-NEXT: 0000: 002E7465 7874002E 6272616E 63685F6C |..text..branch_l| +# CHECK-NEXT: 0010: 74002E63 6F6D6D65 6E74002E 73796D74 |t..comment..symt| +# CHECK-NEXT: 0020: 6162002E 73687374 72746162 002E7374 |ab..shstrtab..st| +# CHECK-NEXT: 0030: 72746162 00 |rtab.| # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 5 -# CHECK-NEXT: Name: .strtab (34) +# CHECK-NEXT: Index: 6 +# CHECK-NEXT: Name: .strtab (45) # CHECK-NEXT: Type: SHT_STRTAB (0x3) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x1104A +# CHECK-NEXT: Offset: 0x20055 # CHECK-NEXT: Size: 1 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 @@ -150,8 +169,8 @@ # CHECK-NEXT: Offset: 0x40 # CHECK-NEXT: VirtualAddress: 0x10000040 # CHECK-NEXT: PhysicalAddress: 
0x10000040 -# CHECK-NEXT: FileSize: 224 -# CHECK-NEXT: MemSize: 224 +# CHECK-NEXT: FileSize: 280 +# CHECK-NEXT: MemSize: 280 # CHECK-NEXT: Flags [ (0x4) # CHECK-NEXT: PF_R (0x4) # CHECK-NEXT: ] @@ -162,8 +181,8 @@ # CHECK-NEXT: Offset: 0x0 # CHECK-NEXT: VirtualAddress: 0x10000000 # CHECK-NEXT: PhysicalAddress: 0x10000000 -# CHECK-NEXT: FileSize: 288 -# CHECK-NEXT: MemSize: 288 +# CHECK-NEXT: FileSize: 344 +# CHECK-NEXT: MemSize: 344 # CHECK-NEXT: Flags [ (0x4) # CHECK-NEXT: PF_R (0x4) # CHECK-NEXT: ] Index: test/ELF/ppc64-long_branch.s =================================================================== --- /dev/null +++ test/ELF/ppc64-long_branch.s @@ -0,0 +1,96 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d -start-address=0x10010000 -stop-address=0x10010014 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: llvm-objdump -d -start-address=0x12010020 -stop-address=0x12010068 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-readelf --sections %t | FileCheck %s -check-prefix=SECTIONS +# RUN: llvm-objdump -D -start-address=0x12020008 -stop-address=0x12020018 %t | FileCheck %s -check-prefix=BRANCHLT + + .text + .abiversion 2 + .globl callee + .p2align 4 + .type callee,@function +callee: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry callee, .Lfunc_lep0-.Lfunc_gep0 + addis 4, 2, a@toc@ha + lwz 3, a@toc@l(4) + blr + +.space 0x2000000 + + .globl caller + .p2align 4 + .type caller,@function +caller: +.Lfunc_begin1: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl callee + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + + addis 4, 2, b@toc@ha + lwz 4, b@toc@l(4) + add 3, 3, 4 + blr + + + .data + .type a,@object # @a + .globl a + .p2align 2 +a: + .long 11 + .size a, 4 + + .type 
b,@object + .globl b + .p2align 2 +b: + .long 33 # 0x21 + .size b, 4 + +# Verify address of the callee +# CALLEE_DUMP: callee: +# CALLEE_DUMP: 10010000: {{.*}} addis 2, 12, 515 +# CALLEE_DUMP: 10010004: {{.*}} addi 2, 2, -32768 +# CALLEE_DUMP: 10010008: {{.*}} addis 4, 2, -1 +# CALLEE_DUMP: 1001000c: {{.*}} lwz 3, -32768(4) +# CALLEE_DUMP: 10010010: {{.*}} blr + +# Verify the thunks contents. +# Offset from TOC to .branch_lt[0] : (0x12020008 - 0x12038000) = 0xfffe8008. +# 0xfffe8008 = (-1 << 16) + (-32760). +# CALLER_DUMP: __long_branch_callee: +# CALLER_DUMP: 12010058: {{.*}} addis 12, 2, -1 +# CALLER_DUMP: 1201005c: {{.*}} ld 12, -32760(12) +# CALLER_DUMP: 12010060: {{.*}} mtctr 12 +# CALLER_DUMP: 12010064: {{.*}} bctr + +# .branch_lt is at address 0x12020008 and has a single entry. +# .got section is at address 0x12030000 so TOC is 0x12038000 +# No .plt section should be emitted. +# [Nr] Name Type Address Off Size +# SECTIONS: [ 3] .branch_lt PROGBITS 0000000012020008 2020008 000008 +# SECTIONS: [ 4] .got PROGBITS 0000000012030000 2030000 000008 +# SECTIONS-NOT: .plt + +# .branch_lt contains 1 entry: the local entry point of callee +# BRANCHLT: Disassembly of section .branch_lt: +# BRANCHLT: .branch_lt: +# BRANCHLT: 12020008: 08 00 01 10 +# BRANCHLT: 1202000c: 00 00 00 00 +