Index: lld/trunk/ELF/Arch/PPC64.cpp =================================================================== --- lld/trunk/ELF/Arch/PPC64.cpp +++ lld/trunk/ELF/Arch/PPC64.cpp @@ -43,10 +43,10 @@ uint32_t calcEFlags() const override; RelExpr getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const override; - void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, - int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; void writeGotHeader(uint8_t *Buf) const override; + bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const override; }; } // namespace @@ -64,17 +64,17 @@ PPC64::PPC64() { GotRel = R_PPC64_GLOB_DAT; + PltRel = R_PPC64_JMP_SLOT; RelativeRel = R_PPC64_RELATIVE; GotEntrySize = 8; GotPltEntrySize = 8; - PltEntrySize = 32; + PltEntrySize = 0; PltHeaderSize = 0; GotBaseSymInGotPlt = false; GotBaseSymOff = 0x8000; - GotHeaderEntriesNum = 1; GotPltHeaderEntriesNum = 2; - PltRel = R_PPC64_JMP_SLOT; + NeedsThunks = true; // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). @@ -170,28 +170,6 @@ write64(Buf, getPPC64TocBase()); } -void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, - uint64_t PltEntryAddr, int32_t Index, - unsigned RelOff) const { - uint64_t Off = GotPltEntryAddr - getPPC64TocBase(); - - // The most-common form of the plt stub. This assumes that the toc-pointer - // register is properly initalized, and that the stub must save the toc - // pointer value to the stack-save slot reserved for it (sp + 24). - // There are 2 other variants but we don't have to emit those until we add - // support for R_PPC64_REL24_NOTOC and R_PPC64_TOCSAVE relocations. 
- // We are missing a super simple optimization, where if the upper 16 bits of - // the offset are zero, then we can omit the addis instruction, and load - // r2 + lo-offset directly into r12. I decided to leave this out in the - // spirit of keeping it simple until we can link actual non-trivial - // programs. - write32(Buf + 0, 0xf8410018); // std r2,24(r1) - write32(Buf + 4, 0x3d820000 | applyPPCHa(Off)); // addis r12,r2, X@plt@to@ha - write32(Buf + 8, 0xe98c0000 | applyPPCLo(Off)); // ld r12,X@plt@toc@l(r12) - write32(Buf + 12, 0x7d8903a6); // mtctr r12 - write32(Buf + 16, 0x4e800420); // bctr -} - static std::pair toAddr16Rel(RelType Type, uint64_t Val) { uint64_t V = Val - PPC64TocOffset; switch (Type) { @@ -281,6 +259,13 @@ } } +bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const { + // If a function is in the plt it needs to be called through + // a call stub. + return Type == R_PPC64_REL24 && S.isInPlt(); +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; Index: lld/trunk/ELF/InputSection.cpp =================================================================== --- lld/trunk/ELF/InputSection.cpp +++ lld/trunk/ELF/InputSection.cpp @@ -740,13 +740,15 @@ case R_RELAX_TLS_GD_TO_IE_END: Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; - case R_PPC_CALL_PLT: + case R_PPC_CALL: // Patch a nop (0x60000000) to a ld. 
- if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { - error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc"); - break; + if (Rel.Sym->NeedsTocRestore) { + if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { + error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc"); + break; + } + write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) Target->relocateOne(BufLoc, Type, TargetVA); break; default: Index: lld/trunk/ELF/Symbols.h =================================================================== --- lld/trunk/ELF/Symbols.h +++ lld/trunk/ELF/Symbols.h @@ -159,7 +159,8 @@ : File(File), NameData(Name.Data), NameSize(Name.Size), Binding(Binding), Type(Type), StOther(StOther), SymbolKind(K), NeedsPltAddr(false), IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), - IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections) {} + IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections), + NeedsTocRestore(false) {} public: // True the symbol should point to its PLT entry. @@ -183,6 +184,10 @@ // True if an undefined or shared symbol is used from a live section. unsigned Used : 1; + // True if a call to this symbol needs to be followed by a restore of the + // PPC64 toc pointer. + unsigned NeedsTocRestore : 1; + // The Type field may also have this value. It means that we have not yet seen // a non-Lazy symbol with this name, so we don't know what its type is. The // Type field is normally set to this value for Lazy symbols unless we saw a Index: lld/trunk/ELF/Thunks.cpp =================================================================== --- lld/trunk/ELF/Thunks.cpp +++ lld/trunk/ELF/Thunks.cpp @@ -192,6 +192,23 @@ InputSection *getTargetInputSection() const override; }; + +// PPC64 Plt call stubs. +// Any call site that needs to call through a plt entry needs a call stub in +// the .text section. 
The call stub is responsible for: +// 1) Saving the toc-pointer to the stack. +// 2) Loading the target function's address from the procedure linkage table into +// r12 for use by the target function's global entry point, and into the count +// register. +// 3) Transferring control to the target function through an indirect branch. +class PPC64PltCallStub final : public Thunk { +public: + PPC64PltCallStub(Symbol &Dest) : Thunk(Dest) {} + uint32_t size() { return 20; } + void writeTo(uint8_t *Buf) override; + void addSymbols(ThunkSection &IS) override; +}; + } // end anonymous namespace Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, @@ -485,6 +502,25 @@ return dyn_cast(DR.Section); } +void PPC64PltCallStub::writeTo(uint8_t *Buf) { + int64_t Off = Destination.getGotPltVA() - getPPC64TocBase(); + // Need to add 0x8000 to offset to account for the low bits being signed. + uint16_t OffHa = (Off + 0x8000) >> 16; + uint16_t OffLo = Off; + + write32(Buf + 0, 0xf8410018); // std r2,24(r1) + write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@toc@ha + write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12) + write32(Buf + 12, 0x7d8903a6); // mtctr r12 + write32(Buf + 16, 0x4e800420); // bctr +} + +void PPC64PltCallStub::addSymbols(ThunkSection &IS) { + Defined *S = addSymbol(Saver.save("__plt_" + Destination.getName()), STT_FUNC, + 0, IS); + S->NeedsTocRestore = true; +} + Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; @@ -528,15 +564,26 @@ return make(S); } +static Thunk *addThunkPPC64(RelType Type, Symbol &S) { + if (Type == R_PPC64_REL24) + return make(S); + fatal("unexpected relocation type"); +} + Thunk *addThunk(RelType Type, Symbol &S) { if (Config->EMachine == EM_AARCH64) return addThunkAArch64(Type, S); - else if (Config->EMachine == EM_ARM) + + if (Config->EMachine == EM_ARM) return addThunkArm(Type, S); - else if (Config->EMachine == EM_MIPS) + + if (Config->EMachine == EM_MIPS) return 
addThunkMips(Type, S); - llvm_unreachable("add Thunk only supported for ARM and Mips"); - return nullptr; + + if (Config->EMachine == EM_PPC64) + return addThunkPPC64(Type, S); + + llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC"); } } // end namespace elf Index: lld/trunk/test/ELF/ppc64-ifunc.s =================================================================== --- lld/trunk/test/ELF/ppc64-ifunc.s +++ lld/trunk/test/ELF/ppc64-ifunc.s @@ -4,37 +4,57 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o # RUN: ld.lld -shared %t2.o -o %t2.so # RUN: ld.lld %t.o %t2.so -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -D %t | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o # RUN: ld.lld -shared %t2.o -o %t2.so # RUN: ld.lld %t.o %t2.so -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -D %t | FileCheck %s + +# CHECK: Disassembly of section .text: + +# Tocbase + (-2 << 16) + 32576 +# 0x100380d0 + (-131072) + 32576 = 0x10020010 (.got.plt[2]) +# CHECK: __plt_foo: +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: addis 12, 2, -2 +# CHECK-NEXT: ld 12, 32576(12) +# CHECK-NEXT: mtctr 12 +# CHECK-NEXT: bctr + +# Tocbase + (-2 << 16) + 32584 +# 0x100380d0 + (-131072) + 32584 = 0x10020018 (.got.plt[3]) +# CHECK: __plt_ifunc: +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: addis 12, 2, -2 +# CHECK-NEXT: ld 12, 32584(12) +# CHECK-NEXT: mtctr 12 +# CHECK-NEXT: bctr + +# CHECK: ifunc: +# CHECK-NEXT: 10010028: {{.*}} nop + +# CHECK: _start: +# CHECK-NEXT: addis 2, 12, 3 +# CHECK-NEXT: addi 2, 2, -32604 +# CHECK-NEXT: bl .+67108812 +# CHECK-NEXT: ld 2, 24(1) +# CHECK-NEXT: bl .+67108824 +# CHECK-NEXT: ld 2, 24(1) + +# Address of .got.plt +# CHECK: Disassembly of section .got.plt: +# CHECK-NEXT: .got.plt: +# CHECK-NEXT: 10020000: + + +# Check tocbase +# CHECK: 
Disassembly of section .got: +# CHECK-NEXT: .got: +# CHECK-NEXT: 100300d0: + -# CHECK: _start: -# CHECK-NEXT: 10010004: {{.*}} bl .+28 -# CHECK-NEXT: 10010008: {{.*}} ld 2, 24(1) -# CHECK-NEXT: 1001000c: {{.*}} bl .+52 -# CHECK-NEXT: 10010010: {{.*}} ld 2, 24(1) - -# 0x10010004 + 28 = 0x10010020 (PLT entry 0) -# 0x1001000c + 52 = 0x10010040 (PLT entry 1) - -# CHECK: Disassembly of section .plt: -# CHECK-NEXT: .plt: -# CHECK-NEXT: 10010020: {{.*}} std 2, 24(1) -# CHECK-NEXT: 10010024: {{.*}} addis 12, 2, 4098 -# CHECK-NEXT: 10010028: {{.*}} ld 12, -32752(12) -# CHECK-NEXT: 1001002c: {{.*}} mtctr 12 -# CHECK-NEXT: 10010030: {{.*}} bctr -# CHECK-NEXT: 10010034: {{.*}} trap -# CHECK-NEXT: 10010038: {{.*}} trap -# CHECK-NEXT: 1001003c: {{.*}} trap -# CHECK-NEXT: 10010040: {{.*}} std 2, 24(1) -# CHECK-NEXT: 10010044: {{.*}} addis 12, 2, 4098 -# CHECK-NEXT: 10010048: {{.*}} ld 12, -32744(12) -# CHECK-NEXT: 1001004c: {{.*}} mtctr 12 .text .abiversion 2 @@ -43,8 +63,15 @@ ifunc: nop -.global _start + .global _start + .type _start,@function + _start: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 bl foo nop bl ifunc Index: lld/trunk/test/ELF/ppc64-plt-stub.s =================================================================== --- lld/trunk/test/ELF/ppc64-plt-stub.s +++ lld/trunk/test/ELF/ppc64-plt-stub.s @@ -12,9 +12,17 @@ // RUN: ld.lld %t.o %t2.so -o %t // RUN: llvm-objdump -d %t | FileCheck %s -// CHECK: Disassembly of section .text: +// CHECK: Disassembly of section .text: +// CHECK-NEXT: __plt_foo: +// CHECK-NEXT: std 2, 24(1) +// CHECK-NEXT: addis 12, 2, -2 +// CHECK-NEXT: ld 12, 32576(12) +// CHECK-NEXT: mtctr 12 +// CHECK-NEXT: bctr + + // CHECK: _start: -// CHECK: bl .+24 +// CHECK: bl .+67108824 .text .abiversion 2 .globl _start @@ -32,13 +40,3 @@ li 0, 1 sc .size _start, .-.Lfunc_begin0 - - - -// CHECK: Disassembly of section .plt: -// CHECK: .plt: -// CHECK-NEXT: {{.*}} 
std 2, 24(1) -// CHECK-NEXT: {{.*}} addis 12, 2, -2 -// CHECK-NEXT: {{.*}} ld 12, 32576(12) -// CHECK-NEXT: {{.*}} mtctr 12 -// CHECK: {{.*}} bctr Index: lld/trunk/test/ELF/ppc64-toc-restore.s =================================================================== --- lld/trunk/test/ELF/ppc64-toc-restore.s +++ lld/trunk/test/ELF/ppc64-toc-restore.s @@ -23,20 +23,21 @@ # Calling external function foo in a shared object needs a nop. # Calling local function bar_local doe snot need a nop. -// CHECK: Disassembly of section .text: .global _start _start: bl foo nop bl bar_local + +// CHECK: Disassembly of section .text: // CHECK: _start: -// CHECK: 10010008: {{.*}} bl .+72 -// CHECK-NOT: 1001000c: {{.*}} nop -// CHECK: 1001000c: {{.*}} ld 2, 24(1) -// CHECK: 10010010: {{.*}} bl .+67108848 -// CHECK-NOT: 10010014: {{.*}} nop -// CHECK-NOT: 10010014: {{.*}} ld 2, 24(1) +// CHECK: 1001001c: {{.*}} bl .+67108836 +// CHECK-NOT: 10010020: {{.*}} nop +// CHECK: 10010020: {{.*}} ld 2, 24(1) +// CHECK: 10010024: {{.*}} bl .+67108848 +// CHECK-NOT: 10010028: {{.*}} nop +// CHECK-NOT: 10010028: {{.*}} ld 2, 24(1) # Calling a function in another object file which will have same # TOC base does not need a nop. If nop present, do not rewrite to @@ -48,18 +49,18 @@ nop // CHECK: _diff_object: -// CHECK-NEXT: 10010014: {{.*}} bl .+28 -// CHECK-NEXT: 10010018: {{.*}} bl .+24 -// CHECK-NEXT: 1001001c: {{.*}} nop +// CHECK-NEXT: 10010028: {{.*}} bl .+24 +// CHECK-NEXT: 1001002c: {{.*}} bl .+20 +// CHECK-NEXT: 10010030: {{.*}} nop # Branching to a local function does not need a nop .global noretbranch noretbranch: b bar_local // CHECK: noretbranch: -// CHECK: 10010020: {{.*}} b .+67108832 -// CHECK-NOT: 10010024: {{.*}} nop -// CHECK-NOT: 10010024: {{.*}} ld 2, 24(1) +// CHECK: 10010034: {{.*}} b .+67108832 +// CHECK-NOT: 10010038: {{.*}} nop +// CHECK-NOT: 1001003c: {{.*}} ld 2, 24(1) // This should come last to check the end-of-buffer condition. 
.global last @@ -67,12 +68,5 @@ bl foo nop // CHECK: last: -// CHECK: 10010024: {{.*}} bl .+44 -// CHECK-NEXT: 10010028: {{.*}} ld 2, 24(1) - -// CHECK: Disassembly of section .plt: -// CHECK: .plt: -// CHECK-NEXT: 10010050: {{.*}} std 2, 24(1) -// CHECK-NEXT: 10010054: {{.*}} addis 12, 2, 4098 -// CHECK-NEXT: 10010058: {{.*}} ld 12, -32752(12) -// CHECK-NEXT: 1001005c: {{.*}} mtctr 12 +// CHECK: 10010038: {{.*}} bl .+67108808 +// CHECK-NEXT: 1001003c: {{.*}} ld 2, 24(1)