Index: ELF/Arch/PPC64.cpp =================================================================== --- ELF/Arch/PPC64.cpp +++ ELF/Arch/PPC64.cpp @@ -55,6 +55,8 @@ int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; void writeGotHeader(uint8_t *Buf) const override; + bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const override; }; } // namespace @@ -84,6 +86,7 @@ GotHeaderEntriesNum = 1; GotPltHeaderEntriesNum = 2; PltRel = R_PPC64_JMP_SLOT; + NeedsThunks = true; } else { PltRel = R_PPC64_GLOB_DAT; } @@ -172,39 +175,23 @@ void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { - uint64_t Off = GotPltEntryAddr - getPPC64TocBase(); - if (isPPC64ElfV2()) { - // The most-common form of the plt stub. This assumes that the toc-pointer - // register is properly initalized, and that the stub must save the toc - // pointer value to the stack-save slot reserved for it (sp + 24). - // There are 2 other variants but we don't have to emit those until we add - // support for R_PPC64_REL24_NOTOC and R_PPC64_TOCSAVE relocations. - // We are missing a super simple optimization, where if the upper 16 bits of - // the offset are zero, then we can omit the addis instruction, and load - // r2 + lo-offset directly into r12. I decided to leave this out in the - // spirit of keeping it simple until we can link actual non-trivial - // programs. - write32(Buf + 0, 0xf8410018); // std r2,24(r1) - write32(Buf + 4, 0x3d820000 | applyPPCHa(Off)); // addis r12,r2, X@plt@to@ha - write32(Buf + 8, 0xe98c0000 | applyPPCLo(Off)); // ld r12,X@plt@toc@l(r12) - write32(Buf + 12, 0x7d8903a6); // mtctr r12 - write32(Buf + 16, 0x4e800420); // bctr - } else { - // FIXME: What we should do, in theory, is get the offset of the function - // descriptor in the .opd section, and use that as the offset from %r2 (the - // TOC-base pointer). Instead, we have the GOT-entry offset, and that will - // be a pointer to the function descriptor in the .opd section. Using - // this scheme is simpler, but requires an extra indirection per PLT dispatch. - write32(Buf, 0xf8410028); // std %r2, 40(%r1) - write32(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha - write32(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) - write32(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) - write32(Buf + 16, 0x7d6903a6); // mtctr %r11 - write32(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) - write32(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) - write32(Buf + 28, 0x4e800420); // bctr - } + if (!isPPC64ElfV2()) { + uint64_t Off = GotPltEntryAddr - getPPC64TocBase(); + // FIXME: What we should do, in theory, is get the offset of the function + // descriptor in the .opd section, and use that as the offset from %r2 (the + // TOC-base pointer). Instead, we have the GOT-entry offset, and that will + // be a pointer to the function descriptor in the .opd section. Using + // this scheme is simpler, but requires an extra indirection per PLT dispatch. + write32(Buf, 0xf8410028); // std %r2, 40(%r1) + write32(Buf + 4, 0x3d620000 | applyPPCHa(Off)); // addis %r11, %r2, X@ha + write32(Buf + 8, 0xe98b0000 | applyPPCLo(Off)); // ld %r12, X@l(%r11) + write32(Buf + 12, 0xe96c0000); // ld %r11,0(%r12) + write32(Buf + 16, 0x7d6903a6); // mtctr %r11 + write32(Buf + 20, 0xe84c0008); // ld %r2,8(%r12) + write32(Buf + 24, 0xe96c0010); // ld %r11,16(%r12) + write32(Buf + 28, 0x4e800420); // bctr + } } static std::pair toAddr16Rel(RelType Type, uint64_t Val) { @@ -296,6 +283,16 @@ } } +bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, + uint64_t BranchAddr, const Symbol &S) const { + if (Type != R_PPC64_REL24) + return false; + + // If a function is in the plt it needs to be called through + // a call stub. + return S.isInPlt(); +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -749,13 +749,15 @@ case R_RELAX_TLS_GD_TO_IE_END: Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; - case R_PPC_PLT_OPD: + case R_PPC_OPD: // Patch a nop (0x60000000) to a ld. - if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { - error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc"); - break; + if (Rel.Sym->NeedsTocRestore) { + if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { + error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc"); + break; + } + write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) Target->relocateOne(BufLoc, Type, TargetVA); break; default: Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -159,7 +159,8 @@ : File(File), NameData(Name.Data), NameSize(Name.Size), Binding(Binding), Type(Type), StOther(StOther), SymbolKind(K), NeedsPltAddr(false), IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), - IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections) {} + IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections), + NeedsTocRestore(0) {} public: // True the symbol should point to its PLT entry. @@ -183,6 +184,10 @@ // True if an undefined or shared symbol is used from a live section. unsigned Used : 1; + // True if a call to this symbol needs to be followed by a restore of the + // PPC64 toc pointer. + unsigned NeedsTocRestore : 1; + // The Type field may also have this value. It means that we have not yet seen // a non-Lazy symbol with this name, so we don't know what its type is. The // Type field is normally set to this value for Lazy symbols unless we saw a Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -192,6 +192,16 @@ InputSection *getTargetInputSection() const override; }; + +// PPC64 Plt call stubs. +class PPC64PltCallStub final : public Thunk { +public: + PPC64PltCallStub(Symbol &Dest) : Thunk(Dest) {} + uint32_t size() { return 20; } + void writeTo(uint8_t *Buf) override; + void addSymbols(ThunkSection &IS) override; +}; + } // end anonymous namespace Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, @@ -485,6 +495,26 @@ return dyn_cast(DR.Section); } +void PPC64PltCallStub::writeTo(uint8_t *Buf) { + int64_t Off = Destination.getGotPltVA() - getPPC64TocBase(); + // Need to add 0x8000 to offset to account for the low bits being signed. + uint16_t OffHa = (Off + 0x8000) >> 16; + uint16_t OffLo = (uint16_t)Off; + + write32(Buf + 0, 0xf8410018); // std r2,24(r1) + write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@to@ha + write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12) + write32(Buf + 12, 0x7d8903a6); // mtctr r12 + write32(Buf + 16, 0x4e800420); // bctr +} + +void PPC64PltCallStub::addSymbols(ThunkSection &IS) +{ + Defined *S = addSymbol(Saver.save("__plt_" + Destination.getName()), STT_FUNC, + 0, IS); + S->NeedsTocRestore = true; +} + Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; @@ -528,6 +558,14 @@ return make(S); } +static Thunk *addThunkPPC64(RelType Type, Symbol &S) { + switch(Type) { + case R_PPC64_REL24: + return make(S); + } + fatal("unexpected relocation type"); +} + Thunk *addThunk(RelType Type, Symbol &S) { if (Config->EMachine == EM_AARCH64) return addThunkAArch64(Type, S); @@ -535,6 +573,8 @@ return addThunkArm(Type, S); else if (Config->EMachine == EM_MIPS) return addThunkMips(Type, S); + else if (Config->EMachine == EM_PPC64) + return addThunkPPC64(Type, S); llvm_unreachable("add Thunk only supported for ARM and Mips"); return nullptr; } Index: test/ELF/ppc64-ifunc.s =================================================================== --- test/ELF/ppc64-ifunc.s +++ test/ELF/ppc64-ifunc.s @@ -3,31 +3,52 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64le.s -o %t2.o # RUN: ld.lld -shared %t2.o -o %t2.so # RUN: ld.lld %t.o %t2.so -o %t -# RUN: llvm-objdump -d %t | FileCheck %s - -# CHECK: _start: -# CHECK-NEXT: 10010004: 1d 00 00 48 bl .+28 -# CHECK-NEXT: 10010008: 18 00 41 e8 ld 2, 24(1) -# CHECK-NEXT: 1001000c: 35 00 00 48 bl .+52 -# CHECK-NEXT: 10010010: 18 00 41 e8 ld 2, 24(1) - -# 0x10010004 + 28 = 0x10010020 (PLT entry 0) -# 0x1001000c + 52 = 0x10010040 (PLT entry 1) - -# CHECK: Disassembly of section .plt: -# CHECK-NEXT: .plt: -# CHECK-NEXT: 10010020: 18 00 41 f8 std 2, 24(1) -# CHECK-NEXT: 10010024: 02 10 82 3d addis 12, 2, 4098 -# CHECK-NEXT: 10010028: 10 80 8c e9 ld 12, -32752(12) -# CHECK-NEXT: 1001002c: a6 03 89 7d mtctr 12 -# CHECK-NEXT: 10010030: 20 04 80 4e bctr -# CHECK-NEXT: 10010034: 08 00 e0 7f trap -# CHECK-NEXT: 10010038: 08 00 e0 7f trap -# CHECK-NEXT: 1001003c: 08 00 e0 7f trap -# CHECK-NEXT: 10010040: 18 00 41 f8 std 2, 24(1) -# CHECK-NEXT: 10010044: 02 10 82 3d addis 12, 2, 4098 -# CHECK-NEXT: 10010048: 18 80 8c e9 ld 12, -32744(12) -# CHECK-NEXT: 1001004c: a6 03 89 7d mtctr 12 +# RUN: llvm-objdump -D %t | FileCheck %s + +# CHECK: Disassembly of section .text: + +# Tocbase + (-2 << 16) + 32576 +# 0x100380d0 + (-131072) + 32576 = 0x10020010 (.got.plt[2]) +# CHECK: __plt_foo: +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: addis 12, 2, -2 +# CHECK-NEXT: ld 12, 32576(12) +# CHECK-NEXT: mtctr 12 +# CHECK-NEXT: bctr + +# Tocbase + (-2 << 16) + 32584 +# 0x100380d0 + (-131072) + 32584 = 0x10020018 (.got.plt[3]) +# CHECK: __plt_ifunc: +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: addis 12, 2, -2 +# CHECK-NEXT: ld 12, 32584(12) +# CHECK-NEXT: mtctr 12 +# CHECK-NEXT: bctr + +# CHECK: ifunc: +# CHECK-NEXT: 10010028: 00 00 00 60 nop + +# CHECK: _start: +# CHECK-NEXT: addis 2, 12, 3 +# CHECK-NEXT: addi 2, 2, -32604 +# CHECK-NEXT: bl .+67108812 +# CHECK-NEXT: ld 2, 24(1) +# CHECK-NEXT: bl .+67108824 +# CHECK-NEXT: ld 2, 24(1) + +# Address of .got.plt +# CHECK: Disassembly of section .got.plt: +# CHECK-NEXT: .got.plt: +# CHECK-NEXT: 10020000: + + +# Check tocbase +# CHECK: Disassembly of section .got: +# CHECK-NEXT: .got: +# CHECK-NEXT: 100300d0: d0 80 03 10 +# CHECK-NEXT: 100300d4: 00 00 00 00 + + .text .abiversion 2 @@ -36,8 +57,15 @@ ifunc: nop -.global _start + .global _start + .type _start,@function + _start: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 bl foo nop bl ifunc Index: test/ELF/ppc64-toc-restore.s =================================================================== --- test/ELF/ppc64-toc-restore.s +++ test/ELF/ppc64-toc-restore.s @@ -14,20 +14,21 @@ # Calling external function foo in a shared object needs a nop. # Calling local function bar_local doe snot need a nop. -// CHECK: Disassembly of section .text: .global _start _start: bl foo nop bl bar_local + +// CHECK: Disassembly of section .text: // CHECK: _start: -// CHECK: 10010008: 49 00 00 48 bl .+72 -// CHECK-NOT: 1001000c: 00 00 00 60 nop -// CHECK: 1001000c: 18 00 41 e8 ld 2, 24(1) -// CHECK: 10010010: f1 ff ff 4b bl .+67108848 -// CHECK-NOT: 10010014: 00 00 00 60 nop -// CHECK-NOT: 10010014: 18 00 41 e8 ld 2, 24(1) +// CHECK: 1001001c: e5 ff ff 4b bl .+67108836 +// CHECK-NOT: 10010020: 00 00 00 60 nop +// CHECK: 10010020: 18 00 41 e8 ld 2, 24(1) +// CHECK: 10010024: f1 ff ff 4b bl .+67108848 +// CHECK-NOT: 10010028: 00 00 00 60 nop +// CHECK-NOT: 10010028: 18 00 41 e8 ld 2, 24(1) # Calling a function in another object file which will have same # TOC base does not need a nop. If nop present, do not rewrite to @@ -39,18 +40,18 @@ nop // CHECK: _diff_object: -// CHECK-NEXT: 10010014: 1d 00 00 48 bl .+28 -// CHECK-NEXT: 10010018: 19 00 00 48 bl .+24 -// CHECK-NEXT: 1001001c: 00 00 00 60 nop +// CHECK-NEXT: 10010028: 19 00 00 48 bl .+24 +// CHECK-NEXT: 1001002c: 15 00 00 48 bl .+20 +// CHECK-NEXT: 10010030: 00 00 00 60 nop # Branching to a local function does not need a nop .global noretbranch noretbranch: b bar_local // CHECK: noretbranch: -// CHECK: 10010020: e0 ff ff 4b b .+67108832 -// CHECK-NOT: 10010024: 00 00 00 60 nop -// CHECK-NOT: 10010024: 18 00 41 e8 ld 2, 24(1) +// CHECK: 10010034: e0 ff ff 4b b .+67108832 +// CHECK-NOT: 10010038: 00 00 00 60 nop +// CHECK-NOT: 1001003c: 18 00 41 e8 ld 2, 24(1) // This should come last to check the end-of-buffer condition. .global last @@ -58,12 +59,5 @@ bl foo nop // CHECK: last: -// CHECK: 10010024: 2d 00 00 48 bl .+44 -// CHECK-NEXT: 10010028: 18 00 41 e8 ld 2, 24(1) - -// CHECK: Disassembly of section .plt: -// CHECK: .plt: -// CHECK-NEXT: 10010050: 18 00 41 f8 std 2, 24(1) -// CHECK-NEXT: 10010054: 02 10 82 3d addis 12, 2, 4098 -// CHECK-NEXT: 10010058: 10 80 8c e9 ld 12, -32752(12) -// CHECK-NEXT: 1001005c: a6 03 89 7d mtctr 12 +// CHECK: 10010038: c9 ff ff 4b bl .+67108808 +// CHECK-NEXT: 1001003c: 18 00 41 e8 ld 2, 24(1) Index: test/ELF/ppc64le-plt-stub.s =================================================================== --- test/ELF/ppc64le-plt-stub.s +++ test/ELF/ppc64le-plt-stub.s @@ -5,9 +5,17 @@ // RUN: ld.lld %t.o %t2.so -o %t // RUN: llvm-objdump -d %t | FileCheck %s -// CHECK: Disassembly of section .text: +// CHECK: Disassembly of section .text: +// CHECK-NEXT: __plt_foo: +// CHECK-NEXT: std 2, 24(1) +// CHECK-NEXT: addis 12, 2, -2 +// CHECK-NEXT: ld 12, 32576(12) +// CHECK-NEXT: mtctr 12 +// CHECK-NEXT: bctr + + // CHECK: _start: -// CHECK: bl .+24 +// CHECK: bl .+67108824 .text .abiversion 2 .globl _start @@ -25,13 +33,3 @@ li 0, 1 sc .size _start, .-.Lfunc_begin0 - - - -// CHECK: Disassembly of section .plt: -// CHECK: .plt: -// CHECK-NEXT: 18 00 41 f8 std 2, 24(1) -// CHECK-NEXT: fe ff 82 3d addis 12, 2, -2 -// CHECK-NEXT: 40 7f 8c e9 ld 12, 32576(12) -// CHECK-NEXT: a6 03 89 7d mtctr 12 -// CHECK: 20 04 80 4e bctr