diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -106,6 +106,11 @@
   return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS;
 }
 
+void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
+  insn = config->isLE ? insn << 32 | insn >> 32 : insn;
+  write64(loc, insn);
+}
+
 static bool addOptional(StringRef name, uint64_t value,
                         std::vector<Defined *> &defined) {
   Symbol *sym = symtab->find(name);
@@ -376,15 +381,6 @@
   return read32(config->isLE ? loc : loc - 2);
 }
 
-// The prefixed instruction is always a 4 byte prefix followed by a 4 byte
-// instruction. Therefore, the prefix is always in lower memory than the
-// instruction (regardless of endianness).
-// As a result, we need to shift the pieces around on little endian machines.
-static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
-  insn = config->isLE ? insn << 32 | insn >> 32 : insn;
-  write64(loc, insn);
-}
-
 static uint64_t readPrefixedInstruction(const uint8_t *loc) {
   uint64_t fullInstr = read64(loc);
   return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;
@@ -1048,17 +1044,15 @@
   if (s.isInPlt())
     return true;
 
-  // FIXME: Remove the fatal error once the call protocol is implemented.
-  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
-    fatal("unimplemented feature: local function call with the reltype"
-          " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup");
-
   // This check looks at the st_other bits of the callee with relocation
   // R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee
   // clobbers the TOC and we need an R2 save stub.
   if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1)
     return true;
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return true;
+
   // If a symbol is a weak undefined and we are compiling an executable
   // it doesn't need a range-extending thunk since it can't be called.
   if (s.isUndefWeak() && !config->shared)
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -213,6 +213,11 @@
 // the .toc section.
 bool isPPC64SmallCodeModelTocReloc(RelType type);
 
+// Write a prefixed instruction: a 4-byte prefix followed by a 4-byte suffix.
+// The prefix is always in lower memory than the suffix regardless of
+// endianness, so the two 32-bit halves of insn are swapped on little endian.
+void writePrefixedInstruction(uint8_t *loc, uint64_t insn);
+
 void addPPC64SaveRestore();
 uint64_t getPPC64TocBase();
 uint64_t getAArch64Page(uint64_t expr);
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -293,6 +293,18 @@
   void addSymbols(ThunkSection &isec) override;
 };
 
+// PPC64 R12 Setup Stub
+// When a caller that does not maintain a toc-pointer performs a local call to
+// a callee which requires a toc-pointer then we need this stub to place the
+// callee's global entry point into r12 without a save of R2.
+class PPC64R12SetupStub final : public Thunk {
+public:
+  PPC64R12SetupStub(Symbol &dest) : Thunk(dest, 0) {}
+  uint32_t size() override { return 16; }
+  void writeTo(uint8_t *buf) override;
+  void addSymbols(ThunkSection &isec) override;
+};
+
 // A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
 // alignment. This gives a possible 26 bits of 'reach'. If the call offset is
 // larger then that we need to emit a long-branch thunk. The target address
@@ -851,6 +863,23 @@
   s->needsTocRestore = true;
 }
 
+void PPC64R12SetupStub::writeTo(uint8_t *buf) {
+  int64_t offset = destination.getVA() - getThunkTargetSym()->getVA();
+  if (!isInt<34>(offset))
+    fatal("offset must fit in 34 bits to encode in the instruction");
+  uint64_t prefix = (PADDI_R12_NO_DISP >> 32) | ((offset >> 16) & 0x3ffff);
+  uint32_t suffix = (PADDI_R12_NO_DISP & 0xffffffff) | (offset & 0xffff);
+
+  writePrefixedInstruction(buf + 0, (prefix << 32) | suffix); // paddi r12, 0, func@pcrel, 1
+  write32(buf + 8, MTCTR_R12); // mtctr r12
+  write32(buf + 12, BCTR); // bctr
+}
+
+void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
+  addSymbol(saver.save("__gep_setup_" + destination.getName()),
+            STT_FUNC, 0, isec);
+}
+
 void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
   int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
                    getPPC64TocBase();
@@ -974,7 +1003,8 @@
 }
 
 static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
-  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24) &&
+  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24 ||
+          type == R_PPC64_REL24_NOTOC) &&
          "unexpected relocation type for thunk");
   if (s.isInPlt())
     return make<PPC64PltCallStub>(s);
@@ -984,6 +1014,9 @@
   if ((s.stOther >> 5) == 1)
     return make<PPC64R2SaveStub>(s);
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return make<PPC64R12SetupStub>(s);
+
   if (config->picThunk)
     return make<PPC64PILongBranchThunk>(s, a);
 
diff --git a/lld/test/ELF/ppc64-pcrel-call-to-toc.s b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
@@ -0,0 +1,67 @@
+# REQUIRES: ppc
+# RUN: echo 'SECTIONS { \
+# RUN:   .text_func   0x10010000 : { *(.text_func) } \
+# RUN:   .text_callee 0x10020000 : { *(.text_callee) } \
+# RUN:   .text_caller 0x10030000 : { *(.text_caller) } \
+# RUN:   }' > %t.script
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+## When a caller that does not use the TOC (bl callee@notoc) calls a local
+## function that sets up a TOC pointer, an r12 setup stub is inserted.
+
+# SYMBOL:      1: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 callee
+# SYMBOL-NEXT: 2: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 3 caller
+# SYMBOL-NEXT: 3: 0000000010010000 0 NOTYPE LOCAL DEFAULT 1 func
+# SYMBOL:      6: 000000001003000c 16 FUNC LOCAL DEFAULT 3 __gep_setup_callee
+
+# CHECK-LABEL: <func>:
+# CHECK-NEXT:    blr
+
+# CHECK-LABEL: <callee>:
+# CHECK:         bl 0x10010000
+# CHECK-NEXT:    addis 4, 2, -1
+# CHECK-NEXT:    lwz 4, 32744(4)
+# CHECK-NEXT:    blr
+
+# CHECK-LABEL: <caller>:
+# CHECK-NEXT:    bl 0x1003000c
+# CHECK-NEXT:    blr
+
+# CHECK-LABEL: <__gep_setup_callee>:
+# CHECK-NEXT:    paddi 12, 0, -65548, 1
+# CHECK-NEXT:    mtctr 12
+# CHECK-NEXT:    bctr
+
+.section .text_func, "ax", %progbits
+func:
+  blr
+
+.section .text_callee, "ax", %progbits
+callee:
+.Lfunc_gep1:
+  addis 2, 12, .TOC.-.Lfunc_gep1@ha
+  addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+  .localentry callee, .Lfunc_lep1-.Lfunc_gep1
+  bl func
+  addis 4, 2, global@toc@ha
+  lwz 4, global@toc@l(4)
+  blr
+
+.section .text_caller, "ax", %progbits
+caller:
+  .localentry caller, 1
+  bl callee@notoc
+  blr
+global:
+  .long 0
+  .size global, 4
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -48,6 +48,12 @@
   return make_error<StringError>(Err, object_error::parse_failed);
 }
 
+enum PPCInstrMasks : uint64_t {
+  PADDI_R12_NO_DISP = 0x0610000039800000, // paddi r12, 0, <disp>, 1 with the displacement bits left clear
+  MTCTR_R12 = 0x7D8903A6, // mtctr r12
+  BCTR = 0x4E800420, // bctr
+};
+
 template <class ELFT> class ELFFile;
 
 template <class ELFT>