diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -106,6 +106,11 @@
   return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS;
 }
 
+void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
+  insn = config->isLE ? insn << 32 | insn >> 32 : insn;
+  write64(loc, insn);
+}
+
 static bool addOptional(StringRef name, uint64_t value,
                         std::vector<Defined *> &defined) {
   Symbol *sym = symtab->find(name);
@@ -376,15 +381,6 @@
   return read32(config->isLE ? loc : loc - 2);
 }
 
-// The prefixed instruction is always a 4 byte prefix followed by a 4 byte
-// instruction. Therefore, the prefix is always in lower memory than the
-// instruction (regardless of endianness).
-// As a result, we need to shift the pieces around on little endian machines.
-static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
-  insn = config->isLE ? insn << 32 | insn >> 32 : insn;
-  write64(loc, insn);
-}
-
 static uint64_t readPrefixedInstruction(const uint8_t *loc) {
   uint64_t fullInstr = read64(loc);
   return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;
@@ -1049,14 +1045,13 @@
     return true;
 
   // FIXME: Remove the fatal errors once the call protocols are implemented.
-  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
-    fatal("unimplemented feature: local function call with the reltype"
-          " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup");
-
   if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1)
     fatal("unimplemented feature: local function call with the reltype"
           " is not R_PPC64_REL24_NOTOC and the callee tramples the toc");
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return true;
+
   // If a symbol is a weak undefined and we are compiling an executable
   // it doesn't need a range-extending thunk since it can't be called.
   if (s.isUndefWeak() && !config->shared)
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -213,6 +213,12 @@
 // the .toc section.
 bool isPPC64SmallCodeModelTocReloc(RelType type);
 
+// The prefixed instruction is always a 4 byte prefix followed by a 4 byte
+// instruction. Therefore, the prefix is always in lower memory than the
+// instruction (regardless of endianness).
+// As a result, we need to shift the pieces around on little endian machines.
+void writePrefixedInstruction(uint8_t *loc, uint64_t insn);
+
 void addPPC64SaveRestore();
 uint64_t getPPC64TocBase();
 uint64_t getAArch64Page(uint64_t expr);
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -279,6 +279,18 @@
   void addSymbols(ThunkSection &isec) override;
 };
 
+// PPC64 R12 Setup Stub
+// When a caller that does not use the TOC and does not preserve R2 makes a
+// local call to a callee that requires a TOC pointer, we need this stub to
+// place the callee's global entry point into r12 without a save of R2.
+class PPC64R12SetupStub final : public Thunk {
+public:
+  PPC64R12SetupStub(Symbol &dest) : Thunk(dest, 0) {}
+  uint32_t size() override { return 16; }
+  void writeTo(uint8_t *buf) override;
+  void addSymbols(ThunkSection &isec) override;
+};
+
 // A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
 // alignment. This gives a possible 26 bits of 'reach'. If the call offset is
 // larger then that we need to emit a long-branch thunk. The target address
@@ -822,6 +834,26 @@
   s->file = destination.file;
 }
 
+void PPC64R12SetupStub::writeTo(uint8_t *buf) {
+  // PC-relative distance from this stub's symbol to the callee's address.
+  int64_t offset = destination.getVA() - getThunkTargetSym()->getVA();
+  // The address itself must end up in r12, so use paddi; a load such as pld
+  // would instead fetch the bytes stored at the callee's address. The upper
+  // 18 bits of the displacement go in the prefix word, the lower 16 bits in
+  // the suffix word.
+  uint64_t prefix = 0x06100000 | ((offset >> 16) & 0x3ffff);
+  uint32_t suffix = 0x39800000 | (offset & 0xffff);
+
+  // paddi r12, 0, func@pcrel, 1
+  writePrefixedInstruction(buf + 0, (prefix << 32) | suffix);
+  write32(buf + 8, 0x7d8903a6);  // mtctr r12
+  write32(buf + 12, 0x4e800420); // bctr
+}
+
+void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
+  addSymbol(saver.save("__global_entry_point_setup_" + destination.getName()),
+            STT_FUNC, 0, isec);
+}
+
 void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
   int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
                    getPPC64TocBase();
@@ -945,11 +977,15 @@
 }
 
 static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
-  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24) &&
+  assert((type == R_PPC64_REL14 || type == R_PPC64_REL24 ||
+          type == R_PPC64_REL24_NOTOC) &&
          "unexpected relocation type for thunk");
   if (s.isInPlt())
     return make<PPC64PltCallStub>(s);
 
+  if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
+    return make<PPC64R12SetupStub>(s);
+
   if (config->picThunk)
     return make<PPC64PILongBranchThunk>(s, a);
 
diff --git a/lld/test/ELF/ppc64-pcrel-call-to-toc.s b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/ppc64-pcrel-call-to-toc.s
@@ -0,0 +1,67 @@
+# REQUIRES: ppc
+# RUN: echo 'SECTIONS { \
+# RUN:   .text_func 0x10010000 : { *(.text_func) } \
+# RUN:   .text_callee 0x10020000 : { *(.text_callee) } \
+# RUN:   .text_caller 0x10030000 : { *(.text_caller) } \
+# RUN:   }' > %t.script
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
+# RUN: ld.lld -T %t.script %t.o -o %t
+# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
+
+# This test checks that when a function that does not use the TOC calls a
+# local function that uses the TOC, an r12 setup stub is inserted.
+
+# SYMBOL: 1: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 callee
+# SYMBOL: 2: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 3 caller
+# SYMBOL: 3: 0000000010010000 0 NOTYPE LOCAL DEFAULT 1 func
+# SYMBOL: 6: 000000001003000c 16 FUNC LOCAL DEFAULT 3 __global_entry_point_setup_callee
+
+# CHECK-LABEL: <func>:
+# CHECK-NEXT: blr
+
+# CHECK-LABEL: <callee>:
+# CHECK: bl 0x10010000
+# CHECK-NEXT: addis 4, 2, -1
+# CHECK-NEXT: lwz 4, 32744(4)
+# CHECK-NEXT: blr
+
+# CHECK-LABEL: <caller>:
+# CHECK-NEXT: bl 0x1003000c
+# CHECK-NEXT: blr
+
+# CHECK-LABEL: <__global_entry_point_setup_callee>:
+# CHECK-NEXT: paddi 12, 0, -65548, 1
+# CHECK-NEXT: mtctr 12
+# CHECK-NEXT: bctr
+
+.section .text_func, "ax", %progbits
+func:
+  blr
+
+.section .text_callee, "ax", %progbits
+callee:
+.Lfunc_gep1:
+  addis 2, 12, .TOC.-.Lfunc_gep1@ha
+  addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+  .localentry callee, .Lfunc_lep1-.Lfunc_gep1
+  bl func
+  addis 4, 2, global@toc@ha
+  lwz 4, global@toc@l(4)
+  blr
+
+.section .text_caller, "ax", %progbits
+caller:
+  .localentry caller, 1
+  bl callee@notoc
+  blr
+global:
+  .long 0
+  .size global, 4