diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -1039,6 +1039,10 @@ if (s.isInPlt()) return true; + // If the callee clobbers the TOC but the caller requires a TOC. + if ((s.stOther >> 5) == 1 && type == R_PPC64_REL24) + return true; + // If a symbol is a weak undefined and we are compiling an executable // it doesn't need a range-extending thunk since it can't be called. if (s.isUndefWeak() && !config->shared) diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -279,6 +279,20 @@ void addSymbols(ThunkSection &isec) override; }; +// PPC64 R2 Save Stub +// When the caller requires a valid R2 TOC pointer but the callee does not +// require a TOC pointer and the callee cannot guarantee that it doesn't +// clobber R2 then we need to save R2. This stub: +// 1) Saves the TOC pointer to the stack. +// 2) Tail calls the callee. +class PPC64R2SaveStub final : public Thunk { +public: + PPC64R2SaveStub(Symbol &dest) : Thunk(dest, 0) {} + uint32_t size() override { return 8; } + void writeTo(uint8_t *buf) override; + void addSymbols(ThunkSection &isec) override; +}; + // A bl instruction uses a signed 24 bit offset, with an implicit 4 byte // alignment. This gives a possible 26 bits of 'reach'. If the call offset is // larger then that we need to emit a long-branch thunk. The target address @@ -822,6 +836,21 @@ s->file = destination.file; } +void PPC64R2SaveStub::writeTo(uint8_t *buf) { + int64_t offset = destination.getVA() - (getThunkTargetSym()->getVA() + 4); + // The branch offset needs to fit in 26 bits. 
+ if (!isInt<26>(offset)) + fatal("R2 save stub branch offset is too large: " + Twine(offset)); + write32(buf + 0, 0xf8410018); // std r2,24(r1) + write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b +} + +void PPC64R2SaveStub::addSymbols(ThunkSection &isec) { + Defined *s = addSymbol(saver.save("__toc_save_" + destination.getName()), + STT_FUNC, 0, isec); + s->needsTocRestore = true; +} + void PPC64LongBranchThunk::writeTo(uint8_t *buf) { int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) - getPPC64TocBase(); @@ -950,6 +979,9 @@ if (s.isInPlt()) return make<PPC64PltCallStub>(s); + if ((s.stOther >> 5) == 1 && type == R_PPC64_REL24) + return make<PPC64R2SaveStub>(s); + if (config->picThunk) return make<PPC64PILongBranchThunk>(s, a); diff --git a/lld/test/ELF/ppc64-error-toc-local-call.s b/lld/test/ELF/ppc64-error-toc-local-call.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/ppc64-error-toc-local-call.s @@ -0,0 +1,33 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o +# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s + +# This test checks that the linker produces errors when it is missing the nop +# after a local call to a callee with st_other=1. 
+ +# CHECK: (.text+0xC): call to save_callee lacks nop, can't restore toc +# CHECK: (.text+0x1C): call to save_callee lacks nop, can't restore toc + +callee: + .localentry callee, 1 + blr # 0x0 + +caller: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + bl callee # 0xC + blr + +caller_tail: +.Lfunc_gep2: + addis 2, 12, .TOC.-.Lfunc_gep2@ha + addi 2, 2, .TOC.-.Lfunc_gep2@l +.Lfunc_lep2: + .localentry caller_tail, .Lfunc_lep2-.Lfunc_gep2 + b callee # 0x1C diff --git a/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s @@ -0,0 +1,35 @@ +# REQUIRES: ppc +# RUN: echo 'SECTIONS { \ +# RUN: .text_callee 0x10010000 : { *(.text_callee) } \ +# RUN: .text_caller 0x20020000 : { *(.text_caller) } \ +# RUN: }' > %t.script + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s + +# CHECK: error: R2 save stub branch offset is too large: -268501032 + +.section .text_callee, "ax", %progbits +callee: + .localentry callee, 1 + blr + +.section .text_caller, "ax", %progbits +caller: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + addis 30, 2, global@toc@ha + lwz 4, global@toc@l(30) + add 3, 4, 3 + bl callee + nop + blr +global: + .long 0 + .size global, 4 diff --git a/lld/test/ELF/ppc64-toc-call-to-pcrel.s b/lld/test/ELF/ppc64-toc-call-to-pcrel.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/ppc64-toc-call-to-pcrel.s @@ -0,0 +1,58 @@ +# REQUIRES: ppc +# RUN: echo 'SECTIONS { \ +# RUN: .text_callee 0x10010000 : { *(.text_callee) } \ +# RUN: 
.text_caller 0x10020000 : { *(.text_caller) } \ +# RUN: }' > %t.script + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: ld.lld -T %t.script %t.o -o %t +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=future %t | FileCheck %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o +# RUN: ld.lld -T %t.script %t.o -o %t +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=future %t | FileCheck %s + +# The point of this test is to make sure that when a function that uses the TOC +# calls a local function with st_other=1, a TOC save stub is inserted. + +# SYMBOL: Symbol table '.symtab' contains 6 entries +# SYMBOL: 10010000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 1 callee +# SYMBOL: 10020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 caller +# SYMBOL: 10020024 8 FUNC LOCAL DEFAULT 2 __toc_save_callee + +# CHECK-LABEL: callee +# CHECK: blr + +# CHECK-LABEL: caller +# CHECK: bl 0x10020024 +# CHECK-NEXT: ld 2, 24(1) +# CHECK-NEXT: blr + +# CHECK-LABEL: __toc_save_callee +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: b 0x10010000 + + +.section .text_callee, "ax", %progbits +callee: + .localentry callee, 1 + blr + +.section .text_caller, "ax", %progbits +caller: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry caller, .Lfunc_lep1-.Lfunc_gep1 + addis 30, 2, global@toc@ha + lwz 4, global@toc@l(30) + add 3, 4, 3 + bl callee + nop + blr +global: + .long 0 + .size global, 4