diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -40,7 +40,8 @@ void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; @@ -230,13 +231,14 @@ } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) return false; - uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); + uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); } diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -39,7 +39,8 @@ void addPltSymbols(InputSection &isec, uint64_t off) const override; void addPltHeaderSymbols(InputSection &isd) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -262,7 +263,7 @@ } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t a) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -35,7 +35,8 @@ void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; @@ -356,7 +357,8 @@ template bool MIPS::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { // Any MIPS PIC code function is invoked with its address in register $t9. // So if we have a branch instruction from non-PIC code to the PIC one // we cannot make the jump directly and need to create a small stubs diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -37,7 +37,8 @@ } void writeGotPlt(uint8_t *buf, const Symbol &s) const override; bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -169,7 +170,7 @@ } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t a) const { if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -205,7 +205,8 @@ void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, @@ -898,7 +899,7 @@ } bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t a) const { if (type != R_PPC64_REL14 && type != R_PPC64_REL24) return false; diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -150,10 +150,17 @@ bool normalizeExistingThunk(Relocation &rel, uint64_t src); - // Record all the available Thunks for a Symbol - llvm::DenseMap, std::vector> - thunkedSymbolsBySection; - llvm::DenseMap> thunkedSymbols; + // Record all the available Thunks for a (Symbol, addend) pair, where Symbol + // is represented as a (section, offset) pair. There may be multiple + // relocations sharing the same (section, offset + addend) pair. We may revert + // a relocation back to its original non-Thunk target, and restore the + // original addend, so we cannot fold offset + addend. A nested pair is used + // because DenseMapInfo is not specialized for std::tuple. + llvm::DenseMap, int64_t>, + std::vector> + thunkedSymbolsBySectionAndAddend; + llvm::DenseMap, std::vector> + thunkedSymbols; // Find a Thunk from the Thunks symbol definition, we can use this to find // the Thunk from a relocation to the Thunks symbol definition. diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1779,14 +1779,19 @@ std::pair ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector *thunkVec = nullptr; + int64_t addend = rel.addend + getPCBias(rel.type); - // We use (section, offset) pair to find the thunk position if possible so - // that we create only one thunk for aliased symbols or ICFed sections. + // We use a ((section, offset), addend) pair to find the thunk position if + // possible so that we create only one thunk for aliased symbols or ICFed + // sections. There may be multiple relocations sharing the same (section, + // offset + addend) pair. We may revert the relocation back to its original + // non-Thunk target, so we cannot fold offset + addend. if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySection[{d->section->repl, d->value}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{ + {d->section->repl, d->value}, addend}]; if (!thunkVec) - thunkVec = &thunkedSymbols[rel.sym]; + thunkVec = &thunkedSymbols[{rel.sym, addend}]; // Check existing Thunks for Sym to see if they can be reused for (Thunk *t : *thunkVec) @@ -1813,6 +1818,9 @@ rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; + // TODO Restore addend on all targets. + if (config->emachine == EM_AARCH64) + rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } @@ -1868,7 +1876,7 @@ continue; if (!target->needsThunk(rel.expr, rel.type, isec->file, src, - *rel.sym)) + *rel.sym, rel.addend)) continue; Thunk *t; @@ -1890,9 +1898,13 @@ rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); + // On AArch64, a jump/call relocation may be encoded as STT_SECTION + // + non-zero addend, clear the addend after redirection. + // // The addend of R_PPC_PLTREL24 should be ignored after changing to // R_PC. - if (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24) + if (config->emachine == EM_AARCH64 || + (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24)) rel.addend = 0; } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -58,7 +58,7 @@ // targeting S. virtual bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, uint64_t branchAddr, - const Symbol &s) const; + const Symbol &s, int64_t a) const; // On systems with range extensions we place collections of Thunks at // regular spacings that enable the majority of branches reach the Thunks. diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -130,7 +130,8 @@ bool TargetInfo::usesOnlyLowPageBits(RelType type) const { return false; } bool TargetInfo::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { return false; } diff --git a/lld/ELF/Thunks.h b/lld/ELF/Thunks.h --- a/lld/ELF/Thunks.h +++ b/lld/ELF/Thunks.h @@ -27,7 +27,7 @@ // Thunks are assigned to synthetic ThunkSections class Thunk { public: - Thunk(Symbol &destination); + Thunk(Symbol &destination, int64_t addend); virtual ~Thunk(); virtual uint32_t size() = 0; @@ -55,11 +55,12 @@ Defined *getThunkTargetSym() const { return syms[0]; } - // The alignment requirement for this Thunk, defaults to the size of the - // typical code section alignment. Symbol &destination; + int64_t addend; llvm::SmallVector syms; uint64_t offset = 0; + // The alignment requirement for this Thunk, defaults to the size of the + // typical code section alignment. uint32_t alignment = 4; }; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -49,7 +49,7 @@ // AArch64 long range Thunks class AArch64ABSLongThunk final : public Thunk { public: - AArch64ABSLongThunk(Symbol &dest) : Thunk(dest) {} + AArch64ABSLongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -57,7 +57,7 @@ class AArch64ADRPThunk final : public Thunk { public: - AArch64ADRPThunk(Symbol &dest) : Thunk(dest) {} + AArch64ADRPThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -73,7 +73,7 @@ // if the target is in range, otherwise it creates a long thunk. class ARMThunk : public Thunk { public: - ARMThunk(Symbol &dest) : Thunk(dest) {} + ARMThunk(Symbol &dest) : Thunk(dest, 0) {} bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -103,7 +103,7 @@ // which has a range of 16MB. class ThumbThunk : public Thunk { public: - ThumbThunk(Symbol &dest) : Thunk(dest) { alignment = 2; } + ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; } bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -209,7 +209,7 @@ // MIPS LA25 thunk class MipsThunk final : public Thunk { public: - MipsThunk(Symbol &dest) : Thunk(dest) {} + MipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -220,7 +220,7 @@ // microMIPS R2-R5 LA25 thunk class MicroMipsThunk final : public Thunk { public: - MicroMipsThunk(Symbol &dest) : Thunk(dest) {} + MicroMipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 14; } void writeTo(uint8_t *buf) override; @@ -231,7 +231,7 @@ // microMIPS R6 LA25 thunk class MicroMipsR6Thunk final : public Thunk { public: - MicroMipsR6Thunk(Symbol &dest) : Thunk(dest) {} + MicroMipsR6Thunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; @@ -241,8 +241,11 @@ class PPC32PltCallStub final : public Thunk { public: - PPC32PltCallStub(const InputSection &isec, const Relocation &rel, Symbol &dest) - : Thunk(dest), addend(rel.type == R_PPC_PLTREL24 ? rel.addend : 0), + // For R_PPC_PLTREL24, Thunk::addend records the addend which will be used to + // decide the offsets in the call stub. + PPC32PltCallStub(const InputSection &isec, const Relocation &rel, + Symbol &dest) + : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0), file(isec.file) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -250,10 +253,6 @@ bool isCompatibleWith(const InputSection &isec, const Relocation &rel) const override; private: - // For R_PPC_PLTREL24, this records the addend, which will be used to decide - // the offsets in the call stub. - uint32_t addend; - // Records the call site of the call stub. const InputFile *file; }; @@ -268,7 +267,7 @@ // 3) Transferring control to the target function through an indirect branch. class PPC64PltCallStub final : public Thunk { public: - PPC64PltCallStub(Symbol &dest) : Thunk(dest) {} + PPC64PltCallStub(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 20; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -289,7 +288,7 @@ void addSymbols(ThunkSection &isec) override; protected: - PPC64LongBranchThunk(Symbol &dest) : Thunk(dest) {} + PPC64LongBranchThunk(Symbol &dest) : Thunk(dest, 0) {} }; class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { @@ -332,8 +331,8 @@ // AArch64 long range Thunks -static uint64_t getAArch64ThunkDestVA(const Symbol &s) { - uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(); +static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { + uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(a); return v; } @@ -344,7 +343,7 @@ 0x00, 0x00, 0x00, 0x00, // L0: .xword S 0x00, 0x00, 0x00, 0x00, }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); memcpy(buf, data, sizeof(data)); target->relocateOne(buf + 8, R_AARCH64_ABS64, s); } @@ -367,7 +366,7 @@ 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest) 0x00, 0x02, 0x1f, 0xd6, // br x16 }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); uint64_t p = getThunkTargetSym()->getVA(); memcpy(buf, data, sizeof(data)); target->relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, @@ -795,16 +794,16 @@ isec); } -Thunk::Thunk(Symbol &d) : destination(d), offset(0) {} +Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {} Thunk::~Thunk() = default; -static Thunk *addThunkAArch64(RelType type, Symbol &s) { +static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) { if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) fatal("unrecognized relocation type"); if (config->picThunk) - return make(s); - return make(s); + return make(s, a); + return make(s, a); } // Creates a thunk for Thumb-ARM interworking. @@ -895,7 +894,8 @@ return make(s); } -static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, Symbol &s) { +static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, + Symbol &s) { assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) && "unexpected relocation type for thunk"); return make(isec, rel, s); @@ -914,9 +914,10 @@ Thunk *addThunk(const InputSection &isec, Relocation &rel) { Symbol &s = *rel.sym; + int64_t a = rel.addend; if (config->emachine == EM_AARCH64) - return addThunkAArch64(rel.type, s); + return addThunkAArch64(rel.type, s, a); if (config->emachine == EM_ARM) return addThunkArm(rel.type, s); diff --git a/lld/test/ELF/aarch64-thunk-pi.s b/lld/test/ELF/aarch64-thunk-pi.s --- a/lld/test/ELF/aarch64-thunk-pi.s +++ b/lld/test/ELF/aarch64-thunk-pi.s @@ -16,28 +16,36 @@ bl high_target ret // CHECK: low_target: -// CHECK-NEXT: d8: bl #0x10 <__AArch64ADRPThunk_high_target> +// CHECK-NEXT: d8: bl #0x18 <__AArch64ADRPThunk_high_target> // CHECK-NEXT: ret .hidden low_target2 .globl low_target2 .type low_target2, %function low_target2: - // Need thunk to high_target + // Need thunk to high_target2 bl high_target2 + // .text_high+8 = high_target2 + bl .text_high+8 ret // CHECK: low_target2: -// CHECK-NEXT: e0: bl #0x14 <__AArch64ADRPThunk_high_target2> +// CHECK-NEXT: e0: bl #0x1c <__AArch64ADRPThunk_high_target2> +// CHECK-NEXT: e4: bl #0x24 <__AArch64ADRPThunk_> // CHECK-NEXT: ret // Expect range extension thunks for .text_low // adrp calculation is (PC + signed immediate) & (!0xfff) // CHECK: __AArch64ADRPThunk_high_target: -// CHECK-NEXT: e8: adrp x16, #0x10000000 +// CHECK-NEXT: f0: adrp x16, #0x10000000 // CHECK-NEXT: add x16, x16, #0x40 // CHECK-NEXT: br x16 // CHECK: __AArch64ADRPThunk_high_target2: -// CHECK-NEXT: f4: adrp x16, #0x10000000 +// CHECK-NEXT: fc: adrp x16, #0x10000000 +// CHECK-NEXT: add x16, x16, #0x8 +// CHECK-NEXT: br x16 +/// Identical to the previous one, but for the target .text_high+8. +// CHECK: __AArch64ADRPThunk_: +// CHECK-NEXT: 108: adrp x16, #0x10000000 // CHECK-NEXT: add x16, x16, #0x8 // CHECK-NEXT: br x16 diff --git a/lld/test/ELF/aarch64-thunk-script.s b/lld/test/ELF/aarch64-thunk-script.s --- a/lld/test/ELF/aarch64-thunk-script.s +++ b/lld/test/ELF/aarch64-thunk-script.s @@ -15,6 +15,8 @@ _start: // Need thunk to high_target@plt bl high_target + // Need thunk to .text_high+4 + bl .text_high+4 ret .section .text_high, "ax", %progbits @@ -28,14 +30,21 @@ // CHECK: Disassembly of section .text_low: // CHECK-EMPTY: // CHECK-NEXT: _start: -// CHECK-NEXT: 2000: bl #0x8 <__AArch64AbsLongThunk_high_target> +// CHECK-NEXT: 2000: bl #0x10 <__AArch64AbsLongThunk_high_target> +// CHECK-NEXT: 2004: bl #0x1c <__AArch64AbsLongThunk_> // CHECK-NEXT: ret // CHECK: __AArch64AbsLongThunk_high_target: -// CHECK-NEXT: 2008: ldr x16, #0x8 +// CHECK-NEXT: 2010: ldr x16, #0x8 // CHECK-NEXT: br x16 // CHECK: $d: -// CHECK-NEXT: 2010: 00 20 00 08 .word 0x08002000 -// CHECK-NEXT: 2014: 00 00 00 00 .word 0x00000000 +// CHECK-NEXT: 2018: 00 20 00 08 .word 0x08002000 +// CHECK-NEXT: 201c: 00 00 00 00 .word 0x00000000 +// CHECK: __AArch64AbsLongThunk_: +// CHECK-NEXT: 2020: ldr x16, #0x8 +// CHECK-NEXT: 2024: br x16 +// CHECK: $d: +// CHECK-NEXT: 2028: 04 20 00 08 .word 0x08002004 +// CHECK-NEXT: 202c: 00 00 00 00 .word 0x00000000 // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: high_target: