diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -34,8 +34,8 @@ void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const override; - void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) override; - uint64_t getDylibSymbolVA(const DylibSymbol &, uint8_t type) const override; + void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override; + uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override; }; } // namespace @@ -208,30 +208,54 @@ in.stubHelper->addr); } -void X86_64::prepareDylibSymbolRelocation(DylibSymbol &sym, uint8_t type) { +void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) { switch (type) { case X86_64_RELOC_GOT_LOAD: // TODO: implement mov -> lea relaxation for non-dynamic symbols case X86_64_RELOC_GOT: in.got->addEntry(sym); break; - case X86_64_RELOC_BRANCH: - in.stubs->addEntry(sym); + case X86_64_RELOC_BRANCH: { + if (auto *dysym = dyn_cast(&sym)) + in.stubs->addEntry(*dysym); + break; + } + case X86_64_RELOC_UNSIGNED: + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: + break; + case X86_64_RELOC_SUBTRACTOR: + case X86_64_RELOC_TLV: + fatal("TODO: handle relocation type " + std::to_string(type)); break; default: - llvm_unreachable("Unexpected dylib relocation type"); + llvm_unreachable("unexpected relocation type"); } } -uint64_t X86_64::getDylibSymbolVA(const DylibSymbol &sym, uint8_t type) const { +uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym, + uint8_t type) const { switch (type) { case X86_64_RELOC_GOT_LOAD: case X86_64_RELOC_GOT: return in.got->addr + sym.gotIndex * WordSize; case X86_64_RELOC_BRANCH: - return in.stubs->addr + sym.stubsIndex * sizeof(stub); + if (auto *dysym = dyn_cast(&sym)) + return in.stubs->addr + dysym->stubsIndex * sizeof(stub); + return sym.getVA(); + case X86_64_RELOC_UNSIGNED: + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: + return sym.getVA(); + case X86_64_RELOC_SUBTRACTOR: + case X86_64_RELOC_TLV: + fatal("TODO: handle relocation type " + std::to_string(type)); default: - llvm_unreachable("Unexpected dylib relocation type"); + llvm_unreachable("Unexpected relocation type"); } } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -33,15 +33,10 @@ for (Reloc &r : relocs) { uint64_t va = 0; - if (auto *s = r.target.dyn_cast()) { - if (auto *dylibSymbol = dyn_cast(s)) { - va = target->getDylibSymbolVA(*dylibSymbol, r.type); - } else { - va = s->getVA(); - } - } else if (auto *isec = r.target.dyn_cast()) { + if (auto *s = r.target.dyn_cast()) + va = target->getSymbolVA(*s, r.type); + else if (auto *isec = r.target.dyn_cast()) va = isec->getVA(); - } uint64_t val = va + r.addend; if (r.pcrel) diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -47,6 +47,8 @@ uint64_t getFileOffset() const; + uint32_t gotIndex = UINT32_MAX; + protected: Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} @@ -80,7 +82,6 @@ static bool classof(const Symbol *s) { return s->kind() == DylibKind; } DylibFile *file; - uint32_t gotIndex = UINT32_MAX; uint32_t stubsIndex = UINT32_MAX; uint32_t lazyBindOffset = UINT32_MAX; }; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -78,23 +78,18 @@ public: GotSection(); - const llvm::SetVector &getEntries() const { - return entries; - } + const llvm::SetVector &getEntries() const { return entries; } bool isNeeded() const override { return !entries.empty(); } uint64_t getSize() const override { return entries.size() * WordSize; } - void writeTo(uint8_t *buf) const override { - // Nothing to write, GOT contains all zeros at link time; it's populated at - // runtime by dyld. - } + void writeTo(uint8_t *buf) const override; - void addEntry(DylibSymbol &sym); + void addEntry(Symbol &sym); private: - llvm::SetVector entries; + llvm::SetVector entries; }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -80,12 +80,18 @@ // table, which we do not currently emit } -void GotSection::addEntry(DylibSymbol &sym) { +void GotSection::addEntry(Symbol &sym) { if (entries.insert(&sym)) { sym.gotIndex = entries.size() - 1; } } +void GotSection::writeTo(uint8_t *buf) const { + for (size_t i = 0, n = entries.size(); i < n; ++i) + if (auto *defined = dyn_cast(entries[i])) + write64le(&buf[i * WordSize], defined->getVA()); +} + BindingSection::BindingSection() : SyntheticSection(segment_names::linkEdit, section_names::binding) {} @@ -112,20 +118,32 @@ os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | in.got->parent->index); encodeULEB128(in.got->getSegmentOffset(), os); - for (const DylibSymbol *sym : in.got->getEntries()) { - // TODO: Implement compact encoding -- we only need to encode the - // differences between consecutive symbol entries. - if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) { - os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - sym->file->ordinal); + uint32_t entries_to_skip = 0; + for (const Symbol *sym : in.got->getEntries()) { + if (const auto *dysym = dyn_cast(sym)) { + if (entries_to_skip != 0) { + os << static_cast(BIND_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(WordSize * entries_to_skip, os); + entries_to_skip = 0; + } + + // TODO: Implement compact encoding -- we only need to encode the + // differences between consecutive symbol entries. + if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) { + os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + dysym->file->ordinal); + } else { + error("TODO: Support larger dylib symbol ordinals"); + continue; + } + os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) + << dysym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) + << static_cast(BIND_OPCODE_DO_BIND); } else { - error("TODO: Support larger dylib symbol ordinals"); - continue; + // We have a defined symbol with a pre-populated address; skip over it. + ++entries_to_skip; } - os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym->getName() << '\0' - << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) - << static_cast(BIND_OPCODE_DO_BIND); } os << static_cast(BIND_OPCODE_DONE); diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -18,6 +18,7 @@ namespace lld { namespace macho { +class Symbol; class DylibSymbol; class InputSection; struct Reloc; @@ -48,13 +49,12 @@ virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &, uint64_t entryAddr) const = 0; - // Dylib symbols are referenced via either the GOT or the stubs section, - // depending on the relocation type. prepareDylibSymbolRelocation() will set - // up the GOT/stubs entries, and getDylibSymbolVA() will return the addresses - // of those entries. - virtual void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) = 0; - virtual uint64_t getDylibSymbolVA(const DylibSymbol &, - uint8_t type) const = 0; + // Symbols may be referenced via either the GOT or the stubs section, + // depending on the relocation type. prepareSymbolRelocation() will set up the + // GOT/stubs entries, and getSymbolVA() will return the addresses of those + // entries. + virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0; + virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0; uint32_t cpuType; uint32_t cpuSubtype; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -255,8 +255,8 @@ if (isa(s)) error("undefined symbol " + s->getName() + ", referenced from " + sys::path::filename(isec->file->getName())); - else if (auto *dylibSymbol = dyn_cast(s)) - target->prepareDylibSymbolRelocation(*dylibSymbol, r.type); + else + target->prepareSymbolRelocation(*s, r.type); } } } diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/local-got.s @@ -0,0 +1,58 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -dylib -install_name \ +# RUN: @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello +# RUN: llvm-objdump --full-contents --bind %t/test | FileCheck %s --match-full-lines + +## Check that the GOT references the cstrings. --full-contents displays the +## address offset and the contents at that address very similarly, so am using +## --match-full-lines to make sure we match on the right thing. +# CHECK: Contents of section __cstring: +# CHECK-NEXT: 1000003cc {{.*}} + +## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the +## start of __cstring +# CHECK: Contents of section __got: +# CHECK-NEXT: [[#%X,ADDR:]] da030000 01000000 cc030000 01000000 {{.*}} +# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}} + +## Check that a non-locally-defined symbol is still bound at the correct offset: +# CHECK: Bind table: +# CHECK-NEXT: segment section address type addend dylib symbol +# CHECK-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me + +.globl _main + +.text +_main: + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + movq _hello_its_me@GOTPCREL(%rip), %rsi + mov $15, %rdx # length of str + syscall + + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + movq _hello_world@GOTPCREL(%rip), %rsi + mov $13, %rdx # length of str + syscall + + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + movq _goodbye_world@GOTPCREL(%rip), %rsi + mov $15, %rdx # length of str + syscall + + mov $0, %rax + ret + +.section __TEXT,__cstring +_hello_world: + .asciz "Hello world!\n" + +_goodbye_world: + .asciz "Goodbye world!\n"