diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -238,6 +238,8 @@ break; } case X86_64_RELOC_BRANCH: { + // TODO: factor this logic out so it can be reused for different + // architectures if (auto *dysym = dyn_cast(sym)) { if (in.stubs->addEntry(dysym)) { if (sym->isWeakDef()) { @@ -250,10 +252,13 @@ } } } else if (auto *defined = dyn_cast(sym)) { - if (defined->isWeakDef() && defined->isExternal()) - if (in.stubs->addEntry(sym)) + if (defined->isWeakDef() && defined->isExternal()) { + if (in.stubs->addEntry(sym)) { + in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); in.weakBinding->addEntry(sym, in.lazyPointers, sym->stubsIndex * WordSize); + } + } } break; } diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -36,6 +36,7 @@ bool allLoad = false; bool forceLoadObjC = false; bool staticLink = false; + bool isPic = false; bool headerPadMaxInstallNames = false; bool searchDylibsFirst = false; uint32_t headerPad; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -660,6 +660,9 @@ } } + config->isPic = config->outputType == MH_DYLIB || + (config->outputType == MH_EXECUTE && args.hasArg(OPT_pie)); + // Now that all dylibs have been loaded, search for those that should be // re-exported. 
for (opt::Arg *arg : args.filtered(OPT_sub_library)) { diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -28,6 +28,7 @@ constexpr const char pageZero[] = "__pagezero"; constexpr const char common[] = "__common"; constexpr const char header[] = "__mach_header"; +constexpr const char rebase[] = "__rebase"; constexpr const char binding[] = "__binding"; constexpr const char weakBinding[] = "__weak_binding"; constexpr const char lazyBinding[] = "__lazy_binding"; @@ -153,22 +154,42 @@ using SectionPointerUnion = llvm::PointerUnion; -struct BindingTarget { - SectionPointerUnion section; - uint64_t offset; - int64_t addend; - - BindingTarget(SectionPointerUnion section, uint64_t offset, int64_t addend) - : section(section), offset(offset), addend(addend) {} +struct Location { + SectionPointerUnion section = nullptr; + uint64_t offset = 0; + Location(SectionPointerUnion section, uint64_t offset) + : section(section), offset(offset) {} uint64_t getVA() const; }; +// Stores rebase opcodes, which tell dyld where absolute addresses have been +// encoded in the binary. If the binary is not loaded at its preferred address, +// dyld has to rebase these addresses by adding an offset to them. 
+class RebaseSection : public LinkEditSection { +public: + RebaseSection(); + void finalizeContents(); + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { return !locations.empty(); } + void writeTo(uint8_t *buf) const override; + + void addEntry(SectionPointerUnion section, uint64_t offset) { + if (config->isPic) + locations.push_back({section, offset}); + } + +private: + std::vector locations; + SmallVector contents; +}; + struct BindingEntry { const DylibSymbol *dysym; - BindingTarget target; - BindingEntry(const DylibSymbol *dysym, BindingTarget target) - : dysym(dysym), target(std::move(target)) {} + int64_t addend; + Location target; + BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target) + : dysym(dysym), addend(addend), target(std::move(target)) {} }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. @@ -182,7 +203,7 @@ void addEntry(const DylibSymbol *dysym, SectionPointerUnion section, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, BindingTarget(section, offset, addend)); + bindings.emplace_back(dysym, addend, Location(section, offset)); } private: @@ -192,9 +213,10 @@ struct WeakBindingEntry { const Symbol *symbol; - BindingTarget target; - WeakBindingEntry(const Symbol *symbol, BindingTarget target) - : symbol(symbol), target(std::move(target)) {} + int64_t addend; + Location target; + WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target) + : symbol(symbol), addend(addend), target(std::move(target)) {} }; // Stores bind opcodes for telling dyld which weak symbols need coalescing. 
@@ -220,7 +242,7 @@ void addEntry(const Symbol *symbol, SectionPointerUnion section, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(symbol, BindingTarget(section, offset, addend)); + bindings.emplace_back(symbol, addend, Location(section, offset)); } bool hasEntry() const { return !bindings.empty(); } @@ -416,6 +438,7 @@ struct InStruct { MachHeaderSection *header = nullptr; + RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; WeakBindingSection *weakBinding = nullptr; LazyBindingSection *lazyBinding = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -63,6 +63,9 @@ if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; + if (config->outputType == MachO::MH_EXECUTE && config->isPic) + hdr->flags |= MachO::MH_PIE; + if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) hdr->flags |= MachO::MH_WEAK_DEFINES; @@ -88,6 +91,97 @@ PageZeroSection::PageZeroSection() : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} +uint64_t Location::getVA() const { + if (const auto *isec = section.dyn_cast()) + return isec->getVA() + offset; + return section.get()->addr + offset; +} + +RebaseSection::RebaseSection() + : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} + +namespace { +struct Rebase { + OutputSegment *segment = nullptr; + uint64_t offset = 0; + uint64_t consecutiveCount = 0; +}; +} // namespace + +// Rebase opcodes allow us to describe a contiguous sequence of rebase locations +// using a single DO_REBASE opcode. To take advantage of it, we delay emitting +// `DO_REBASE` until we have reached the end of a contiguous sequence. 
+static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + assert(rebase.consecutiveCount != 0); + if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { + os << static_cast(REBASE_OPCODE_DO_REBASE_IMM_TIMES | + rebase.consecutiveCount); + } else { + os << static_cast(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + encodeULEB128(rebase.consecutiveCount, os); + } + rebase.consecutiveCount = 0; +} + +static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, + Rebase &lastRebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + OutputSegment *seg = osec->parent; + uint64_t offset = osec->getSegmentOffset() + outSecOff; + if (lastRebase.segment != seg || lastRebase.offset != offset) { + if (lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + if (lastRebase.segment != seg) { + os << static_cast(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + seg->index); + encodeULEB128(offset, os); + lastRebase.segment = seg; + lastRebase.offset = offset; + } else { + assert(lastRebase.offset != offset); + os << static_cast(REBASE_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(offset - lastRebase.offset, os); + lastRebase.offset = offset; + } + } + ++lastRebase.consecutiveCount; + // DO_REBASE causes dyld to both perform the rebase and increment the offset + lastRebase.offset += WordSize; +} + +void RebaseSection::finalizeContents() { + using namespace llvm::MachO; + if (locations.empty()) + return; + + raw_svector_ostream os{contents}; + Rebase lastRebase; + + os << static_cast(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); + + llvm::sort(locations, [](const Location &a, const Location &b) { + return a.getVA() < b.getVA(); + }); + for (const Location &loc : locations) { + if (const auto *isec = loc.section.dyn_cast()) { + encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os); + } else { + const auto *osec = loc.section.get(); + encodeRebase(osec, loc.offset, lastRebase, os); + } + } + if 
(lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + os << static_cast(REBASE_OPCODE_DONE); +} + +void RebaseSection::writeTo(uint8_t *buf) const { + memcpy(buf, contents.data(), contents.size()); +} + NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, const char *name) : SyntheticSection(segname, name) { @@ -184,12 +278,6 @@ << defined->getName() << '\0'; } -uint64_t BindingTarget::getVA() const { - if (auto *isec = section.dyn_cast()) - return isec->getVA() + offset; - return section.get()->addr + offset; -} - // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically @@ -217,11 +305,10 @@ encodeDylibOrdinal(b.dysym, lastBinding, os); if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, - b.target.addend, lastBinding, os); + b.addend, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.dysym, osec, b.target.offset, b.target.addend, - lastBinding, os); + encodeBinding(b.dysym, osec, b.target.offset, b.addend, lastBinding, os); } } if (!bindings.empty()) @@ -251,11 +338,10 @@ for (const WeakBindingEntry &b : bindings) { if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, - b.target.addend, lastBinding, os); + b.addend, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.symbol, osec, b.target.offset, b.target.addend, - lastBinding, os); + encodeBinding(b.symbol, osec, b.target.offset, b.addend, lastBinding, os); } } if (!bindings.empty() || !definitions.empty()) @@ -284,6 +370,7 @@ if (dysym->isWeakDef()) in.weakBinding->addEntry(sym, section, offset, addend); } else if (auto *defined = dyn_cast(sym)) { + in.rebase->addEntry(section, offset); if (defined->isWeakDef() && 
defined->isExternal()) in.weakBinding->addEntry(sym, section, offset, addend); } else if (isa(sym)) { @@ -407,8 +494,10 @@ } void LazyBindingSection::addEntry(DylibSymbol *dysym) { - if (entries.insert(dysym)) + if (entries.insert(dysym)) { dysym->stubsHelperIndex = entries.size() - 1; + in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize); + } } // Unlike the non-lazy binding section, the bind opcodes in this section aren't diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -67,11 +67,12 @@ // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. class LCDyldInfo : public LoadCommand { public: - LCDyldInfo(BindingSection *bindingSection, + LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, WeakBindingSection *weakBindingSection, LazyBindingSection *lazyBindingSection, ExportSection *exportSection) - : bindingSection(bindingSection), weakBindingSection(weakBindingSection), + : rebaseSection(rebaseSection), bindingSection(bindingSection), + weakBindingSection(weakBindingSection), lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} uint32_t getSize() const override { return sizeof(dyld_info_command); } @@ -80,6 +81,10 @@ auto *c = reinterpret_cast(buf); c->cmd = LC_DYLD_INFO_ONLY; c->cmdsize = getSize(); + if (rebaseSection->isNeeded()) { + c->rebase_off = rebaseSection->fileOff; + c->rebase_size = rebaseSection->getFileSize(); + } if (bindingSection->isNeeded()) { c->bind_off = bindingSection->fileOff; c->bind_size = bindingSection->getFileSize(); @@ -98,6 +103,7 @@ } } + RebaseSection *rebaseSection; BindingSection *bindingSection; WeakBindingSection *weakBindingSection; LazyBindingSection *lazyBindingSection; @@ -333,6 +339,12 @@ void Writer::scanRelocations() { for (InputSection *isec : inputSections) { + // We do not wish to add rebase opcodes for __LD,__compact_unwind, because + // it doesn't actually end up in the final 
binary. TODO: filtering it out + // before Writer runs might be cleaner... + if (isec->segname == segment_names::ld) + continue; + for (Reloc &r : isec->relocs) { if (auto *s = r.referent.dyn_cast()) { if (isa(s)) @@ -340,14 +352,18 @@ sys::path::filename(isec->file->getName())); else target->prepareSymbolRelocation(s, isec, r); + } else { + assert(r.referent.is()); + if (!r.pcrel) + in.rebase->addEntry(isec, r.offset); } } } } void Writer::createLoadCommands() { - in.header->addLoadCommand( - make(in.binding, in.weakBinding, in.lazyBinding, in.exports)); + in.header->addLoadCommand(make( + in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); in.header->addLoadCommand(make(indirectSymtabSection)); for (StringRef path : config->runtimePaths) @@ -451,6 +467,7 @@ .Default(0); } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) + .Case(section_names::rebase, -8) .Case(section_names::binding, -7) .Case(section_names::weakBinding, -6) .Case(section_names::lazyBinding, -5) @@ -624,6 +641,7 @@ assignAddresses(seg); // Fill __LINKEDIT contents. + in.rebase->finalizeContents(); in.binding->finalizeContents(); in.weakBinding->finalizeContents(); in.lazyBinding->finalizeContents(); @@ -649,6 +667,7 @@ void macho::createSyntheticSections() { in.header = make(); + in.rebase = make(); in.binding = make(); in.weakBinding = make(); in.lazyBinding = make(); diff --git a/lld/test/MachO/compact-unwind-pie.s b/lld/test/MachO/compact-unwind-pie.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/compact-unwind-pie.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o +# RUN: lld -flavor darwinnew -pie -syslibroot %S/Inputs/MacOSX.sdk -lSystem %t.o -o %t +# RUN: llvm-objdump --macho --unwind-info --rebase %t | FileCheck %s + +## Check that we do not add rebase opcodes to the compact unwind section. 
+# CHECK: Contents of __unwind_info section: +# CHECK-NEXT: Version: 0x1 +# CHECK-NEXT: Common encodings array section offset: +# CHECK-NEXT: Number of common encodings in array: 0x1 +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-EMPTY: + +.globl _main +.text +_main: + .cfi_startproc + .cfi_def_cfa_offset 16 + retq + .cfi_endproc diff --git a/lld/test/MachO/dylink-lazy.s b/lld/test/MachO/dylink-lazy.s --- a/lld/test/MachO/dylink-lazy.s +++ b/lld/test/MachO/dylink-lazy.s @@ -19,9 +19,13 @@ ## symbol each entry points to. So we call objdump twice in order to get the ## disassembly of __text and the bind tables first, which allow us to check for ## matching entries in __stubs. -# RUN: (llvm-objdump -d --no-show-raw-insn --syms --bind --lazy-bind %t/dylink-lazy; \ +# RUN: (llvm-objdump -d --no-show-raw-insn --syms --rebase --bind --lazy-bind %t/dylink-lazy; \ # RUN: llvm-objdump -D --no-show-raw-insn %t/dylink-lazy) | FileCheck %s +# RUN: lld -flavor darwinnew -pie -o %t/dylink-lazy-pie \ +# RUN: -L%S/Inputs/MacOSX.sdk/usr/lib -L%t -lhello -lgoodbye %t/dylink-lazy.o -lSystem +# RUN: llvm-objdump --macho --rebase %t/dylink-lazy-pie | FileCheck %s --check-prefix=PIE + # CHECK-LABEL: SYMBOL TABLE: # CHECK: {{0*}}[[#%x, IMGLOADER:]] {{.*}} __DATA,__data __dyld_private @@ -29,7 +33,11 @@ # CHECK: callq 0x[[#%x, HELLO_STUB:]] # CHECK-NEXT: callq 0x[[#%x, GOODBYE_STUB:]] -# CHECK-LABEL: Bind table: +## Check that the rebase table is empty. 
+# CHECK-LABEL: Rebase table: +# CHECK-NEXT: segment section address type + +# CHECK-NEXT: Bind table: # CHECK: __DATA_CONST __got 0x[[#%x, BINDER:]] pointer 0 libSystem dyld_stub_binder # CHECK-LABEL: Lazy bind table: @@ -51,6 +59,11 @@ # CHECK-NEXT: pushq $21 # CHECK-NEXT: jmp 0x[[#STUB_HELPER_ENTRY]] +# PIE: Rebase table: +# PIE-NEXT: segment section address type +# PIE-NEXT: __DATA __la_symbol_ptr 0x[[#%X, ADDR:]] pointer +# PIE-NEXT: __DATA __la_symbol_ptr 0x[[#ADDR + 8]] pointer + .text .globl _main diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s --- a/lld/test/MachO/local-got.s +++ b/lld/test/MachO/local-got.s @@ -6,7 +6,7 @@ # RUN: @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o # RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello -# RUN: llvm-objdump --full-contents --bind %t/test | FileCheck %s --match-full-lines +# RUN: llvm-objdump --full-contents --rebase --bind %t/test | FileCheck %s --match-full-lines ## Check that the GOT references the cstrings. --full-contents displays the ## address offset and the contents at that address very similarly, so am using @@ -20,11 +20,26 @@ # CHECK-NEXT: [[#%X,ADDR:]] 1a040000 01000000 0c040000 01000000 {{.*}} # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}} +## Check that the rebase table is empty. 
+# CHECK: Rebase table: +# CHECK-NEXT: segment section address type + ## Check that a non-locally-defined symbol is still bound at the correct offset: -# CHECK: Bind table: +# CHECK-NEXT: Bind table: # CHECK-NEXT: segment section address type addend dylib symbol # CHECK-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me +# RUN: lld -flavor darwinnew -pie -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello +# RUN: llvm-objdump --macho --rebase --bind %t/test | FileCheck %s --check-prefix=PIE --match-full-lines +# PIE: Rebase table: +# PIE-NEXT: segment section address type +# PIE-NEXT: __DATA_CONST __got 0x[[#%X,ADDR:]] pointer +# PIE-NEXT: __DATA_CONST __got 0x[[#ADDR + 8]] pointer + +# PIE-NEXT: Bind table: +# PIE-NEXT: segment section address type addend dylib symbol +# PIE-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me + .globl _main .text diff --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s --- a/lld/test/MachO/x86-64-reloc-unsigned.s +++ b/lld/test/MachO/x86-64-reloc-unsigned.s @@ -1,11 +1,25 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o -# RUN: llvm-objdump --full-contents %t | FileCheck %s -# CHECK: Contents of section __DATA,foo: -# CHECK: 100001000 08100000 01000000 -# CHECK: Contents of section __DATA,bar: -# CHECK: 100001008 011000f0 11211111 02000000 +# RUN: llvm-objdump --macho --rebase --full-contents %t | FileCheck %s + +# RUN: lld -flavor darwinnew -pie -o %t-pie %t.o +# RUN: llvm-objdump --macho --rebase %t-pie | FileCheck %s --check-prefix=PIE + +# CHECK: Contents of section __DATA,foo: +# CHECK-NEXT: 100001000 08100000 01000000 +# CHECK: Contents of section __DATA,bar: +# CHECK-NEXT: 100001008 011000f0 11211111 02000000 +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-EMPTY: + +# PIE: Rebase table: +# PIE-NEXT: segment section 
address type +# PIE-DAG: __DATA foo 0x[[#%X,ADDR:]] pointer +# PIE-DAG: __DATA bar 0x[[#ADDR + 8]] pointer +# PIE-DAG: __DATA bar 0x[[#ADDR + 12]] pointer +# PIE-DAG: __DATA baz 0x[[#ADDR + 20]] pointer .globl _main, _foo, _bar @@ -25,6 +39,12 @@ ## The unsigned relocation should support 64-bit addends too (r_length = 3). .quad _foo + 0x111111111 +.section __DATA,baz +## Generates a section relocation. +.quad L_.baz +L_.baz: + .space 0 + .text _main: mov $0, %rax