diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -250,10 +250,13 @@ } } } else if (auto *defined = dyn_cast(sym)) { - if (defined->isWeakDef() && defined->isExternal()) - if (in.stubs->addEntry(sym)) + if (defined->isWeakDef() && defined->isExternal()) { + if (in.stubs->addEntry(sym)) { + in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); in.weakBinding->addEntry(sym, in.lazyPointers, sym->stubsIndex * WordSize); + } + } } break; } diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -35,6 +35,7 @@ bool hasReexports = false; bool allLoad = false; bool forceLoadObjC = false; + bool isPic = false; uint32_t headerPad; llvm::StringRef installName; llvm::StringRef outputFile; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -616,6 +616,7 @@ case OPT_all_load: case OPT_o: case OPT_dylib: + case OPT_pie: case OPT_e: case OPT_F: case OPT_L: @@ -636,6 +637,9 @@ } } + config->isPic = config->outputType == MH_DYLIB || + (config->outputType == MH_EXECUTE && args.hasArg(OPT_pie)); + // Now that all dylibs have been loaded, search for those that should be // re-exported. 
for (opt::Arg *arg : args.filtered(OPT_sub_library)) { diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -28,6 +28,7 @@ constexpr const char pageZero[] = "__pagezero"; constexpr const char common[] = "__common"; constexpr const char header[] = "__mach_header"; +constexpr const char rebase[] = "__rebase"; constexpr const char binding[] = "__binding"; constexpr const char weakBinding[] = "__weak_binding"; constexpr const char lazyBinding[] = "__lazy_binding"; @@ -149,22 +150,42 @@ using SectionPointerUnion = llvm::PointerUnion; -struct BindingTarget { - SectionPointerUnion section; - uint64_t offset; - int64_t addend; - - BindingTarget(SectionPointerUnion section, uint64_t offset, int64_t addend) - : section(section), offset(offset), addend(addend) {} +struct Location { + SectionPointerUnion section = nullptr; + uint64_t offset = 0; + Location(SectionPointerUnion section, uint64_t offset) + : section(section), offset(offset) {} uint64_t getVA() const; }; +// Stores rebase opcodes, which tell dyld where absolute addresses have been +// encoded in the binary. If the binary is not loaded at its preferred address, +// dyld has to rebase these addresses by adding an offset to them. 
+class RebaseSection : public LinkEditSection { +public: + RebaseSection(); + void finalizeContents(); + uint64_t getRawSize() const override { return contents.size(); } + bool isNeeded() const override { return !locations.empty(); } + void writeTo(uint8_t *buf) const override; + + void addEntry(SectionPointerUnion section, uint64_t offset) { + if (config->isPic) + locations.push_back({section, offset}); + } + +private: + std::vector locations; + SmallVector contents; +}; + struct BindingEntry { const DylibSymbol *dysym; - BindingTarget target; - BindingEntry(const DylibSymbol *dysym, BindingTarget target) - : dysym(dysym), target(std::move(target)) {} + int64_t addend; + Location target; + BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target) + : dysym(dysym), addend(addend), target(std::move(target)) {} }; // Stores bind opcodes for telling dyld which symbols to load non-lazily. @@ -178,7 +199,7 @@ void addEntry(const DylibSymbol *dysym, SectionPointerUnion section, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, BindingTarget(section, offset, addend)); + bindings.emplace_back(dysym, addend, Location(section, offset)); } private: @@ -188,9 +209,10 @@ struct WeakBindingEntry { const Symbol *symbol; - BindingTarget target; - WeakBindingEntry(const Symbol *symbol, BindingTarget target) - : symbol(symbol), target(std::move(target)) {} + int64_t addend; + Location target; + WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target) + : symbol(symbol), addend(addend), target(std::move(target)) {} }; // Stores bind opcodes for telling dyld which weak symbols need coalescing. 
@@ -216,7 +238,7 @@ void addEntry(const Symbol *symbol, SectionPointerUnion section, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(symbol, BindingTarget(section, offset, addend)); + bindings.emplace_back(symbol, addend, Location(section, offset)); } bool hasEntry() const { return !bindings.empty(); } @@ -412,6 +434,7 @@ struct InStruct { MachHeaderSection *header = nullptr; + RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; WeakBindingSection *weakBinding = nullptr; LazyBindingSection *lazyBinding = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -63,6 +63,9 @@ if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; + if (config->outputType == MachO::MH_EXECUTE && config->isPic) + hdr->flags |= MachO::MH_PIE; + if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) hdr->flags |= MachO::MH_WEAK_DEFINES; @@ -88,6 +91,97 @@ PageZeroSection::PageZeroSection() : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} +uint64_t Location::getVA() const { + if (auto *isec = section.dyn_cast()) + return isec->getVA() + offset; + return section.get()->addr + offset; +} + +RebaseSection::RebaseSection() + : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} + +namespace { +struct Rebase { + OutputSegment *segment = nullptr; + uint64_t offset = 0; + uint64_t consecutiveCount = 0; +}; +} // namespace + +// Rebase opcodes allow us to describe a contiguous sequence of rebase locations +// using a single DO_REBASE opcode. To take advantage of it, we delay emitting +// `DO_REBASE` until we have reached the end of a contiguous sequence. 
+static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + assert(rebase.consecutiveCount != 0); + if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { + os << static_cast(REBASE_OPCODE_DO_REBASE_IMM_TIMES | + rebase.consecutiveCount); + } else { + os << static_cast(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + encodeULEB128(rebase.consecutiveCount, os); + } + rebase.consecutiveCount = 0; +} + +static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, + Rebase &lastRebase, raw_svector_ostream &os) { + using namespace llvm::MachO; + OutputSegment *seg = osec->parent; + uint64_t offset = osec->getSegmentOffset() + outSecOff; + if (lastRebase.segment != seg || lastRebase.offset != offset) { + if (lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + if (lastRebase.segment != seg) { + os << static_cast(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + seg->index); + encodeULEB128(offset, os); + lastRebase.segment = seg; + lastRebase.offset = offset; + } else { + assert(lastRebase.offset != offset); + os << static_cast(REBASE_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(offset - lastRebase.offset, os); + lastRebase.offset = offset; + } + } + ++lastRebase.consecutiveCount; + // DO_REBASE causes dyld to both perform the rebase and increment the offset + lastRebase.offset += WordSize; +} + +void RebaseSection::finalizeContents() { + using namespace llvm::MachO; + raw_svector_ostream os{contents}; + Rebase lastRebase; + + if (!locations.empty()) + os << static_cast(REBASE_OPCODE_SET_TYPE_IMM | + REBASE_TYPE_POINTER); + + llvm::sort(locations, [](const Location &a, const Location &b) { + return a.getVA() < b.getVA(); + }); + for (const Location &loc : locations) { + if (auto *isec = loc.section.dyn_cast()) { + encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os); + } else { + auto *osec = loc.section.get(); + encodeRebase(osec, loc.offset, lastRebase, os); + } + } + if 
(lastRebase.consecutiveCount != 0) + encodeDoRebase(lastRebase, os); + + if (!locations.empty()) + os << static_cast(REBASE_OPCODE_DONE); +} + +void RebaseSection::writeTo(uint8_t *buf) const { + memcpy(buf, contents.data(), contents.size()); +} + NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, const char *name) : SyntheticSection(segname, name) { @@ -184,12 +278,6 @@ << defined->getName() << '\0'; } -uint64_t BindingTarget::getVA() const { - if (auto *isec = section.dyn_cast()) - return isec->getVA() + offset; - return section.get()->addr + offset; -} - // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically @@ -217,11 +305,10 @@ encodeDylibOrdinal(b.dysym, lastBinding, os); if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, - b.target.addend, lastBinding, os); + b.addend, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.dysym, osec, b.target.offset, b.target.addend, - lastBinding, os); + encodeBinding(b.dysym, osec, b.target.offset, b.addend, lastBinding, os); } } if (!bindings.empty()) @@ -251,11 +338,10 @@ for (const WeakBindingEntry &b : bindings) { if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, - b.target.addend, lastBinding, os); + b.addend, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.symbol, osec, b.target.offset, b.target.addend, - lastBinding, os); + encodeBinding(b.symbol, osec, b.target.offset, b.addend, lastBinding, os); } } if (!bindings.empty() || !definitions.empty()) @@ -284,6 +370,7 @@ if (dysym->isWeakDef()) in.weakBinding->addEntry(sym, section, offset, addend); } else if (auto *defined = dyn_cast(sym)) { + in.rebase->addEntry(section, offset); if 
(defined->isWeakDef() && defined->isExternal()) in.weakBinding->addEntry(sym, section, offset, addend); } else if (isa(sym)) { @@ -407,8 +494,10 @@ } void LazyBindingSection::addEntry(DylibSymbol *dysym) { - if (entries.insert(dysym)) + if (entries.insert(dysym)) { dysym->stubsHelperIndex = entries.size() - 1; + in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize); + } } // Unlike the non-lazy binding section, the bind opcodes in this section aren't diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -63,11 +63,12 @@ // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. class LCDyldInfo : public LoadCommand { public: - LCDyldInfo(BindingSection *bindingSection, + LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, WeakBindingSection *weakBindingSection, LazyBindingSection *lazyBindingSection, ExportSection *exportSection) - : bindingSection(bindingSection), weakBindingSection(weakBindingSection), + : rebaseSection(rebaseSection), bindingSection(bindingSection), + weakBindingSection(weakBindingSection), lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} uint32_t getSize() const override { return sizeof(dyld_info_command); } @@ -76,6 +77,10 @@ auto *c = reinterpret_cast(buf); c->cmd = LC_DYLD_INFO_ONLY; c->cmdsize = getSize(); + if (rebaseSection->isNeeded()) { + c->rebase_off = rebaseSection->fileOff; + c->rebase_size = rebaseSection->getFileSize(); + } if (bindingSection->isNeeded()) { c->bind_off = bindingSection->fileOff; c->bind_size = bindingSection->getFileSize(); @@ -94,6 +99,7 @@ } } + RebaseSection *rebaseSection; BindingSection *bindingSection; WeakBindingSection *weakBindingSection; LazyBindingSection *lazyBindingSection; @@ -335,8 +341,8 @@ } void Writer::createLoadCommands() { - in.header->addLoadCommand( - make(in.binding, in.weakBinding, in.lazyBinding, in.exports)); + in.header->addLoadCommand(make( + in.rebase, 
in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); in.header->addLoadCommand(make(indirectSymtabSection)); for (StringRef path : config->runtimePaths) @@ -428,6 +434,7 @@ return -1; } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) + .Case(section_names::rebase, -8) .Case(section_names::binding, -7) .Case(section_names::weakBinding, -6) .Case(section_names::lazyBinding, -5) @@ -596,6 +603,7 @@ assignAddresses(seg); // Fill __LINKEDIT contents. + in.rebase->finalizeContents(); in.binding->finalizeContents(); in.weakBinding->finalizeContents(); in.lazyBinding->finalizeContents(); @@ -621,6 +629,7 @@ void macho::createSyntheticSections() { in.header = make(); + in.rebase = make(); in.binding = make(); in.weakBinding = make(); in.lazyBinding = make(); diff --git a/lld/test/MachO/dylink-lazy.s b/lld/test/MachO/dylink-lazy.s --- a/lld/test/MachO/dylink-lazy.s +++ b/lld/test/MachO/dylink-lazy.s @@ -19,9 +19,13 @@ ## symbol each entry points to. So we call objdump twice in order to get the ## disassembly of __text and the bind tables first, which allow us to check for ## matching entries in __stubs. -# RUN: (llvm-objdump -d --no-show-raw-insn --syms --bind --lazy-bind %t/dylink-lazy; \ +# RUN: (llvm-objdump -d --no-show-raw-insn --syms --rebase --bind --lazy-bind %t/dylink-lazy; \ # RUN: llvm-objdump -D --no-show-raw-insn %t/dylink-lazy) | FileCheck %s +# RUN: lld -flavor darwinnew -pie -o %t/dylink-lazy-pie \ +# RUN: -L%S/Inputs/MacOSX.sdk/usr/lib -L%t -lhello -lgoodbye %t/dylink-lazy.o -lSystem +# RUN: llvm-objdump --macho --rebase %t/dylink-lazy-pie | FileCheck %s --check-prefix=PIE + # CHECK-LABEL: SYMBOL TABLE: # CHECK: {{0*}}[[#%x, IMGLOADER:]] {{.*}} __DATA,__data __dyld_private @@ -29,7 +33,11 @@ # CHECK: callq 0x[[#%x, HELLO_STUB:]] # CHECK-NEXT: callq 0x[[#%x, GOODBYE_STUB:]] -# CHECK-LABEL: Bind table: +## Check that the rebase table is empty. 
+# CHECK-LABEL: Rebase table: +# CHECK-NEXT: segment section address type + +# CHECK-NEXT: Bind table: # CHECK: __DATA_CONST __got 0x[[#%x, BINDER:]] pointer 0 libSystem dyld_stub_binder # CHECK-LABEL: Lazy bind table: @@ -51,6 +59,11 @@ # CHECK-NEXT: pushq $21 # CHECK-NEXT: jmp 0x[[#STUB_HELPER_ENTRY]] +# PIE: Rebase table: +# PIE-NEXT: segment section address type +# PIE-NEXT: __DATA __la_symbol_ptr 0x[[#%X, ADDR:]] pointer +# PIE-NEXT: __DATA __la_symbol_ptr 0x[[#ADDR + 8]] pointer + .text .globl _main diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s --- a/lld/test/MachO/local-got.s +++ b/lld/test/MachO/local-got.s @@ -6,7 +6,7 @@ # RUN: @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o # RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello -# RUN: llvm-objdump --full-contents --bind %t/test | FileCheck %s --match-full-lines +# RUN: llvm-objdump --full-contents --rebase --bind %t/test | FileCheck %s --match-full-lines ## Check that the GOT references the cstrings. --full-contents displays the ## address offset and the contents at that address very similarly, so am using @@ -20,11 +20,26 @@ # CHECK-NEXT: [[#%X,ADDR:]] 1a040000 01000000 0c040000 01000000 {{.*}} # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}} +## Check that the rebase table is empty. 
+# CHECK: Rebase table: +# CHECK-NEXT: segment section address type + ## Check that a non-locally-defined symbol is still bound at the correct offset: -# CHECK: Bind table: +# CHECK-NEXT: Bind table: # CHECK-NEXT: segment section address type addend dylib symbol # CHECK-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me +# RUN: lld -flavor darwinnew -pie -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello +# RUN: llvm-objdump --macho --rebase --bind %t/test | FileCheck %s --check-prefix=PIE --match-full-lines +# PIE: Rebase table: +# PIE-NEXT: segment section address type +# PIE-NEXT: __DATA_CONST __got 0x[[#%X,ADDR:]] pointer +# PIE-NEXT: __DATA_CONST __got 0x[[#ADDR + 8]] pointer + +# PIE-NEXT: Bind table: +# PIE-NEXT: segment section address type addend dylib symbol +# PIE-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me + .globl _main .text diff --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s --- a/lld/test/MachO/x86-64-reloc-unsigned.s +++ b/lld/test/MachO/x86-64-reloc-unsigned.s @@ -1,11 +1,24 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o -# RUN: llvm-objdump --full-contents %t | FileCheck %s -# CHECK: Contents of section foo: -# CHECK: 100001000 08100000 01000000 -# CHECK: Contents of section bar: -# CHECK: 100001008 011000f0 11211111 02000000 +# RUN: llvm-objdump --macho --rebase --full-contents %t | FileCheck %s + +# RUN: lld -flavor darwinnew -pie -o %t-pie %t.o +# RUN: llvm-objdump --macho --rebase %t-pie | FileCheck %s --check-prefix=PIE + +# CHECK: Contents of section foo: +# CHECK-NEXT: 100001000 08100000 01000000 +# CHECK: Contents of section bar: +# CHECK-NEXT: 100001008 011000f0 11211111 02000000 +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-EMPTY: + +# PIE: Rebase table: +# PIE-NEXT: segment section address type +# PIE-DAG: __DATA 
foo 0x[[#%X,ADDR:]] pointer +# PIE-DAG: __DATA bar 0x[[#ADDR + 8]] pointer +# PIE-DAG: __DATA bar 0x[[#ADDR + 12]] pointer .globl _main, _foo, _bar