diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h --- a/lld/MachO/OutputSection.h +++ b/lld/MachO/OutputSection.h @@ -63,6 +63,8 @@ uint64_t fileOff = 0; uint32_t align = 1; uint32_t flags = 0; + uint32_t reserved1 = 0; + uint32_t reserved2 = 0; private: Kind sectionKind; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -72,6 +72,8 @@ uint32_t stubsIndex = UINT32_MAX; + uint32_t symtabIndex = UINT32_MAX; + protected: Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -33,6 +33,7 @@ constexpr const char lazyBinding[] = "__lazy_binding"; constexpr const char export_[] = "__export"; constexpr const char symbolTable[] = "__symbol_table"; +constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; constexpr const char stringTable[] = "__string_table"; constexpr const char got[] = "__got"; constexpr const char threadPtrs[] = "__thread_ptrs"; @@ -391,6 +392,28 @@ std::vector symbols; }; +// The indirect symbol table is a list of 32-bit integers that serve as indices +// into the (actual) symbol table. The indirect symbol table is a +// concatenation of several sub-arrays of indices, each sub-array belonging to +// a separate section. The starting offset of each sub-array is stored in the +// reserved1 header field of the respective section. +// +// These sub-arrays provide symbol information for sections that store +// contiguous sequences of symbol references. These references can be pointers +// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g. +// function stubs). 
+class IndirectSymtabSection : public LinkEditSection { +public: + IndirectSymtabSection(); + void finalizeContents(); + uint32_t getNumSymbols() const; + uint64_t getRawSize() const override { + return getNumSymbols() * sizeof(uint32_t); + } + bool isNeeded() const override; + void writeTo(uint8_t *buf) const override; +}; + struct InStruct { MachHeaderSection *header = nullptr; BindingSection *binding = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -296,7 +296,10 @@ } StubsSection::StubsSection() - : SyntheticSection(segment_names::text, "__stubs") {} + : SyntheticSection(segment_names::text, "__stubs") { + flags = MachO::S_SYMBOL_STUBS; + reserved2 = target->stubSize; +} uint64_t StubsSection::getSize() const { return entries.size() * target->stubSize; @@ -464,9 +467,12 @@ void SymtabSection::finalizeContents() { // TODO support other symbol types - for (Symbol *sym : symtab->getSymbols()) - if (isa(sym)) + for (Symbol *sym : symtab->getSymbols()) { + if (isa(sym) || sym->isInGot() || sym->isInStubs()) { + sym->symtabIndex = symbols.size(); symbols.push_back({sym, stringTableSection.addString(sym->getName())}); + } + } } void SymtabSection::writeTo(uint8_t *buf) const { @@ -486,6 +492,47 @@ } } +IndirectSymtabSection::IndirectSymtabSection() + : LinkEditSection(segment_names::linkEdit, + section_names::indirectSymbolTable) {} + +uint32_t IndirectSymtabSection::getNumSymbols() const { + return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + + in.stubs->getEntries().size(); +} + +bool IndirectSymtabSection::isNeeded() const { + return in.got->isNeeded() || in.tlvPointers->isNeeded() || + in.stubs->isNeeded(); +} + +void IndirectSymtabSection::finalizeContents() { + uint32_t off = 0; + in.got->reserved1 = off; + off += in.got->getEntries().size(); + in.tlvPointers->reserved1 = off; + off += in.tlvPointers->getEntries().size(); 
+ // There is a 1:1 correspondence between stubs and LazyPointerSection + // entries, so they can share the same sub-array in the table. + in.stubs->reserved1 = in.lazyPointers->reserved1 = off; +} + +void IndirectSymtabSection::writeTo(uint8_t *buf) const { + uint32_t off = 0; + for (const Symbol *sym : in.got->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } + for (const Symbol *sym : in.tlvPointers->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } + for (const Symbol *sym : in.stubs->getEntries()) { + write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); + ++off; + } +} + StringTableSection::StringTableSection() : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -60,6 +60,7 @@ MachHeaderSection *header = nullptr; StringTableSection *stringTableSection = nullptr; SymtabSection *symtabSection = nullptr; + IndirectSymtabSection *indirectSymtabSection = nullptr; UnwindInfoSection *unwindInfoSection = nullptr; }; @@ -105,13 +106,20 @@ class LCDysymtab : public LoadCommand { public: + LCDysymtab(IndirectSymtabSection *indirectSymtabSection) + : indirectSymtabSection(indirectSymtabSection) {} + uint32_t getSize() const override { return sizeof(dysymtab_command); } void writeTo(uint8_t *buf) const override { auto *c = reinterpret_cast(buf); c->cmd = LC_DYSYMTAB; c->cmdsize = getSize(); + c->indirectsymoff = indirectSymtabSection->fileOff; + c->nindirectsyms = indirectSymtabSection->getNumSymbols(); } + + IndirectSymtabSection *indirectSymtabSection = nullptr; }; class LCSegment : public LoadCommand { @@ -163,6 +171,8 @@ sectHdr->align = Log2_32(osec->align); sectHdr->flags = osec->flags; sectHdr->size = osec->getSize(); + sectHdr->reserved1 = osec->reserved1; + sectHdr->reserved2 = osec->reserved2; } } @@ -339,7 +349,7 @@ in.header->addLoadCommand( 
make(in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); - in.header->addLoadCommand(make()); + in.header->addLoadCommand(make(indirectSymtabSection)); for (StringRef path : config->runtimePaths) in.header->addLoadCommand(make(path)); @@ -438,11 +448,12 @@ .Default(0); } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) - .Case(section_names::binding, -6) - .Case(section_names::weakBinding, -5) - .Case(section_names::lazyBinding, -4) - .Case(section_names::export_, -3) - .Case(section_names::symbolTable, -2) + .Case(section_names::binding, -7) + .Case(section_names::weakBinding, -6) + .Case(section_names::lazyBinding, -5) + .Case(section_names::export_, -4) + .Case(section_names::symbolTable, -3) + .Case(section_names::indirectSymbolTable, -2) .Case(section_names::stringTable, -1) .Default(0); } @@ -494,6 +505,7 @@ stringTableSection = make(); unwindInfoSection = make(); // TODO(gkm): only when no -r symtabSection = make(*stringTableSection); + indirectSymtabSection = make(); switch (config->outputType) { case MH_EXECUTE: @@ -614,6 +626,7 @@ in.lazyBinding->finalizeContents(); in.exports->finalizeContents(); symtabSection->finalizeContents(); + indirectSymtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its // addresses and offsets. 
diff --git a/lld/test/MachO/indirect-symtab.s b/lld/test/MachO/indirect-symtab.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/indirect-symtab.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: lld -flavor darwinnew -dylib %t/libfoo.o -o %t/libfoo.dylib -syslibroot %S/Inputs/MacOSX.sdk -lSystem +# RUN: lld -flavor darwinnew %t/test.o %t/libfoo.dylib -o %t/test -syslibroot %S/Inputs/MacOSX.sdk -lSystem +# RUN: llvm-objdump --macho -d --no-show-raw-insn --indirect-symbols %t/test | FileCheck %s + +# CHECK: (__TEXT,__text) section +# CHECK-NEXT: _main: +# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _foo +# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _bar +# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _foo_tlv +# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _bar_tlv +# CHECK-NEXT: callq {{.*}} ## symbol stub for: _foo_fn +# CHECK-NEXT: callq {{.*}} ## symbol stub for: _bar_fn +# CHECK-NEXT: retq + +# CHECK: Indirect symbols for (__TEXT,__stubs) 2 entries +# CHECK-NEXT: address index name +# CHECK-NEXT: _bar_fn +# CHECK-NEXT: _foo_fn +# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries +# CHECK-NEXT: address index name +# CHECK-NEXT: _bar_tlv +# CHECK-NEXT: _foo_tlv +# CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries +# CHECK-NEXT: address index name +# CHECK-NEXT: _bar_fn +# CHECK-NEXT: _foo_fn +# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries +# CHECK-NEXT: address index name +# CHECK-NEXT: _bar +# CHECK-NEXT: _foo +# CHECK-NEXT: _stub_binder + +#--- libfoo.s + +.globl _foo, _foo_fn, _bar, _bar_fn +_foo: +_foo_fn: +_bar: +_bar_fn: + +.section __DATA,__thread_vars,thread_local_variables +.globl _foo_tlv, _bar_tlv +_foo_tlv: +_bar_tlv: 
+ +#--- test.s + +.globl _main +_main: + movq _foo@GOTPCREL(%rip), %rax + movq _bar@GOTPCREL(%rip), %rax + mov _foo_tlv@TLVP(%rip), %rax + mov _bar_tlv@TLVP(%rip), %rax + callq _foo_fn + callq _bar_fn + ret