diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -16,6 +16,7 @@ #include "OutputSegment.h" #include "Target.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/MC/StringTableBuilder.h" @@ -169,40 +170,34 @@ }; struct BindingEntry { - const DylibSymbol *dysym; int64_t addend; Location target; - BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target) - : dysym(dysym), addend(addend), target(std::move(target)) {} + BindingEntry(int64_t addend, Location target) + : addend(addend), target(std::move(target)) {} }; +template +using BindingsMap = llvm::DenseMap>; + // Stores bind opcodes for telling dyld which symbols to load non-lazily. class BindingSection final : public LinkEditSection { public: BindingSection(); void finalizeContents() override; uint64_t getRawSize() const override { return contents.size(); } - bool isNeeded() const override { return !bindings.empty(); } + bool isNeeded() const override { return !bindingsMap.empty(); } void writeTo(uint8_t *buf) const override; void addEntry(const DylibSymbol *dysym, const InputSection *isec, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, addend, Location(isec, offset)); + bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); } private: - std::vector bindings; + BindingsMap bindingsMap; SmallVector contents; }; -struct WeakBindingEntry { - const Symbol *symbol; - int64_t addend; - Location target; - WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target) - : symbol(symbol), addend(addend), target(std::move(target)) {} -}; - // Stores bind opcodes for telling dyld which weak symbols need coalescing. // There are two types of entries in this section: // @@ -219,17 +214,17 @@ void finalizeContents() override; uint64_t getRawSize() const override { return contents.size(); } bool isNeeded() const override { - return !bindings.empty() || !definitions.empty(); + return !bindingsMap.empty() || !definitions.empty(); } void writeTo(uint8_t *buf) const override; void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(symbol, addend, Location(isec, offset)); + bindingsMap[symbol].emplace_back(addend, Location(isec, offset)); } - bool hasEntry() const { return !bindings.empty(); } + bool hasEntry() const { return !bindingsMap.empty(); } void addNonWeakDefinition(const Defined *defined) { definitions.emplace_back(defined); @@ -238,7 +233,7 @@ bool hasNonWeakDefinition() const { return !definitions.empty(); } private: - std::vector bindings; + BindingsMap bindingsMap; std::vector definitions; SmallVector contents; }; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -273,7 +273,6 @@ OutputSegment *segment = nullptr; uint64_t offset = 0; int64_t addend = 0; - int16_t ordinal = 0; }; } // namespace @@ -283,9 +282,8 @@ // The bind opcode "interpreter" remembers the values of each binding field, so // we only need to encode the differences between bindings. Hence the use of // lastBinding. -static void encodeBinding(const Symbol *sym, const OutputSection *osec, - uint64_t outSecOff, int64_t addend, - bool isWeakBinding, Binding &lastBinding, +static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, + int64_t addend, Binding &lastBinding, raw_svector_ostream &os) { OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; @@ -307,13 +305,7 @@ lastBinding.addend = addend; } - uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; - if (!isWeakBinding && sym->isWeakRef()) - flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; - - os << flags << sym->getName() << '\0' - << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) - << static_cast(BIND_OPCODE_DO_BIND); + os << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset lastBinding.offset += target->wordSize; } @@ -345,6 +337,37 @@ << defined->getName() << '\0'; } +// Organize the bindings so we can encoded them with fewer opcodes. +// +// First, all bindings for a given symbol should be grouped together. +// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it +// has an associated symbol string), so we only want to emit it once per symbol. +// +// Within each group, we sort the bindings by address. Since bindings are +// delta-encoded, sorting them allows for a more compact result. Note that +// sorting by address alone ensures that bindings for the same segment / section +// are located together, minimizing the number of times we have to emit +// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB. +// +// Finally, we sort the symbols by the address of their first binding, again +// to facilitate the delta-encoding process. +template +std::vector>> +sortBindings(const BindingsMap &bindingsMap) { + std::vector>> bindingsVec( + bindingsMap.begin(), bindingsMap.end()); + for (auto &p : bindingsVec) { + std::vector &bindings = p.second; + llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + } + llvm::sort(bindingsVec, [](const auto &a, const auto &b) { + return a.second[0].target.getVA() < b.second[0].target.getVA(); + }); + return bindingsVec; +} + // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically @@ -361,24 +384,30 @@ void BindingSection::finalizeContents() { raw_svector_ostream os{contents}; Binding lastBinding; - - // Since bindings are delta-encoded, sorting them allows for a more compact - // result. Note that sorting by address alone ensures that bindings for the - // same segment / section are located together. - llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { - return a.target.getVA() < b.target.getVA(); - }); - for (const BindingEntry &b : bindings) { - int16_t ordinal = ordinalForDylibSymbol(*b.dysym); - if (ordinal != lastBinding.ordinal) { + int16_t lastOrdinal = 0; + + for (auto &p : sortBindings(bindingsMap)) { + const DylibSymbol *sym = p.first; + std::vector &bindings = p.second; + llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (sym->isWeakRef()) + flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; + os << flags << sym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); + int16_t ordinal = ordinalForDylibSymbol(*sym); + if (ordinal != lastOrdinal) { encodeDylibOrdinal(ordinal, os); - lastBinding.ordinal = ordinal; + lastOrdinal = ordinal; } - encodeBinding(b.dysym, b.target.isec->parent, - b.target.isec->getOffset(b.target.offset), b.addend, - /*isWeakBinding=*/false, lastBinding, os); + for (const BindingEntry &b : bindings) + encodeBinding(b.target.isec->parent, + b.target.isec->getOffset(b.target.offset), b.addend, + lastBinding, os); } - if (!bindings.empty()) + if (!bindingsMap.empty()) os << static_cast(BIND_OPCODE_DONE); } @@ -396,17 +425,18 @@ for (const Defined *defined : definitions) encodeWeakOverride(defined, os); - // Since bindings are delta-encoded, sorting them allows for a more compact - // result. - llvm::sort(bindings, - [](const WeakBindingEntry &a, const WeakBindingEntry &b) { - return a.target.getVA() < b.target.getVA(); - }); - for (const WeakBindingEntry &b : bindings) - encodeBinding(b.symbol, b.target.isec->parent, - b.target.isec->getOffset(b.target.offset), b.addend, - /*isWeakBinding=*/true, lastBinding, os); - if (!bindings.empty() || !definitions.empty()) + for (auto &p : sortBindings(bindingsMap)) { + const Symbol *sym = p.first; + std::vector &bindings = p.second; + os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) + << sym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); + for (const BindingEntry &b : bindings) + encodeBinding(b.target.isec->parent, + b.target.isec->getOffset(b.target.offset), b.addend, + lastBinding, os); + } + if (!bindingsMap.empty() || !definitions.empty()) os << static_cast(BIND_OPCODE_DONE); } diff --git a/lld/test/MachO/bind-opcodes.s b/lld/test/MachO/bind-opcodes.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/bind-opcodes.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: %lld -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test + +## Make sure we emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per +## symbol. +# RUN: obj2yaml %t/test | FileCheck %s --implicit-check-not BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + +# CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _foo + +# CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _bar + +# RUN: llvm-objdump --macho --bind %t/test | FileCheck %s --check-prefix=BIND +# BIND: Bind table: +# BIND-NEXT: segment section address type addend dylib symbol +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar +# BIND-EMPTY: + +#--- foo.s +.globl _foo, _bar +_foo: + .space 4 +_bar: + .space 4 + +#--- test.s +.data +.quad _foo +.quad _bar +.quad _foo +.quad _bar + +.globl _main +.text +_main: