diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -5,6 +5,7 @@ add_lld_library(lldMachO2 Arch/X86_64.cpp Driver.cpp + ExportTrie.cpp InputFiles.cpp InputSection.cpp OutputSegment.cpp diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h new file mode 100644 --- /dev/null +++ b/lld/MachO/ExportTrie.h @@ -0,0 +1,32 @@ +//===- ExportTrie.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" + +#include + +namespace lld { +namespace macho { + +struct TrieNode; +class Symbol; + +class TrieBuilder { +public: + TrieBuilder(); + void addSymbol(const Symbol &sym); + llvm::SmallVector build(); + +private: + TrieNode *rootNode; + std::vector exported; + std::vector allNodes; +}; + +} // namespace macho +} // namespace lld diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp new file mode 100644 --- /dev/null +++ b/lld/MachO/ExportTrie.cpp @@ -0,0 +1,214 @@ +//===- ExportTrie.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a partial implementation of the Mach-O export trie format. +// Documentation of the format can be found in +// llvm/tools/obj2yaml/macho2yaml.cpp. +// +//===----------------------------------------------------------------------===// + +#include "ExportTrie.h" +#include "Symbols.h" + +#include "lld/Common/Memory.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; +using namespace lld::macho; + +namespace { + +struct TrieEdge : public ilist_node { + TrieEdge(StringRef s, TrieNode *node) : subString(s), child(node) {} + + StringRef subString; + struct TrieNode *child; +}; + +} // namespace + +template <> +struct llvm::ilist_alloc_traits + : llvm::ilist_noalloc_traits {}; + +namespace lld { +namespace macho { + +struct TrieNode { + typedef ilist TrieEdgeList; + + TrieNode(StringRef s) + : cumulativeString(s), address(0), trieOffset(0), hasExportInfo(false) {} + ~TrieNode() = default; + + void addSymbol(const Symbol &entry, std::vector &allNodes); + + // Push into orderedNodes the path of nodes from the root to the leaf + // containing the symbol. + void orderNodes(const Symbol &, std::vector &orderedNodes); + + bool updateOffset(uint32_t &offset); + void encode(raw_svector_ostream &out); + +private: + StringRef cumulativeString; + TrieEdgeList children; + uint64_t address; + // Estimated offset from the start of the serialized trie to the current node. + // This will converge to the true offset when updateOffset() is run to a + // fixpoint. + uint32_t trieOffset; + bool hasExportInfo; + bool ordered = false; +}; + +void TrieNode::addSymbol(const Symbol &entry, + std::vector &allNodes) { + StringRef partialStr = entry.getName().drop_front(cumulativeString.size()); + for (TrieEdge &edge : children) { + StringRef edgeStr = edge.subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge.child->addSymbol(entry, allNodes); + return; + } + // See if string has common prefix with existing edge. + for (size_t n = edgeStr.size() - 1; n > 0; --n) { + if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { + // Splice in new node: was A -> C, now A -> B -> C + StringRef bNodeStr = edge.child->cumulativeString; + bNodeStr = bNodeStr.drop_back(edgeStr.size() - n).copy(bAlloc); + auto *bNode = make(bNodeStr); + allNodes.push_back(bNode); + TrieNode *cNode = edge.child; + StringRef abEdgeStr = edgeStr.substr(0, n).copy(bAlloc); + StringRef bcEdgeStr = edgeStr.substr(n).copy(bAlloc); + TrieEdge &abEdge = edge; + abEdge.subString = abEdgeStr; + abEdge.child = bNode; + auto *bcEdge = make(bcEdgeStr, cNode); + bNode->children.insert(bNode->children.end(), bcEdge); + bNode->addSymbol(entry, allNodes); + return; + } + } + } + // No commonality with any existing child, so make a new edge. + auto *newNode = make(entry.getName().copy(bAlloc)); + auto *newEdge = make(partialStr, newNode); + children.insert(children.end(), newEdge); + newNode->address = entry.getVA() + ImageBase; + newNode->hasExportInfo = true; + allNodes.push_back(newNode); +} + +void TrieNode::orderNodes(const Symbol &entry, + std::vector &orderedNodes) { + if (!ordered) { + orderedNodes.push_back(this); + ordered = true; + } + + StringRef partialStr = entry.getName().drop_front(cumulativeString.size()); + for (TrieEdge &edge : children) { + StringRef edgeStr = edge.subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge.child->orderNodes(entry, orderedNodes); + return; + } + } +} + +bool TrieNode::updateOffset(uint32_t &offset) { + uint32_t nodeSize = 1; // Length when no Symbol info + if (hasExportInfo) { + uint64_t flags = 0; + nodeSize = getULEB128Size(flags) + getULEB128Size(address); + // Overall node size so far is uleb128 of Symbol info + actual Symbol info. + nodeSize += getULEB128Size(nodeSize); + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of children. + for (TrieEdge &edge : children) { + nodeSize += edge.subString.size() + 1 // String length. + + getULEB128Size(edge.child->trieOffset); // Offset len. + } + // On input, 'offset' is the new preferred location for this node. + bool result = (trieOffset != offset); + // Store new location in node object for use by parents. + trieOffset = offset; + offset += nodeSize; + return result; +} + +void TrieNode::encode(raw_svector_ostream &os) { + if (hasExportInfo) { + // Nodes with Symbol info: size, flags address + uint64_t flags = 0; // TODO: emit proper flags + uint32_t nodeSize = getULEB128Size(flags) + getULEB128Size(address); + assert(nodeSize < 256); + os << static_cast(nodeSize); + encodeULEB128(flags, os); + encodeULEB128(address, os); + } else { + // Node with no Symbol info. + os << static_cast(0); // nodeSize + } + // Add number of children. TODO: Handle case where we have more than 256. + assert(children.size() < 256); + os << static_cast(children.size()); + // Append each child edge substring and node offset. + for (TrieEdge &edge : children) { + os << edge.subString << '\0'; + encodeULEB128(edge.child->trieOffset, os); + } +} + +TrieBuilder::TrieBuilder() : rootNode(make(StringRef())) { + allNodes.push_back(rootNode); +} + +void TrieBuilder::addSymbol(const Symbol &sym) { + rootNode->addSymbol(sym, allNodes); + exported.push_back(&sym); +} + +SmallVector TrieBuilder::build() { + SmallVector out; + if (exported.empty()) + return out; + + std::vector orderedNodes; + orderedNodes.reserve(allNodes.size()); + for (const Symbol *sym : exported) + rootNode->orderNodes(*sym, orderedNodes); + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. + bool more; + do { + uint32_t offset = 0; + more = false; + for (TrieNode *node : orderedNodes) + more |= node->updateOffset(offset); + } while (more); + + raw_svector_ostream os{out}; + for (TrieNode *node : orderedNodes) + node->encode(os); + return out; +} + +} // namespace macho +} // namespace lld diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -8,6 +8,7 @@ #include "SyntheticSections.h" #include "Config.h" +#include "ExportTrie.h" #include "InputFiles.h" #include "OutputSegment.h" #include "SymbolTable.h" @@ -122,32 +123,12 @@ } void ExportSection::encode() { - raw_svector_ostream os{contents}; - std::vector exported; + TrieBuilder builder; for (const Symbol *sym : symtab->getSymbols()) if (auto *defined = dyn_cast(sym)) - exported.push_back(defined); - - if (exported.empty()) - return; - - if (exported.size() > 1) { - error("TODO: Unable to export more than 1 symbol"); - return; - } + builder.addSymbol(*defined); - auto *sym = exported.front(); - os << (char)0; // Indicates non-leaf node - os << (char)1; // # of children - os << sym->getName() << '\0'; - encodeULEB128(sym->getName().size() + 4, os); // Leaf offset - - // Leaf node - uint64_t addr = sym->getVA() + ImageBase; - os << (char)(1 + getULEB128Size(addr)); - os << (char)0; // Flags - encodeULEB128(addr, os); - os << (char)0; // Terminator + contents = builder.build(); } void ExportSection::writeTo(uint8_t *buf) { diff --git a/lld/test/MachO/dylib.s b/lld/test/MachO/dylib.s --- a/lld/test/MachO/dylib.s +++ b/lld/test/MachO/dylib.s @@ -28,7 +28,7 @@ ## export trie addresses match symtab addresses. .section __TEXT,__cstring -.globl _hello_world +.globl _hello_world, _goodbye_world _hello_world: .asciz "Hello world!\n" diff --git a/lld/test/MachO/export-trie.s b/lld/test/MachO/export-trie.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/export-trie.s @@ -0,0 +1,34 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib + +# RUN: llvm-objdump --exports-trie -d %t.dylib | FileCheck %s --check-prefix=EXPORTS +# EXPORTS-LABEL: Exports trie: +# EXPORTS-DAG: _hello_world +# EXPORTS-DAG: _hello_its_me +# EXPORTS-DAG: _hello_its_you +## TODO: once we support proper symtab entries, add a check here to ensure +## export trie addresses match symtab addresses. + +## Check that we are sharing prefixes in the trie. +# RUN: obj2yaml %t.dylib | FileCheck %s +# CHECK-LABEL: ExportTrie: +# CHECK: Name: '' +# CHECK: Name: _hello_ +# CHECK: Name: its_ +# CHECK: Name: me +# CHECK: Name: you +# CHECK: Name: world + +.section __TEXT,__cstring +.globl _hello_world, _hello_its_me, _hello_its_you + +_hello_world: +.asciz "Hello world!\n" + +.data +_hello_its_me: +.asciz "Hello, it's me\n" + +_hello_its_you: +.asciz "Hello, it's you\n" diff --git a/lld/test/MachO/no-exports-dylib.s b/lld/test/MachO/no-exports-dylib.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/no-exports-dylib.s @@ -0,0 +1,6 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib + +# RUN: obj2yaml %t.dylib | FileCheck %s +# CHECK: export_size: 0