diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -9,6 +9,7 @@
 #ifndef LLD_MACHO_CONFIG_H
 #define LLD_MACHO_CONFIG_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 
@@ -18,6 +19,7 @@
 namespace macho {
 
 class Symbol;
+struct SymbolPriorityEntry;
 
 struct Configuration {
   Symbol *entry;
@@ -26,6 +28,20 @@
   llvm::StringRef outputFile;
   llvm::MachO::HeaderFileType outputType;
   std::vector searchPaths;
+  // Order-file priorities, keyed by symbol name (filled by parseOrderFile).
+  llvm::StringMap<SymbolPriorityEntry> priorities;
+};
+
+// The symbol with the highest priority should be ordered first in the output
+// section (modulo input section contiguity constraints). Using priority
+// (highest first) instead of order (lowest first) has the convenient property
+// that the default-constructed zero priority -- for symbols/sections without a
+// user-defined order -- naturally puts them at the end of the output.
+struct SymbolPriorityEntry {
+  // Priority from an entry that names the symbol without an object file.
+  size_t anyObjectFile = 0;
+  // Priorities from "<object file>:<symbol>" entries, keyed by object file.
+  llvm::StringMap<size_t> objectFiles;
 };
 
 extern Configuration *config;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -24,6 +24,7 @@
 #include "lld/Common/Version.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Option/ArgList.h"
@@ -116,6 +117,64 @@
   }
 }
 
+// Returns true if `s` is an architecture name that may prefix an order entry.
+static bool isArchString(StringRef s) {
+  static const StringSet<> archNames{"ppc",    "ppc64", "i386",
+                                     "x86_64", "arm",   "arm64"};
+  return archNames.count(s) != 0;
+}
+
+// An order file has one entry per line, in the following format:
+//   <arch>:<object file>:<symbol name>
+//
+// <arch> and <object file> are optional. If not specified, then that entry
+// matches any symbol of that name.
+//
+// If a symbol is matched by multiple entries, then it takes the lowest-ordered
+// entry (the one nearest to the front of the list.)
+//
+// The file can also have line comments that start with '#'.
+void parseOrderFile(StringRef path) {
+  Optional<MemoryBufferRef> buffer = readFile(path);
+  if (!buffer) {
+    error("Could not read order file at " + path);
+    return;
+  }
+
+  MemoryBufferRef mbref = *buffer;
+  size_t priority = std::numeric_limits<size_t>::max();
+  for (StringRef rest : args::getLines(mbref)) {
+    StringRef symbol, objectFile;
+    bool otherArch = false;
+    while (!rest.empty() && !otherArch) {
+      std::pair<StringRef, StringRef> p = getToken(rest, ": \t\n\v\f\r");
+      StringRef tok = p.first;
+      rest = p.second;
+      // Check if we have a comment
+      if (tok.empty() || tok[0] == '#')
+        break;
+      // x86_64 is the only arch treated as matching: its prefix is consumed,
+      // while an entry prefixed with any other arch is ignored entirely.
+      if (isArchString(tok))
+        otherArch = tok != "x86_64";
+      else if (tok.endswith(".o"))
+        objectFile = tok;
+      else
+        symbol = tok;
+    }
+
+    if (!otherArch && !symbol.empty()) {
+      SymbolPriorityEntry &entry = config->priorities[symbol];
+      if (!objectFile.empty())
+        entry.objectFiles.insert(std::make_pair(objectFile, priority));
+      else
+        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
+    }
+
+    --priority;
+  }
+}
+
 // We expect sub-library names of the form "libfoo", which will match a dylib
 // with a path of .*/libfoo.dylib.
 static bool markSubLibrary(StringRef searchName) {
@@ -183,6 +242,10 @@
     error("-sub_library " + searchName + " does not match a supplied dylib");
   }
 
+  StringRef orderFile = args.getLastArgValue(OPT_order_file);
+  if (!orderFile.empty())
+    parseOrderFile(orderFile);
+
   // dyld requires us to load libSystem. Since we may run tests on non-OSX
   // systems which do not have libSystem, we mock it out here.
   // TODO: Replace this with a stub tbd file once we have TAPI support.
diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h
--- a/lld/MachO/MergedOutputSection.h
+++ b/lld/MachO/MergedOutputSection.h
@@ -22,7 +22,7 @@
 // in the final binary.
class MergedOutputSection : public OutputSection { public: - MergedOutputSection(StringRef name) : OutputSection(name) {} + MergedOutputSection(StringRef name) : OutputSection(MergedKind, name) {} const InputSection *firstSection() const { return inputs.front(); } const InputSection *lastSection() const { return inputs.back(); } @@ -38,6 +38,10 @@ std::vector inputs; + static bool classof(const OutputSection *sec) { + return sec->kind() == MergedKind; + } + private: void mergeFlags(uint32_t inputFlags); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -23,6 +23,10 @@ def o: Separate<["-"], "o">, MetaVarName<"">, HelpText<"Path to file to write output">; +def order_file: Separate<["-"], "order_file">, + HelpText<"Lay out symbols within each section in the order specified by the " + "order file">; + def sub_library: Separate<["-"], "sub_library">, MetaVarName<"">, HelpText<"Re-export the specified dylib">; diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h --- a/lld/MachO/OutputSection.h +++ b/lld/MachO/OutputSection.h @@ -24,8 +24,14 @@ // linker with the same segment / section name. class OutputSection { public: - OutputSection(StringRef name) : name(name) {} + enum Kind { // Subclass discriminator compared by MergedOutputSection::classof and SyntheticSection::classof. + MergedKind, + SyntheticKind, + }; + + OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {} virtual ~OutputSection() = default; + Kind kind() const { return sectionKind; } // These accessors will only be valid after finalizing the section. 
uint64_t getSegmentOffset() const; @@ -60,6 +66,9 @@ uint64_t fileOff = 0; uint32_t align = 1; uint32_t flags = 0; + +private: + Kind sectionKind; }; class OutputSectionComparator { diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -93,7 +93,6 @@ OutputSegment *getOutputSegment(StringRef name); OutputSegment *getOrCreateOutputSegment(StringRef name); -void sortOutputSegmentsAndSections(); } // namespace macho } // namespace lld diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -129,23 +129,3 @@ outputSegments.push_back(segRef); return segRef; } - -void macho::sortOutputSegmentsAndSections() { - // Sorting only can happen once all outputs have been collected. - // Since output sections are grouped by segment, sorting happens - // first over all segments, then over sections per segment. - auto comparator = OutputSegmentComparator(); - llvm::stable_sort(outputSegments, comparator); - - // Now that the output sections are sorted, assign the final - // output section indices. - uint32_t sectionIndex = 0; - for (OutputSegment *seg : outputSegments) { - seg->sortOutputSections(&comparator); - for (auto &p : seg->getSections()) { - OutputSection *section = p.second; - if (!section->isHidden()) - section->index = ++sectionIndex; - } - } -} diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -40,6 +40,10 @@ public: SyntheticSection(const char *segname, const char *name); virtual ~SyntheticSection() = default; + + static bool classof(const OutputSection *sec) { // True when the section's kind() is SyntheticKind. + return sec->kind() == SyntheticKind; + } }; // The header of the Mach-O file, which must have a file offset of zero. 
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -28,7 +28,7 @@ namespace macho { SyntheticSection::SyntheticSection(const char *segname, const char *name) - : OutputSection(name) { + : OutputSection(SyntheticKind, name) { // Synthetic sections always know which segment they belong to so hook // them up when they're made getOrCreateOutputSegment(segname)->addOutputSection(this); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -10,6 +10,8 @@ #include "Config.h" #include "InputFiles.h" #include "InputSection.h" +#include "MergedOutputSection.h" +#include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" @@ -21,6 +23,7 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::MachO; @@ -293,6 +296,74 @@ } } +static size_t getSymbolPriority(const SymbolPriorityEntry &entry, + const InputFile &file) { // Returns the larger of the priority recorded under this file's base name and the any-object-file priority. + return std::max(entry.objectFiles.lookup(sys::path::filename(file.getName())), + entry.anyObjectFile); +} + +// Each section gets assigned the priority of the highest-priority symbol it 
+static DenseMap buildInputSectionPriorities() { + DenseMap sectionPriorities; + + if (config->priorities.empty()) + return sectionPriorities; + + auto addSym = [&](Defined &sym) { + auto it = config->priorities.find(sym.getName()); + if (it == config->priorities.end()) + return; + + SymbolPriorityEntry &entry = it->second; + size_t &priority = sectionPriorities[sym.isec]; + priority = std::max(priority, getSymbolPriority(entry, *sym.isec->file)); + }; + + for (InputFile *file : inputFiles) { + // TODO: handle archive files + if (auto objFile = dyn_cast(file)) { + for (Symbol *sym : file->symbols) + if (auto *d = dyn_cast(sym)) + addSym(*d); + } + } + + return sectionPriorities; +} + +// Sorting only can happen once all outputs have been collected. Here we sort +// segments, output sections within each segment, and input sections within each +// output segment. +static void sortSegmentsAndSections() { + auto comparator = OutputSegmentComparator(); + llvm::stable_sort(outputSegments, comparator); + + DenseMap isecPriorities = + buildInputSectionPriorities(); + + uint32_t sectionIndex = 0; + for (OutputSegment *seg : outputSegments) { + seg->sortOutputSections(&comparator); + for (auto &p : seg->getSections()) { + OutputSection *section = p.second; + // Now that the output sections are sorted, assign the final + // output section indices. + if (!section->isHidden()) + section->index = ++sectionIndex; + + if (!isecPriorities.empty()) { + if (auto *merged = dyn_cast(section)) { + llvm::stable_sort(merged->inputs, + [&](InputSection *a, InputSection *b) { + return isecPriorities[a] > isecPriorities[b]; + }); + } + } + } + } +} + void Writer::createOutputSections() { // First, create hidden sections headerSection = make(); @@ -383,9 +454,9 @@ in.stubHelper->setup(); // Sort and assign sections to their respective segments. No more sections nor - // segments may be created after this method runs. + // segments may be created after these methods run. 
createOutputSections(); - sortOutputSegmentsAndSections(); + sortSegmentsAndSections(); createLoadCommands(); diff --git a/lld/test/MachO/order-file.s b/lld/test/MachO/order-file.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/order-file.s @@ -0,0 +1,66 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/order-file.o +# RUN: echo ".globl _foo, _bar; .text:; _foo: _bar: ret" | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o + +# FOO-FIRST: <_foo>: +# FOO-FIRST: <_main>: + +# FOO-SECOND: <_main>: +# FOO-SECOND: <_foo>: + +# RUN: echo "_foo # just a comment" > %t/ord1 +# RUN: echo "_main # another comment" >> %t/ord1 +# RUN: lld -flavor darwinnew -o %t/order-file-1 %t/order-file.o %t/foo.o -order_file %t/ord1 +# RUN: llvm-objdump -d %t/order-file-1 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: echo "_main # just a comment" > %t/ord2 +# RUN: echo "_foo # another comment" >> %t/ord2 +# RUN: lld -flavor darwinnew -o %t/order-file-2 %t/order-file.o %t/foo.o -order_file %t/ord2 +# RUN: llvm-objdump -d %t/order-file-2 | FileCheck %s --check-prefix=FOO-SECOND + +# RUN: echo "bar.o:_foo" > %t/ord3 +# RUN: echo "_main" >> %t/ord3 +# RUN: echo "_foo" >> %t/ord3 +# RUN: lld -flavor darwinnew -o %t/order-file-3 %t/order-file.o %t/foo.o -order_file %t/ord3 +# RUN: llvm-objdump -d %t/order-file-3 | FileCheck %s --check-prefix=FOO-SECOND + +## The following tests check that if an address is matched by multiple order +## file entries, the lowest-ordered match (nearest the front of the file) wins. 
+ +# RUN: echo "_foo" > %t/ord4 +# RUN: echo "_main" >> %t/ord4 +# RUN: echo "foo.o:_foo" >> %t/ord4 +# RUN: lld -flavor darwinnew -o %t/order-file-4 %t/order-file.o %t/foo.o -order_file %t/ord4 +# RUN: llvm-objdump -d %t/order-file-4 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: echo "foo.o:_foo" > %t/ord5 +# RUN: echo "_main" >> %t/ord5 +# RUN: echo "_foo" >> %t/ord5 +# RUN: lld -flavor darwinnew -o %t/order-file-5 %t/order-file.o %t/foo.o -order_file %t/ord5 +# RUN: llvm-objdump -d %t/order-file-5 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: echo "_foo" > %t/ord6 +# RUN: echo "_main" >> %t/ord6 +# RUN: echo "_foo" >> %t/ord6 +# RUN: lld -flavor darwinnew -o %t/order-file-6 %t/order-file.o %t/foo.o -order_file %t/ord6 +# RUN: llvm-objdump -d %t/order-file-6 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: echo "foo.o:_foo" > %t/ord7 +# RUN: echo "_main" >> %t/ord7 +# RUN: echo "foo.o:_foo" >> %t/ord7 +# RUN: lld -flavor darwinnew -o %t/order-file-7 %t/order-file.o %t/foo.o -order_file %t/ord7 +# RUN: llvm-objdump -d %t/order-file-7 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: echo "_bar" > %t/ord8 +# RUN: echo "_main" >> %t/ord8 +# RUN: echo "_foo" >> %t/ord8 +# RUN: lld -flavor darwinnew -o %t/order-file-8 %t/order-file.o %t/foo.o -order_file %t/ord8 +# RUN: llvm-objdump -d %t/order-file-8 | FileCheck %s --check-prefix=FOO-FIRST + +.text +.globl _main + +_main: + ret