diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -9,6 +9,7 @@
 #ifndef LLD_MACHO_CONFIG_H
 #define LLD_MACHO_CONFIG_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 
@@ -18,6 +19,7 @@
 namespace macho {
 
 class Symbol;
+struct SymbolPriorityEntry;
 
 struct Configuration {
   Symbol *entry;
@@ -26,6 +28,23 @@
   llvm::StringRef outputFile;
   llvm::MachO::HeaderFileType outputType;
   std::vector searchPaths;
+  // Order file priorities, keyed by symbol name (see SymbolPriorityEntry).
+  llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
+};
+
+// The symbol with the highest priority should be ordered first in the output
+// section (modulo input section contiguity constraints). Using priority
+// (highest first) instead of order (lowest first) has the convenient property
+// that the default-constructed zero priority -- for symbols/sections without a
+// user-defined order -- naturally ends up putting them at the end of the
+// output.
+struct SymbolPriorityEntry {
+  // The priority given to a matching symbol, regardless of which object file
+  // it originated from.
+  size_t anyObjectFile = 0;
+  // The priority given to a matching symbol from a particular object file,
+  // keyed by the object file name given in the order file.
+  llvm::DenseMap<llvm::StringRef, size_t> objectFiles;
 };
 
 extern Configuration *config;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -22,6 +22,7 @@
 #include "lld/Common/LLVM.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Version.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -127,6 +128,119 @@
   }
 }
 
+// Architecture names accepted as the <arch> prefix of an order file entry.
+static std::vector<StringRef> archNames{"arm",    "arm64", "i386",
+                                        "x86_64", "ppc",   "ppc64"};
+// Returns true if `s` is one of the names in archNames.
+static bool isArchString(StringRef s) {
+  static DenseSet<StringRef> archNamesSet(archNames.begin(), archNames.end());
+  return archNamesSet.find(s) != archNamesSet.end();
+}
+
+// An order file has one entry per line, in the following format:
+//
+//   <arch>:<object file>:<symbol name>
+//
+// <arch> and <object file> are optional, and either may be given without the
+// other (e.g. "x86_64:_foo" or "foo.o:_foo"). If not specified, then that
+// entry matches any symbol of that name.
+//
+// If a symbol is matched by multiple entries, then it takes the lowest-ordered
+// entry (the one nearest to the front of the list.)
+//
+// The file can also have line comments that start with '#'.
+//
+// Accepted entries are recorded in config->priorities. Malformed entries are
+// reported via error() and skipped; entries for architectures other than
+// x86_64 are skipped silently.
+void parseOrderFile(StringRef path) {
+  Optional<MemoryBufferRef> buffer = readFile(path);
+  if (!buffer) {
+    error("Could not read order file at " + path);
+    return;
+  }
+
+  MemoryBufferRef mbref = *buffer;
+  // Priorities count down from the maximum so that earlier entries outrank
+  // later ones, and every entry outranks the default priority of zero.
+  size_t priority = std::numeric_limits<size_t>::max();
+  for (StringRef rest : args::getLines(mbref)) {
+    StringRef arch, objectFile, symbol;
+
+    // Split the line into at most three fields, stopping at a comment.
+    std::array<StringRef, 3> fields;
+    uint8_t fieldCount = 0;
+    while (!rest.empty() && fieldCount < 3) {
+      std::pair<StringRef, StringRef> p = getToken(rest, ": \t\n\v\f\r");
+      StringRef tok = p.first;
+      rest = p.second;
+
+      // Check if we have a comment
+      if (tok.empty() || tok[0] == '#')
+        break;
+
+      fields[fieldCount++] = tok;
+    }
+
+    switch (fieldCount) {
+    case 3:
+      arch = fields[0];
+      objectFile = fields[1];
+      symbol = fields[2];
+      break;
+    case 2:
+      // A two-field entry is either <arch>:<symbol> or <object file>:<symbol>.
+      // The two cannot be confused: object file names must end in ".o", which
+      // no architecture name does.
+      if (isArchString(fields[0]))
+        arch = fields[0];
+      else
+        objectFile = fields[0];
+      symbol = fields[1];
+      break;
+    case 1:
+      symbol = fields[0];
+      break;
+    case 0:
+      // Blank or comment-only line.
+      break;
+    default:
+      llvm_unreachable("too many fields in order file");
+    }
+
+    if (!arch.empty()) {
+      if (!isArchString(arch)) {
+        error("invalid arch \"" + arch + "\" in order file: expected one of " +
+              llvm::join(archNames, ", "));
+        continue;
+      }
+
+      // TODO: Update when we extend support for other archs
+      if (arch != "x86_64")
+        continue;
+    }
+
+    if (!objectFile.empty() && !objectFile.endswith(".o")) {
+      error("invalid object file name \"" + objectFile +
+            "\" in order file: should end with .o");
+      continue;
+    }
+
+    if (!symbol.empty()) {
+      SymbolPriorityEntry &entry = config->priorities[symbol];
+      // In both branches the first (i.e. highest-priority) entry wins:
+      // insert() does not overwrite an existing key, and priorities only
+      // decrease as we go down the file.
+      if (!objectFile.empty())
+        entry.objectFiles.insert(std::make_pair(objectFile, priority));
+      else
+        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
+    }
+
+    --priority;
+  }
+}
+
 // We expect sub-library names of the form "libfoo", which will
match a dylib
 // with a path of .*/libfoo.dylib.
 static bool markSubLibrary(StringRef searchName) {
@@ -212,6 +306,10 @@
       error("-sub_library " + searchName + " does not match a supplied dylib");
   }
 
+  StringRef orderFile = args.getLastArgValue(OPT_order_file);
+  if (!orderFile.empty())
+    parseOrderFile(orderFile);
+
   // dyld requires us to load libSystem. Since we may run tests on non-OSX
   // systems which do not have libSystem, we mock it out here.
   // TODO: Replace this with a stub tbd file once we have TAPI support.
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -323,6 +323,9 @@
           ": could not get the buffer for the member defining symbol " +
           sym.getName());
   auto file = make(mb);
+  // Also record the member's symbols on this file -- presumably so that the
+  // order file pass in Writer.cpp, which walks file->symbols, can see them.
+  symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end());
   sections.insert(sections.end(), file->sections.begin(), file->sections.end());
 }
 
diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h
--- a/lld/MachO/MergedOutputSection.h
+++ b/lld/MachO/MergedOutputSection.h
@@ -22,7 +22,7 @@
 // in the final binary.
 class MergedOutputSection : public OutputSection {
 public:
-  MergedOutputSection(StringRef name) : OutputSection(name) {}
+  MergedOutputSection(StringRef name) : OutputSection(MergedKind, name) {}
 
   const InputSection *firstSection() const { return inputs.front(); }
   const InputSection *lastSection() const { return inputs.back(); }
@@ -38,6 +38,11 @@
 
   std::vector inputs;
 
+  // LLVM-style RTTI support: lets isa<>/dyn_cast<> recognize this subclass.
+  static bool classof(const OutputSection *sec) {
+    return sec->kind() == MergedKind;
+  }
+
 private:
   void mergeFlags(uint32_t inputFlags);
 
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -23,6 +23,10 @@
 def o: Separate<["-"], "o">, MetaVarName<"">,
   HelpText<"Path to file to write output">;
 
+def order_file: Separate<["-"], "order_file">, MetaVarName<"<file>">,
+  HelpText<"Lay out symbols within each section in the order specified by the "
+           "order file">;
+
 def sub_library: Separate<["-"], "sub_library">, MetaVarName<"">,
   HelpText<"Re-export the specified dylib">;
 
diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
--- a/lld/MachO/OutputSection.h
+++ b/lld/MachO/OutputSection.h
@@ -24,8 +24,16 @@
 // linker with the same segment / section name.
 class OutputSection {
 public:
-  OutputSection(StringRef name) : name(name) {}
+  // Identifies the concrete subclass. Each subclass passes its own value to
+  // the constructor and compares against it in classof().
+  enum Kind {
+    MergedKind,
+    SyntheticKind,
+  };
+
+  OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}
   virtual ~OutputSection() = default;
+  Kind kind() const { return sectionKind; }
 
   // These accessors will only be valid after finalizing the section.
   uint64_t getSegmentOffset() const;
@@ -60,6 +68,9 @@
   uint64_t fileOff = 0;
   uint32_t align = 1;
   uint32_t flags = 0;
+
+private:
+  Kind sectionKind;
 };
 
 class OutputSectionComparator {
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -93,7 +93,6 @@
 
 OutputSegment *getOutputSegment(StringRef name);
 OutputSegment *getOrCreateOutputSegment(StringRef name);
-void sortOutputSegmentsAndSections();
 
 } // namespace macho
 } // namespace lld
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -129,23 +129,3 @@
   outputSegments.push_back(segRef);
   return segRef;
 }
-
-void macho::sortOutputSegmentsAndSections() {
-  // Sorting only can happen once all outputs have been collected.
-  // Since output sections are grouped by segment, sorting happens
-  // first over all segments, then over sections per segment.
-  auto comparator = OutputSegmentComparator();
-  llvm::stable_sort(outputSegments, comparator);
-
-  // Now that the output sections are sorted, assign the final
-  // output section indices.
-  uint32_t sectionIndex = 0;
-  for (OutputSegment *seg : outputSegments) {
-    seg->sortOutputSections(&comparator);
-    for (auto &p : seg->getSections()) {
-      OutputSection *section = p.second;
-      if (!section->isHidden())
-        section->index = ++sectionIndex;
-    }
-  }
-}
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -40,6 +40,11 @@
 public:
   SyntheticSection(const char *segname, const char *name);
   virtual ~SyntheticSection() = default;
+
+  // LLVM-style RTTI support: lets isa<>/dyn_cast<> recognize this subclass.
+  static bool classof(const OutputSection *sec) {
+    return sec->kind() == SyntheticKind;
+  }
 };
 
 // The header of the Mach-O file, which must have a file offset of zero.
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -28,7 +28,7 @@
 namespace macho {
 
 SyntheticSection::SyntheticSection(const char *segname, const char *name)
-    : OutputSection(name) {
+    : OutputSection(SyntheticKind, name) {
   // Synthetic sections always know which segment they belong to so hook
   // them up when they're made
   getOrCreateOutputSegment(segname)->addOutputSection(this);
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -10,6 +10,8 @@
 #include "Config.h"
 #include "InputFiles.h"
 #include "InputSection.h"
+#include "MergedOutputSection.h"
+#include "OutputSection.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -21,6 +23,7 @@
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -293,6 +296,83 @@
   }
 }
 
+// Returns the priority `entry` assigns to a symbol defined in `file`: the
+// higher of the file-specific priority (keyed on the file's base name, and
+// zero if there is none) and the any-file priority.
+static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
+                                const InputFile &file) {
+  return std::max(entry.objectFiles.lookup(sys::path::filename(file.getName())),
+                  entry.anyObjectFile);
+}
+
+// Each section gets assigned the priority of the highest-priority symbol it
+// contains. Sections with no symbol named in config->priorities get no entry
+// in the returned map, and the map is empty when config->priorities is.
+static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
+  DenseMap<const InputSection *, size_t> sectionPriorities;
+
+  if (config->priorities.empty())
+    return sectionPriorities;
+
+  auto addSym = [&](Defined &sym) {
+    auto it = config->priorities.find(sym.getName());
+    if (it == config->priorities.end())
+      return;
+
+    const SymbolPriorityEntry &entry = it->second;
+    size_t &priority = sectionPriorities[sym.isec];
+    priority = std::max(priority, getSymbolPriority(entry, *sym.isec->file));
+  };
+
+  // TODO: Make sure this handles weak symbols correctly.
+  // NOTE(review): ObjFile/ArchiveFile assumed to be the InputFile kinds that
+  // populate `symbols` -- confirm against InputFiles.h.
+  for (InputFile *file : inputFiles)
+    if (isa<ObjFile>(file) || isa<ArchiveFile>(file))
+      for (Symbol *sym : file->symbols)
+        if (auto *d = dyn_cast<Defined>(sym))
+          addSym(*d);
+
+  return sectionPriorities;
+}
+
+// Sorting only can happen once all outputs have been collected. Here we sort
+// segments, output sections within each segment, and input sections within each
+// output section.
+static void sortSegmentsAndSections() {
+  auto comparator = OutputSegmentComparator();
+  llvm::stable_sort(outputSegments, comparator);
+
+  DenseMap<const InputSection *, size_t> isecPriorities =
+      buildInputSectionPriorities();
+
+  uint32_t sectionIndex = 0;
+  for (OutputSegment *seg : outputSegments) {
+    seg->sortOutputSections(&comparator);
+    for (auto &p : seg->getSections()) {
+      OutputSection *section = p.second;
+      // Now that the output sections are sorted, assign the final
+      // output section indices.
+      if (!section->isHidden())
+        section->index = ++sectionIndex;
+
+      if (!isecPriorities.empty()) {
+        if (auto *merged = dyn_cast<MergedOutputSection>(section)) {
+          // Use lookup() rather than operator[]: operator[] inserts a default
+          // entry for a missing key, which can grow the table mid-comparison
+          // and invalidate the reference returned for the other operand.
+          // stable_sort keeps unprioritized sections in their input order.
+          llvm::stable_sort(merged->inputs,
+                            [&](InputSection *a, InputSection *b) {
+                              return isecPriorities.lookup(a) >
+                                     isecPriorities.lookup(b);
+                            });
+        }
+      }
+    }
+  }
+}
+
 void Writer::createOutputSections() {
   // First, create hidden sections
   headerSection = make();
@@ -383,9 +463,9 @@
   in.stubHelper->setup();
 
   // Sort and assign sections to their respective segments. No more sections nor
-  // segments may be created after this method runs.
+  // segments may be created after these methods run.
   createOutputSections();
-  sortOutputSegmentsAndSections();
+  sortSegmentsAndSections();
 
   createLoadCommands();
 
diff --git a/lld/test/MachO/invalid/order-file-bad-arch.test b/lld/test/MachO/invalid/order-file-bad-arch.test
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/order-file-bad-arch.test
@@ -0,0 +1,9 @@
+# REQUIRES: x86
+# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
+# RUN: not lld -flavor darwinnew -o %t %t.o -order_file %s 2>&1 | FileCheck %s
+# CHECK: error: invalid arch "sparc" in order file: expected one of arm, arm64, i386, x86_64, ppc, ppc64
+# CHECK-EMPTY:
+
+_barsymbol
+sparc:hello.o:_foosymbol
+i386:hello.o:_foosymbol
diff --git a/lld/test/MachO/invalid/order-file-bad-objfile.test b/lld/test/MachO/invalid/order-file-bad-objfile.test
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/order-file-bad-objfile.test
@@ -0,0 +1,10 @@
+# REQUIRES: x86
+# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
+# RUN: not lld -flavor darwinnew -o %t %t.o -order_file %s 2>&1 | FileCheck %s
+# CHECK: invalid object file name "helloo" in order file: should end with .o
+# CHECK: invalid object file name "foo" in order file: should end with .o
+# CHECK-EMPTY:
+
+_barsymbol
+x86_64:helloo:_foosymbol
+foo:_foosymbol
diff --git a/lld/test/MachO/order-file.s b/lld/test/MachO/order-file.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/order-file.s
@@ -0,0 +1,109 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: echo ".globl _foo; .text; _foo: _bar: ret" | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o
+# RUN: rm -f %t/foo.a
+# RUN: llvm-ar rcs %t/foo.a %t/foo.o
+
+# FOO-FIRST: <_foo>:
+# FOO-FIRST: <_main>:
+
+# FOO-SECOND: <_main>:
+# FOO-SECOND: <_foo>:
+
+# RUN: echo "_foo # just a comment" > %t/ord-1
+# RUN: echo "_main #
another comment" >> %t/ord-1
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+## Output should be the same regardless of the command-line order of object files
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "_main # just a comment" > %t/ord-2
+# RUN: echo "_foo # another comment" >> %t/ord-2
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND
+
+# RUN: echo "foo.o:_foo" > %t/ord-file-match
+# RUN: echo "_main" >> %t/ord-file-match
+# RUN: lld -flavor darwinnew -o %t/test-file-match %t/test.o %t/foo.o -order_file %t/ord-file-match
+# RUN: llvm-objdump -d %t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST
+## Output should be the same regardless of the command-line order of object files
+# RUN: lld -flavor darwinnew -o %t/test-file-match %t/foo.o %t/test.o -order_file %t/ord-file-match
+# RUN: llvm-objdump -d %t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "bar.o:_foo" > %t/ord-file-nomatch
+# RUN: echo "_main" >> %t/ord-file-nomatch
+# RUN: echo "_foo" >> %t/ord-file-nomatch
+# RUN: lld -flavor darwinnew -o %t/test-file-nomatch %t/test.o %t/foo.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-file-nomatch %t/foo.o %t/test.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+
+## Test archives
+
+# RUN: lld -flavor darwinnew -o %t/test-archive-1 %t/test.o %t/foo.a -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-archive-1 %t/foo.a %t/test.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: lld -flavor darwinnew -o %t/test-archive-file-no-match %t/test.o %t/foo.a -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-archive-file-no-match %t/foo.a %t/test.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND
+
+## The following tests check that if an address is matched by multiple order
+## file entries, it should always use the lowest-ordered match.
+
+# RUN: echo "_foo" > %t/ord-multiple-1
+# RUN: echo "_main" >> %t/ord-multiple-1
+# RUN: echo "foo.o:_foo" >> %t/ord-multiple-1
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-multiple-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-multiple-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "foo.o:_foo" > %t/ord-multiple-2
+# RUN: echo "_main" >> %t/ord-multiple-2
+# RUN: echo "_foo" >> %t/ord-multiple-2
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-multiple-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-multiple-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "_foo" > %t/ord-multiple-3
+# RUN: echo "_main" >> %t/ord-multiple-3
+# RUN: echo "_foo" >> %t/ord-multiple-3
+# RUN: lld -flavor darwinnew -o %t/test-3 %t/test.o %t/foo.o -order_file %t/ord-multiple-3
+# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-3 %t/foo.o %t/test.o -order_file %t/ord-multiple-3
+# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "foo.o:_foo" > %t/ord-multiple-4
+# RUN: echo "_main" >> %t/ord-multiple-4
+# RUN: echo "foo.o:_foo" >> %t/ord-multiple-4
+# RUN: lld -flavor darwinnew -o %t/test-4 %t/test.o %t/foo.o -order_file %t/ord-multiple-4
+# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-4 %t/foo.o %t/test.o -order_file %t/ord-multiple-4
+# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST
+
+## _foo and _bar both point to the same location. When both symbols appear in
+## an order file, the location in question should be ordered according to the
+## lowest-ordered symbol that references it.
+# RUN: echo "_bar" > %t/ord-alias
+# RUN: echo "_main" >> %t/ord-alias
+# RUN: echo "_foo" >> %t/ord-alias
+# RUN: lld -flavor darwinnew -o %t/test-alias %t/test.o %t/foo.o -order_file %t/ord-alias
+# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-alias %t/foo.o %t/test.o -order_file %t/ord-alias
+# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
+
+.text
+.globl _main
+
+_main:
+  callq _foo
+  ret