[lld-macho] Add basic support for linking against archives

Teach the Mach-O port of lld to accept static archives (.a files) as inputs.

* Driver: archive inputs are parsed with llvm::object::Archive. An archive
  that has members but no symbol table is reported as an error.
* ArchiveFile: registers every name in the archive's symbol table as a
  LazySymbol and loads a member only when one of its symbols is fetched.
  Each member is loaded at most once. ArchiveFile owns the parsed archive.
* SymbolTable: an undefined reference to a name held by a LazySymbol fetches
  the defining member; a lazy name added while an undefined reference to it
  already exists is fetched immediately, so the position of the archive on
  the command line does not change the result.

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +106,18 @@
   MemoryBufferRef mbref = *buffer;
 
   switch (identify_magic(mbref.getBuffer())) {
+  case file_magic::archive: {
+    std::unique_ptr<object::Archive> file = CHECK(
+        object::Archive::create(mbref), path + ": failed to parse archive");
+
+    // Members are only ever loaded through the archive's symbol table, so an
+    // archive that has members but no index cannot contribute anything.
+    if (!file->isEmpty() && !file->hasSymbolTable())
+      error(path + ": archive has no index; run ranlib to add one");
+
+    inputFiles.push_back(make<ArchiveFile>(std::move(file)));
+    break;
+  }
   case file_magic::macho_object:
     inputFiles.push_back(make<ObjFile>(mbref));
     break;
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -28,6 +28,7 @@
   enum Kind {
     ObjKind,
     DylibKind,
+    ArchiveKind,
   };
 
   virtual ~InputFile() = default;
@@ -81,6 +82,25 @@
   std::vector<DylibFile *> reexported;
 };
 
+// .a file
+//
+// Every name in the archive's symbol table is registered as a lazy symbol;
+// a member is only loaded when fetch() is called for one of its symbols.
+class ArchiveFile : public InputFile {
+public:
+  explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
+  // Loads the member that defines `sym`; a member is loaded at most once.
+  void fetch(const llvm::object::Archive::Symbol &sym);
+
+private:
+  // Owns the parsed archive that the lazy symbols' Archive::Symbol refer to.
+  std::unique_ptr<llvm::object::Archive> file;
+  // Keep track of children fetched from the archive by tracking
+  // which address offsets have been fetched already.
+  llvm::DenseSet<uint64_t> seen;
+};
+
 extern std::vector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -302,6 +302,35 @@
   return file;
 }
 
+// Takes ownership of the parsed archive and registers each entry of its
+// symbol table as a lazy symbol that refers back to this file.
+ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f)
+    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+  for (const object::Archive::Symbol &sym : file->symbols())
+    symtab->addLazy(sym.getName(), this, sym);
+}
+
+// Loads the archive member that defines `sym` and appends its sections to
+// this file's section list. Members are identified by their offset in the
+// archive, so a member reached through several symbols is loaded only once.
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+  object::Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member for symbol " +
+                                 sym.getName());
+
+  if (!seen.insert(c.getChildOffset()).second)
+    return;
+
+  MemoryBufferRef mb =
+      CHECK(c.getMemoryBufferRef(),
+            toString(this) +
+                ": could not get the buffer for the member defining symbol " +
+                sym.getName());
+  auto obj = make<ObjFile>(mb);
+  sections.insert(sections.end(), obj->sections.begin(), obj->sections.end());
+}
+
 // Returns "<internal>" or "baz.o".
 std::string lld::toString(const InputFile *file) {
   return file ? std::string(file->getName()) : "<internal>";
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -30,6 +30,9 @@
 
   Symbol *addDylib(StringRef name, DylibFile *file);
 
+  Symbol *addLazy(StringRef name, ArchiveFile *file,
+                  const llvm::object::Archive::Symbol &sym);
+
   ArrayRef<Symbol *> getSymbols() const { return symVector; }
   Symbol *find(StringRef name);
 
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -56,6 +56,8 @@
 
   if (wasInserted)
     replaceSymbol<Undefined>(s, name);
+  else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
+    lazy->fetchArchiveMember();
   return s;
 }
 
@@ -69,4 +71,20 @@
   return s;
 }
 
+// Registers `name` as lazily provided by `file`. If the name is new, a
+// LazySymbol is created for it; if an undefined reference to it is already
+// pending, the defining archive member is fetched right away.
+Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
+                             const llvm::object::Archive::Symbol &sym) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(name);
+
+  if (wasInserted)
+    replaceSymbol<LazySymbol>(s, file, sym);
+  else if (isa<Undefined>(s))
+    file->fetch(sym);
+  return s;
+}
+
 SymbolTable *macho::symtab;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -35,6 +35,7 @@
     DefinedKind,
     UndefinedKind,
     DylibKind,
+    LazyKind,
   };
 
   Kind kind() const { return static_cast<Kind>(symbolKind); }
@@ -81,6 +82,22 @@
   uint32_t lazyBindOffset = UINT32_MAX;
 };
 
+// A symbol named in an archive's symbol table. fetchArchiveMember() asks the
+// owning ArchiveFile to load the member that defines it.
+class LazySymbol : public Symbol {
+public:
+  LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
+      : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
+
+  static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
+
+  void fetchArchiveMember();
+
+private:
+  ArchiveFile *file;
+  const llvm::object::Archive::Symbol sym;
+};
+
 inline uint64_t Symbol::getVA() const {
   if (auto *d = dyn_cast<Defined>(this))
     return d->isec->getVA() + d->value;
@@ -91,6 +108,7 @@
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
   alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
+  alignas(LazySymbol) char d[sizeof(LazySymbol)];
 };
 
 template <typename T, typename... ArgT>
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -15,6 +15,8 @@
 using namespace lld;
 using namespace lld::macho;
 
+void LazySymbol::fetchArchiveMember() { file->fetch(sym); }
+
 // Returns a symbol for an error message.
 std::string lld::toString(const Symbol &sym) {
   if (Optional<std::string> s = demangleItanium(sym.getName()))
diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/archive.s
@@ -0,0 +1,35 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o
+# RUN: lld -flavor darwinnew %t/main.o %t/test.a -o %t/test.out
+
+## TODO: Run llvm-nm -p to validate symbol order
+# RUN: llvm-nm %t/test.out | FileCheck %s
+# CHECK: T _bar
+# CHECK: T _boo
+# CHECK: T _main
+
+## Linking with the archive first in the command line shouldn't change anything
+# RUN: lld -flavor darwinnew %t/test.a %t/main.o -o %t/test.out
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: T _bar
+# ARCHIVE-FIRST: T _boo
+# ARCHIVE-FIRST: T _main
+
+
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE
+# VISIBLE-NOT: T _undefined
+# VISIBLE-NOT: T _unused
+
+.global _main
+_main:
+  callq _boo
+  callq _bar
+  mov $0, %rax
+  ret
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/archive-no-index.s
@@ -0,0 +1,18 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+
+# RUN: rm -f %t/test.a
+## The S modifier tells llvm-ar not to build a symbol table.
+# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
+
+# RUN: not lld -flavor darwinnew %t/main.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
+# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
+
+.global _main
+_main:
+  mov $0, %rax
+  ret
diff --git a/lld/test/MachO/invalid/bad-archive.s b/lld/test/MachO/invalid/bad-archive.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-archive.s
@@ -0,0 +1,11 @@
+# REQUIRES: x86
+# RUN: echo "!<arch>" > %t.a
+# RUN: echo "foo" >> %t.a
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck -DFILE=%t.a %s
+# CHECK: error: [[FILE]]: failed to parse archive: truncated or malformed archive (remaining size of archive too small for next archive member header at offset 8)
+
+.global _main
+_main:
+  ret
diff --git a/lld/test/MachO/symbol-order.s b/lld/test/MachO/symbol-order.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/symbol-order.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global f, g; .section __TEXT,test_g; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o
+# RUN: echo ".global f; .section __TEXT,test_f1; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o
+# RUN: echo ".global f; .section __TEXT,test_f2; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o
+# RUN: echo ".global f, g; .section __TEXT,test_fg; f: ret; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -dylib -o %t/libf1.dylib %t/f1.o
+
+# RUN: rm -f %t/libf2_g.a
+# RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o
+
+# RUN: rm -f %t/libfg.a
+# RUN: llvm-ar rcs %t/libfg.a %t/fg.o
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix DYLIB-FIRST
+# DYLIB-FIRST: SYMBOL TABLE:
+# DYLIB-FIRST-DAG: __TEXT,test_g g
+# DYLIB-FIRST: Lazy bind table:
+# DYLIB-FIRST-NEXT: segment section address dylib symbol
+# DYLIB-FIRST-NEXT: __DATA __la_symbol_ptr {{[0-9a-z]+}} libf1 f
+
+# RUN: lld -flavor darwinnew %t/libf2_g.a %t/libf1.dylib %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: SYMBOL TABLE:
+# ARCHIVE-FIRST-DAG: __TEXT,test_f2 f
+# ARCHIVE-FIRST-DAG: __TEXT,test_g g
+# ARCHIVE-FIRST: Lazy bind table:
+# ARCHIVE-FIRST-NEXT: segment section address dylib symbol
+# ARCHIVE-FIRST-EMPTY:
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libfg.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-PRIORITY
+# ARCHIVE-PRIORITY: SYMBOL TABLE:
+# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg f
+# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg g
+# ARCHIVE-PRIORITY: Lazy bind table:
+# ARCHIVE-PRIORITY-NEXT: segment section address dylib symbol
+# ARCHIVE-PRIORITY-EMPTY:
+
+.global g
+.global _main
+_main:
+  callq g
+  ret