diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +106,19 @@
   MemoryBufferRef mbref = *buffer;
 
   switch (identify_magic(mbref.getBuffer())) {
+  case file_magic::archive: {
+    std::unique_ptr<object::Archive> file = CHECK(
+        object::Archive::create(mbref), path + ": failed to parse archive");
+
+    if (!file->isEmpty() && !file->hasSymbolTable())
+      error(path + ": archive has no index; run ranlib to add one");
+
+    // Ownership is handed over as a raw pointer: the LazySymbols created by
+    // ArchiveFile keep Archive::Symbol copies obtained from this Archive, and
+    // nothing in this change deletes it afterwards.
+    inputFiles.push_back(make<ArchiveFile>(file.release()));
+    break;
+  }
   case file_magic::macho_object:
     inputFiles.push_back(make<ObjFile>(mbref));
     break;
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -28,6 +28,7 @@
   enum Kind {
     ObjKind,
     DylibKind,
+    ArchiveKind,
   };
 
   virtual ~InputFile() = default;
@@ -81,6 +82,21 @@
   std::vector<DylibFile *> reexported;
 };
 
+// .a file
+class ArchiveFile : public InputFile {
+public:
+  explicit ArchiveFile(llvm::object::Archive *file);
+  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
+  // Loads the archive member that defines `sym` as an object file. Returns
+  // nullptr if that member has been fetched before.
+  InputFile *fetch(const llvm::object::Archive::Symbol &sym);
+
+private:
+  // Keep track of children fetched from the archive by tracking
+  // which address offsets have been fetched already.
+  llvm::DenseSet<uint64_t> seen;
+};
+
 extern std::vector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -302,6 +302,36 @@
   return file;
 }
 
+// Registers every entry of the archive's symbol table with the global symbol
+// table through addLazy().
+ArchiveFile::ArchiveFile(llvm::object::Archive *f)
+    : InputFile(ArchiveKind, f->getMemoryBufferRef()) {
+  for (const object::Archive::Symbol &sym : f->symbols())
+    symtab->addLazy(sym.getName(), this, sym);
+}
+
+// Loads the member defining `sym`. A member is loaded at most once; asking
+// for an already-loaded member returns nullptr.
+InputFile *ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+  object::Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member for symbol " +
+                                 sym.getName());
+
+  if (!seen.insert(c.getChildOffset()).second)
+    return nullptr;
+
+  MemoryBufferRef mb =
+      CHECK(c.getMemoryBufferRef(),
+            toString(this) +
+                ": could not get the buffer for the member defining symbol " +
+                sym.getName());
+  auto file = make<ObjFile>(mb);
+  // Append the member's sections to this archive's own section list.
+  sections.insert(sections.end(), file->sections.begin(), file->sections.end());
+  return file;
+}
+
 // Returns "<internal>" or "baz.o".
 std::string lld::toString(const InputFile *file) {
   return file ? std::string(file->getName()) : "<internal>";
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -30,6 +30,9 @@
 
   Symbol *addDylib(StringRef name, DylibFile *file);
 
+  Symbol *addLazy(StringRef name, ArchiveFile *file,
+                  const llvm::object::Archive::Symbol &sym);
+
   ArrayRef<Symbol *> getSymbols() const { return symVector; }
   Symbol *find(StringRef name);
 
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -56,6 +56,8 @@
 
   if (wasInserted)
     replaceSymbol<Undefined>(s, name);
+  else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
+    lazy->fetchFile();
   return s;
 }
 
@@ -69,4 +71,19 @@
   return s;
 }
 
+// Adds `name` as a lazy symbol backed by `sym` in `file`. If the name is
+// already present as an Undefined, the defining member is fetched at once.
+Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
+                             const llvm::object::Archive::Symbol &sym) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(name);
+
+  if (wasInserted)
+    replaceSymbol<LazySymbol>(s, file, sym);
+  else if (isa<Undefined>(s))
+    file->fetch(sym);
+  return s;
+}
+
 SymbolTable *macho::symtab;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -35,6 +35,7 @@
     DefinedKind,
     UndefinedKind,
     DylibKind,
+    LazyKind,
   };
 
   Kind kind() const { return static_cast<Kind>(symbolKind); }
@@ -81,6 +82,22 @@
   uint32_t lazyBindOffset = UINT32_MAX;
 };
 
+// A symbol listed in an archive's symbol table. fetchFile() asks the owning
+// ArchiveFile to load the member that defines it.
+class LazySymbol : public Symbol {
+public:
+  LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
+      : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
+
+  static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
+
+  InputFile *fetchFile();
+
+private:
+  ArchiveFile *file;
+  const llvm::object::Archive::Symbol sym;
+};
+
 inline uint64_t Symbol::getVA() const {
   if (auto *d = dyn_cast<Defined>(this))
     return d->isec->getVA() + d->value;
@@ -91,6 +108,7 @@
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
   alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
+  alignas(LazySymbol) char d[sizeof(LazySymbol)];
 };
 
 template <typename T, typename... ArgT>
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -15,6 +15,8 @@
 using namespace lld;
 using namespace lld::macho;
 
+InputFile *LazySymbol::fetchFile() { return file->fetch(sym); }
+
 // Returns a symbol for an error message.
 std::string lld::toString(const Symbol &sym) {
   if (Optional<std::string> s = demangleItanium(sym.getName()))
diff --git a/lld/test/MachO/Inputs/archive2.s b/lld/test/MachO/Inputs/archive2.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/Inputs/archive2.s
@@ -0,0 +1,4 @@
+.global _boo
+_boo:
+  mov $2, %rax
+  ret
diff --git a/lld/test/MachO/Inputs/archive3.s b/lld/test/MachO/Inputs/archive3.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/Inputs/archive3.s
@@ -0,0 +1,4 @@
+.global _bar
+_bar:
+  mov $3, %rax
+  ret
diff --git a/lld/test/MachO/Inputs/archive4.s b/lld/test/MachO/Inputs/archive4.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/Inputs/archive4.s
@@ -0,0 +1,5 @@
+.global _alone
+.global _lonely
+_lonely:
+  mov $5, %rax
+  ret
diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/archive.s
@@ -0,0 +1,35 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/Inputs/archive2.s -o %t/2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/Inputs/archive3.s -o %t/3.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/Inputs/archive4.s -o %t/4.o
+
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o
+# RUN: lld -flavor darwinnew %t/main.o %t/test.a -o %t/test.out
+
+## TODO: Run llvm-nm -p to validate symbol order
+# RUN: llvm-nm %t/test.out | FileCheck %s
+# CHECK: T _bar
+# CHECK: T _boo
+# CHECK: T _main
+
+## Linking with the archive first in the command line shouldn't change anything
+# RUN: lld -flavor darwinnew %t/test.a %t/main.o -o %t/test.out
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: T _bar
+# ARCHIVE-FIRST: T _boo
+# ARCHIVE-FIRST: T _main
+
+
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE
+# VISIBLE-NOT: T _alone
+# VISIBLE-NOT: T _lonely
+
+.global _main
+_main:
+  callq _boo
+  callq _bar
+  mov $0, %rax
+  ret
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/archive-no-index.s
@@ -0,0 +1,15 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/../Inputs/archive2.s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/../Inputs/archive3.s -o %t3.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %S/../Inputs/archive4.s -o %t4.o
+# RUN: rm -f %t.a
+# RUN: llvm-ar rcS %t.a %t2.o %t3.o %t4.o
+
+# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck %s
+# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
+
+.global _main
+_main:
+  mov $0, %rax
+  ret
diff --git a/lld/test/MachO/invalid/bad-archive.s b/lld/test/MachO/invalid/bad-archive.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-archive.s
@@ -0,0 +1,11 @@
+# REQUIRES: x86
+# RUN: echo "!<arch>" > %t.a
+# RUN: echo "foo" >> %t.a
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck -DFILE=%t.a %s
+# CHECK: error: [[FILE]]: failed to parse archive: truncated or malformed archive (remaining size of archive too small for next archive member header at offset 8)
+
+.global _main
+_main:
+  ret
diff --git a/lld/test/MachO/symbol-order.s b/lld/test/MachO/symbol-order.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/symbol-order.s
@@ -0,0 +1,48 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global f; .section __TEXT,test_g; .global g" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o
+# RUN: echo ".section __TEXT,test_f1; .global f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o
+# RUN: echo ".section __TEXT,test_f2; .global f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o
+# RUN: echo ".section __TEXT,test_fg; .global f; .global g" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+
+## lld -flavor darwinnew
+# RUN: lld -flavor darwinnew -dylib -o %t/libf1.dylib %t/f1.o
+
+# RUN: rm -f %t/libf2_g.a
+# RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o
+
+# RUN: rm -f %t/libfg.a
+# RUN: llvm-ar rcs %t/libfg.a %t/fg.o
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix DYLIB-FIRST
+# DYLIB-FIRST: SYMBOL TABLE:
+# DYLIB-FIRST-DAG: {{[0-9a-z]+}} g O __TEXT,test_g g
+# DYLIB-FIRST: Lazy bind table:
+# DYLIB-FIRST-NEXT: segment section address dylib symbol
+# DYLIB-FIRST-NEXT: __DATA __la_symbol_ptr {{[0-9a-z]+}} libf1 f
+
+# RUN: lld -flavor darwinnew %t/libf2_g.a %t/libf1.dylib %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: SYMBOL TABLE:
+# ARCHIVE-FIRST-DAG: {{[0-9a-z]+}} g O __TEXT,test_f2 f
+# ARCHIVE-FIRST-DAG: {{[0-9a-z]+}} g O __TEXT,test_g g
+# ARCHIVE-FIRST: Lazy bind table:
+# ARCHIVE-FIRST-NEXT: segment section address dylib symbol
+# ARCHIVE-FIRST-EMPTY:
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libfg.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-PRIORITY
+# ARCHIVE-PRIORITY: SYMBOL TABLE:
+# ARCHIVE-PRIORITY-DAG: {{[0-9a-z]+}} g O __TEXT,test_fg f
+# ARCHIVE-PRIORITY-DAG: {{[0-9a-z]+}} g O __TEXT,test_fg g
+# ARCHIVE-PRIORITY: Lazy bind table:
+# ARCHIVE-PRIORITY-NEXT: segment section address dylib symbol
+# ARCHIVE-PRIORITY-EMPTY:
+
+.global g
+.global _main
+_main:
+  callq g
+  ret