Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -44,25 +44,6 @@ return MBRef; } -static std::unique_ptr createFile(MemoryBufferRef MB) { - std::pair Type = - object::getElfArchType(MB.getBuffer()); - if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB) - error("Invalid data encoding"); - - if (Type.first == ELF::ELFCLASS32) { - if (Type.second == ELF::ELFDATA2LSB) - return make_unique>(MB); - return make_unique>(MB); - } - if (Type.first == ELF::ELFCLASS64) { - if (Type.second == ELF::ELFDATA2LSB) - return make_unique>(MB); - return make_unique>(MB); - } - error("Invalid file class"); -} - void LinkerDriver::link(ArrayRef ArgsArr) { // Parse command line options. opt::InputArgList Args = Parser.parse(ArgsArr); Index: ELF/InputFiles.h =================================================================== --- ELF/InputFiles.h +++ ELF/InputFiles.h @@ -14,16 +14,28 @@ #include "Symbols.h" #include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" namespace lld { namespace elf2 { + +using llvm::object::Archive; + +class Lazy; class SymbolBody; // The root class of input files. class InputFile { public: - enum Kind { Object32LEKind, Object32BEKind, Object64LEKind, Object64BEKind }; + enum Kind { + Object32LEKind, + Object32BEKind, + Object64LEKind, + Object64BEKind, + ArchiveKind + }; Kind kind() const { return FileKind; } virtual ~InputFile() {} @@ -115,6 +127,33 @@ ArrayRef SymtabSHNDX; }; +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + void parse() override; + + // Returns a memory buffer for a given symbol. An empty memory buffer + // is returned if we have already returned the same memory buffer. + // (So that we don't instantiate same members more than once.) + MemoryBufferRef getMember(const Archive::Symbol *Sym); + + ArrayRef getLazySymbols() { return LazySymbols; } + + // All symbols returned by ArchiveFiles are of Lazy type. + ArrayRef getSymbols() override { + llvm_unreachable("internal error"); + } + +private: + std::unique_ptr File; + std::vector LazySymbols; + llvm::DenseMap Seen; + llvm::MallocAllocator Alloc; +}; + +std::unique_ptr createFile(MemoryBufferRef MB); + } // namespace elf2 } // namespace lld Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -12,8 +12,13 @@ #include "Error.h" #include "Symbols.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" +using namespace llvm; using namespace llvm::ELF; +using namespace llvm::object; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; using namespace lld; using namespace lld::elf2; @@ -124,6 +129,65 @@ } } +void ArchiveFile::parse() { + auto ArchiveOrErr = Archive::create(MB); + error(ArchiveOrErr, "Failed to parse archive"); + File = std::move(*ArchiveOrErr); + + // Allocate a buffer for Lazy objects. + size_t NumSyms = File->getNumberOfSymbols(); + size_t BufSize = NumSyms * sizeof(Lazy); + Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf()); + LazySymbols.reserve(NumSyms); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + LazySymbols.push_back(new (Buf++) Lazy(this, Sym)); +} + +// Returns a buffer pointing to a member file containing a given symbol. +MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { + auto ItOrErr = Sym->getMember(); + error(ItOrErr, + Twine("Could not get the member for symbol ") + Sym->getName()); + Archive::child_iterator It = *ItOrErr; + + // Return an empty buffer if we have already returned the same buffer. + bool &SeenMember = Seen[It->getChildOffset()]; + if (SeenMember) { + return MemoryBufferRef(); + } + SeenMember = true; + ErrorOr Ret = It->getMemoryBufferRef(); + error(Ret, Twine("Could not get the buffer for the member defining symbol ") + + Sym->getName()); + return *Ret; +} + +std::unique_ptr lld::elf2::createFile(MemoryBufferRef MB) { + file_magic Magic = identify_magic(MB.getBuffer()); + + if (Magic == file_magic::archive) + return llvm::make_unique(MB); + + std::pair Type = + object::getElfArchType(MB.getBuffer()); + if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB) + error("Invalid data encoding"); + + if (Type.first == ELF::ELFCLASS32) { + if (Type.second == ELF::ELFDATA2LSB) + return make_unique>(MB); + return make_unique>(MB); + } + if (Type.first == ELF::ELFCLASS64) { + if (Type.second == ELF::ELFDATA2LSB) + return make_unique>(MB); + return make_unique>(MB); + } + error("Invalid file class"); +} + namespace lld { namespace elf2 { template class elf2::ObjectFile; Index: ELF/SymbolTable.h =================================================================== --- ELF/SymbolTable.h +++ ELF/SymbolTable.h @@ -38,18 +38,23 @@ void reportRemainingUndefines(); // The writer needs to infer the machine type from the object files. - std::vector> ObjectFiles; + std::vector ObjectFiles; const llvm::DenseMap &getSymbols() const { return Symtab; } private: + Symbol *insert(SymbolBody *New); void addObject(ObjectFileBase *File); + void addLazy(Lazy *New); + void addMemberFile(Lazy *Body); template void init(); template void resolve(SymbolBody *Body); + std::vector> Files; + llvm::DenseMap Symtab; llvm::BumpPtrAllocator Alloc; }; Index: ELF/SymbolTable.cpp =================================================================== --- ELF/SymbolTable.cpp +++ ELF/SymbolTable.cpp @@ -22,9 +22,14 @@ void SymbolTable::addFile(std::unique_ptr File) { File->parse(); - InputFile *FileP = File.release(); - auto *P = cast(FileP); - addObject(P); + InputFile *FileP = File.get(); + Files.push_back(std::move(File)); + if (auto *AF = dyn_cast(FileP)) { + for (Lazy *Sym : AF->getLazySymbols()) + addLazy(Sym); + return; + } + addObject(cast(FileP)); } template void SymbolTable::init() { @@ -84,22 +89,65 @@ // This function resolves conflicts if there's an existing symbol with // the same name. Decisions are made based on symbol type. template void SymbolTable::resolve(SymbolBody *New) { + Symbol *Sym = insert(New); + if (Sym->Body == New) + return; + + SymbolBody *Existing = Sym->Body; + + if (Lazy *L = dyn_cast(Existing)) { + if (New->isUndefined()) { + addMemberFile(L); + return; + } + + // Found a definition for something also in an archive. Ignore the archive + // definition. + Sym->Body = New; + return; + } + + // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, + // equivalent (conflicting), or more preferable, respectively. + int comp = Existing->compare(New); + if (comp < 0) + Sym->Body = New; + if (comp == 0) + error(Twine("duplicate symbol: ") + Sym->Body->getName()); +} + +Symbol *SymbolTable::insert(SymbolBody *New) { // Find an existing Symbol or create and insert a new one. StringRef Name = New->getName(); Symbol *&Sym = Symtab[Name]; if (!Sym) { Sym = new (Alloc) Symbol(New); New->setBackref(Sym); - return; + return Sym; } New->setBackref(Sym); + return Sym; +} - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. +void SymbolTable::addLazy(Lazy *New) { + Symbol *Sym = insert(New); + if (Sym->Body == New) + return; SymbolBody *Existing = Sym->Body; - int comp = Existing->compare(New); - if (comp < 0) - Sym->Body = New; - if (comp == 0) - error(Twine("duplicate symbol: ") + Name); + if (Existing->isDefined() || isa(Existing)) + return; + Sym->Body = New; + if (Existing->isUndefined()) + addMemberFile(New); +} + +void SymbolTable::addMemberFile(Lazy *Body) { + std::unique_ptr File = Body->getMember(); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (!File) + return; + + addFile(std::move(File)); } Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -13,13 +13,13 @@ #include "Chunks.h" #include "lld/Core/LLVM.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" namespace lld { namespace elf2 { -using llvm::object::ELFFile; - +class ArchiveFile; class Chunk; class InputFile; class SymbolBody; @@ -42,7 +42,8 @@ DefinedAbsoluteKind = 1, DefinedCommonKind = 2, DefinedLast = 2, - UndefinedKind = 3 + UndefinedKind = 3, + LazyKind = 4, }; Kind kind() const { return static_cast(SymbolKind); } @@ -52,6 +53,7 @@ bool isDefined() const { return !isUndefined(); } bool isStrongUndefined() const { return !IsWeak && isUndefined(); } bool isCommon() const { return SymbolKind == DefinedCommonKind; } + bool isLazy() const { return SymbolKind == LazyKind; } // Returns the symbol name. StringRef getName() const { return Name; } @@ -200,6 +202,28 @@ template typename Undefined::Elf_Sym Undefined::Synthetic; +// This class represents a symbol defined in an archive file. It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and Lazy for +// the same name, it will ask the Lazy to load a file. +class Lazy : public SymbolBody { +public: + Lazy(ArchiveFile *F, const llvm::object::Archive::Symbol S) + : SymbolBody(LazyKind, S.getName(), false, llvm::ELF::STV_DEFAULT), + File(F), Sym(S) {} + + static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. + std::unique_ptr getMember(); + +private: + ArchiveFile *File; + const llvm::object::Archive::Symbol Sym; +}; + } // namespace elf2 } // namespace lld Index: ELF/Symbols.cpp =================================================================== --- ELF/Symbols.cpp +++ ELF/Symbols.cpp @@ -29,6 +29,7 @@ // Returns 1, 0 or -1 if this symbol should take precedence // over the Other, tie or lose, respectively. template int SymbolBody::compare(SymbolBody *Other) { + assert(!isLazy() && !Other->isLazy()); std::pair L(isDefined(), !isWeak()); std::pair R(Other->isDefined(), !Other->isWeak()); @@ -67,6 +68,17 @@ return 1; } +std::unique_ptr Lazy::getMember() { + MemoryBufferRef MBRef = File->getMember(&Sym); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (MBRef.getBuffer().empty()) + return std::unique_ptr(nullptr); + + return createFile(MBRef); +} + template int SymbolBody::compare(SymbolBody *Other); template int SymbolBody::compare(SymbolBody *Other); template int SymbolBody::compare(SymbolBody *Other); Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -432,9 +432,9 @@ }; const SymbolTable &Symtab = SymTable.getSymTable(); - for (const std::unique_ptr &FileB : Symtab.ObjectFiles) { - auto &File = cast>(*FileB); - for (SectionChunk *C : File.getChunks()) { + for (ObjectFileBase *FileB : Symtab.ObjectFiles) { + auto File = cast>(FileB); + for (SectionChunk *C : File->getChunks()) { if (!C) continue; const Elf_Shdr *H = C->getSectionHdr(); Index: test/elf2/archive.s =================================================================== --- /dev/null +++ test/elf2/archive.s @@ -0,0 +1,21 @@ +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/basic.s -o %t2 +// RUN: llvm-ar rcs %tar %t2 +// RUN: lld -flavor gnu2 %t %tar -o %tout +// RUN: llvm-objdump -t -d %tout | FileCheck %s +// REQUIRES: x86 + + +.section .text,"ax" +call _start + +// CHECK: Disassembly of section .text: +// CHECK: .text: +// CHECK: e8 03 00 00 00 callq 3 +// CHECK: _start: +// CHECK: 48 c7 c0 3c 00 00 00 movq $60, %rax +// CHECK: 48 c7 c7 2a 00 00 00 movq $42, %rdi +// CHECK: 0f 05 syscall +// CHECK: SYMBOL TABLE: +// CHECK: 0000000000000000 *UND* 00000000 +// CHECK: 0000000000001008 .text 00000000 _start