Index: COFF/Chunks.h =================================================================== --- COFF/Chunks.h +++ COFF/Chunks.h @@ -29,7 +29,6 @@ using llvm::object::SectionRef; using llvm::object::coff_relocation; using llvm::object::coff_section; -using llvm::sys::fs::file_magic; class Baserel; class Defined; Index: COFF/Chunks.cpp =================================================================== --- COFF/Chunks.cpp +++ COFF/Chunks.cpp @@ -134,7 +134,7 @@ // Apply relocations. for (const coff_relocation &Rel : Relocs) { uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); Defined *Sym = cast(Body); uint64_t P = RVA + Rel.VirtualAddress; switch (Config->Machine) { @@ -187,7 +187,7 @@ uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); if (isa(Body)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); Index: COFF/Config.h =================================================================== --- COFF/Config.h +++ COFF/Config.h @@ -26,7 +26,8 @@ class DefinedAbsolute; class DefinedRelative; class StringChunk; -class Undefined; +struct Symbol; +class SymbolBody; // Short aliases. 
static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -37,7 +38,7 @@ struct Export { StringRef Name; // N in /export:N or /export:E=N StringRef ExtName; // E in /export:E=N - Undefined *Sym = nullptr; + SymbolBody *Sym = nullptr; uint16_t Ordinal = 0; bool Noname = false; bool Data = false; @@ -69,7 +70,7 @@ llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; bool Verbose = false; WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; - Undefined *Entry = nullptr; + SymbolBody *Entry = nullptr; bool NoEntry = false; std::string OutputFile; bool DoGC = true; @@ -80,7 +81,7 @@ bool WriteSymtab = true; // Symbols in this set are considered as live by the garbage collector. - std::set GCRoot; + std::set GCRoot; std::set NoDefaultLibs; bool NoDefaultLibAll = false; @@ -91,11 +92,11 @@ std::vector Exports; std::set DelayLoads; std::map DLLOrder; - Undefined *DelayLoadHelper = nullptr; + SymbolBody *DelayLoadHelper = nullptr; // Used for SafeSEH. - DefinedRelative *SEHTable = nullptr; - DefinedAbsolute *SEHCount = nullptr; + Symbol *SEHTable = nullptr; + Symbol *SEHCount = nullptr; // Used for /opt:lldlto=N unsigned LTOOptLevel = 2; Index: COFF/DLL.cpp =================================================================== --- COFF/DLL.cpp +++ COFF/DLL.cpp @@ -324,7 +324,7 @@ if (E.ForwardChunk) { write32le(P, E.ForwardChunk->getRVA()); } else { - write32le(P, cast(E.Sym->repl())->getRVA()); + write32le(P, cast(E.Sym)->getRVA()); } } } Index: COFF/Driver.h =================================================================== --- COFF/Driver.h +++ COFF/Driver.h @@ -63,7 +63,9 @@ class LinkerDriver { public: - LinkerDriver() : Alloc(AllocAux) {} + LinkerDriver() : Alloc(AllocAux) { + coff::Symtab = &Symtab; + } void link(llvm::ArrayRef Args); // Used by the resolver to parse .drectve section contents. 
@@ -91,7 +93,7 @@ std::vector SearchPaths; std::set VisitedFiles; - Undefined *addUndefined(StringRef Sym); + SymbolBody *addUndefined(StringRef Sym); StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. Index: COFF/Driver.cpp =================================================================== --- COFF/Driver.cpp +++ COFF/Driver.cpp @@ -198,10 +198,10 @@ } } -Undefined *LinkerDriver::addUndefined(StringRef Name) { - Undefined *U = Symtab.addUndefined(Name); - Config->GCRoot.insert(U); - return U; +SymbolBody *LinkerDriver::addUndefined(StringRef Name) { + SymbolBody *B = Symtab.addUndefined(Name); + Config->GCRoot.insert(B); + return B; } // Symbol names are mangled by appending "_" prefix on x86. @@ -223,7 +223,7 @@ }; for (auto E : Entries) { StringRef Entry = Symtab.findMangle(mangle(E[0])); - if (!Entry.empty() && !isa(Symtab.find(Entry)->Body)) + if (!Entry.empty() && !isa(Symtab.find(Entry)->body())) return mangle(E[1]); } return ""; @@ -609,7 +609,7 @@ Symbol *Sym = Symtab.find(From); if (!Sym) continue; - if (auto *U = dyn_cast(Sym->Body)) + if (auto *U = dyn_cast(Sym->body())) if (!U->WeakAlias) U->WeakAlias = Symtab.addUndefined(To); } @@ -628,7 +628,7 @@ Symtab.addCombinedLTOObjects(); // Make sure we have resolved all symbols. - Symtab.reportRemainingUndefines(/*Resolve=*/true); + Symtab.reportRemainingUndefines(); // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. 
Index: COFF/DriverUtils.cpp =================================================================== --- COFF/DriverUtils.cpp +++ COFF/DriverUtils.cpp @@ -443,13 +443,16 @@ } for (Export &E : Config->Exports) { + SymbolBody *Sym = E.Sym; if (!E.ForwardTo.empty()) { E.SymbolName = E.Name; - } else if (Undefined *U = cast_or_null(E.Sym->WeakAlias)) { - E.SymbolName = U->getName(); - } else { - E.SymbolName = E.Sym->getName(); - } + } else { + // Only non-forwarded exports are named after their symbol, following a weak alias if one is set. + if (auto *U = dyn_cast(Sym)) + if (U->WeakAlias) + Sym = U->WeakAlias; + E.SymbolName = Sym->getName(); + } } for (Export &E : Config->Exports) { Index: COFF/ICF.cpp =================================================================== --- COFF/ICF.cpp +++ COFF/ICF.cpp @@ -106,8 +106,8 @@ R1.VirtualAddress != R2.VirtualAddress) { return false; } - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast(B1)) @@ -136,8 +136,8 @@ // Compare relocations. 
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast(B1)) Index: COFF/InputFiles.h =================================================================== --- COFF/InputFiles.h +++ COFF/InputFiles.h @@ -31,6 +31,7 @@ using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; @@ -38,6 +39,7 @@ class DefinedImportData; class DefinedImportThunk; class Lazy; +struct Symbol; class SymbolBody; class Undefined; @@ -51,9 +53,6 @@ // Returns the filename. StringRef getName() { return MB.getBufferIdentifier(); } - // Returns symbols defined by this file. - virtual std::vector &getSymbols() = 0; - // Reads a file (the constructor doesn't do that). virtual void parse() = 0; @@ -71,14 +70,8 @@ // Returns .drectve section contents if exist. StringRef getDirectives() { return StringRef(Directives).trim(); } - // Each file has a unique index. The index number is used to - // resolve ties in symbol resolution. - int Index; - static int NextIndex; - protected: - InputFile(Kind K, MemoryBufferRef M) - : Index(NextIndex++), MB(M), FileKind(K) {} + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} MemoryBufferRef MB; std::string Directives; @@ -95,22 +88,14 @@ static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } void parse() override; - // Returns a memory buffer for a given symbol. An empty memory buffer - // is returned if we have already returned the same memory buffer. - // (So that we don't instantiate same members more than once.) 
- MemoryBufferRef getMember(const Archive::Symbol *Sym); - - llvm::MutableArrayRef getLazySymbols() { return LazySymbols; } - - // All symbols returned by ArchiveFiles are of Lazy type. - std::vector &getSymbols() override { - llvm_unreachable("internal error"); - } + // Returns an input file for a given symbol. A null pointer is returned if we + // have already returned the same input file. (So that we don't instantiate + // the same member more than once.) + std::unique_ptr getMember(const Archive::Symbol *Sym); private: std::unique_ptr File; std::string Filename; - std::vector LazySymbols; std::map Seen; }; @@ -122,7 +107,7 @@ void parse() override; MachineTypes getMachineType() override; std::vector &getChunks() { return Chunks; } - std::vector &getSymbols() override { return SymbolBodies; } + std::vector &getSymbols() { return SymbolBodies; } // Returns a SymbolBody object for the SymbolIndex'th symbol in the // underlying object file. @@ -146,8 +131,8 @@ void initializeSymbols(); void initializeSEH(); - Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); - Undefined *createUndefined(COFFSymbolRef Sym); + SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); + SymbolBody *createUndefined(COFFSymbolRef Sym); std::unique_ptr COFFObj; llvm::BumpPtrAllocator Alloc; @@ -182,7 +167,6 @@ explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } - std::vector &getSymbols() override { return SymbolBodies; } DefinedImportData *ImpSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; @@ -191,10 +175,14 @@ private: void parse() override; - std::vector SymbolBodies; llvm::BumpPtrAllocator Alloc; llvm::BumpPtrAllocator StringAllocAux; llvm::StringSaver StringAlloc; + +public: + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; }; // Used for LTO. 
@@ -202,7 +190,7 @@ public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - std::vector &getSymbols() override { return SymbolBodies; } + std::vector &getSymbols() { return SymbolBodies; } MachineTypes getMachineType() override; std::unique_ptr takeModule() { return std::move(M); } Index: COFF/InputFiles.cpp =================================================================== --- COFF/InputFiles.cpp +++ COFF/InputFiles.cpp @@ -11,6 +11,7 @@ #include "Config.h" #include "Error.h" #include "InputFiles.h" +#include "SymbolTable.h" #include "Symbols.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" @@ -37,11 +38,12 @@ using llvm::Triple; using llvm::support::ulittle32_t; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; namespace lld { namespace coff { -int InputFile::NextIndex = 0; llvm::LLVMContext BitcodeFile::Context; // Returns the last element of a path, which is supposed to be a filename. @@ -67,14 +69,6 @@ error(ArchiveOrErr, "Failed to parse static library"); File = std::move(*ArchiveOrErr); - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); - - // Read the symbol table to construct Lazy objects. - for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); - // Seen is a map from member files to boolean values. Initially // all members are mapped to false, which indicates all these files // are not read yet. @@ -83,22 +77,41 @@ const Archive::Child &Child = *ChildOrErr; Seen[Child.getChildOffset()].clear(); } + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + Symtab->addLazy(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. // This function is thread-safe. 
-MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +std::unique_ptr ArchiveFile::getMember(const Archive::Symbol *Sym) { auto COrErr = Sym->getMember(); error(COrErr, Twine("Could not get the member for symbol ") + Sym->getName()); const Archive::Child &C = *COrErr; // Return an empty buffer if we have already returned the same buffer. if (Seen[C.getChildOffset()].test_and_set()) - return MemoryBufferRef(); - ErrorOr Ret = C.getMemoryBufferRef(); - error(Ret, Twine("Could not get the buffer for the member defining symbol ") + - Sym->getName()); - return *Ret; + return std::unique_ptr(nullptr); + ErrorOr MBRef = C.getMemoryBufferRef(); + error(MBRef, + Twine("Could not get the buffer for the member defining symbol ") + + Sym->getName()); + + file_magic Magic = identify_magic(MBRef->getBuffer()); + if (Magic == file_magic::coff_import_library) + return std::unique_ptr(new ImportFile(*MBRef)); + + std::unique_ptr Obj; + if (Magic == file_magic::coff_object) + Obj.reset(new ObjectFile(*MBRef)); + else if (Magic == file_magic::bitcode) + Obj.reset(new BitcodeFile(*MBRef)); + else + error(Twine(MBRef->getBufferIdentifier()) + ": unknown file type"); + + Obj->setParentName(getName()); + return Obj; } void ObjectFile::parse() { @@ -162,7 +175,7 @@ uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); - llvm::SmallVector, 8> WeakAliases; + llvm::SmallVector, 8> WeakAliases; int32_t LastSectionNumber = 0; for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. 
@@ -183,7 +196,7 @@ Body = createUndefined(Sym); uint32_t TagIndex = static_cast(AuxP)->TagIndex; - WeakAliases.emplace_back((Undefined *)Body, TagIndex); + WeakAliases.emplace_back(Body, TagIndex); } else { Body = createDefined(Sym, AuxP, IsFirst); } @@ -194,23 +207,30 @@ I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (auto WeakAlias : WeakAliases) - WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + for (auto WeakAlias : WeakAliases) { + auto *U = dyn_cast(WeakAlias.first); + if (!U) + continue; + // Report an error if two undefined symbols have different weak aliases. + if (U->WeakAlias && U->WeakAlias != SparseSymbolBodies[WeakAlias.second]) + Symtab->reportDuplicate(U->symbol(), this); + U->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + } } -Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { +SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { StringRef Name; COFFObj->getSymbolName(Sym, Name); - return new (Alloc) Undefined(Name); + return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body(); } -Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, - bool IsFirst) { +SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { StringRef Name; if (Sym.isCommon()) { auto *C = new (Alloc) CommonChunk(Sym); Chunks.push_back(C); - return new (Alloc) DefinedCommon(this, Sym, C); + return Symtab->addCommon(this, Sym, C)->body(); } if (Sym.isAbsolute()) { COFFObj->getSymbolName(Sym, Name); @@ -223,7 +243,10 @@ SEHCompat = true; return nullptr; } - return new (Alloc) DefinedAbsolute(Name, Sym); + if (Sym.isExternal()) + return Symtab->addAbsolute(Name, Sym)->body(); + else + return new (Alloc) DefinedAbsolute(Name, Sym); } int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) @@ -253,7 +276,11 @@ SC->Checksum = Aux->CheckSum; } - auto *B = new (Alloc) DefinedRegular(this, Sym, SC); + 
DefinedRegular *B; + if (Sym.isExternal()) + B = cast(Symtab->addRegular(this, Sym, SC)->body()); + else + B = new (Alloc) DefinedRegular(this, Sym, SC); if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) SC->setSymbol(B); @@ -315,22 +342,23 @@ ExtName = ExtName.substr(0, ExtName.find('@')); break; } - ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr); - SymbolBodies.push_back(ImpSym); + + this->Hdr = Hdr; + ExternalName = ExtName; + + ImpSym = cast( + Symtab->addImportData(ImpName, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (Hdr->getType() != llvm::COFF::IMPORT_CODE) return; - ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine); - SymbolBodies.push_back(ThunkSym); + ThunkSym = cast( + Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body()); } void BitcodeFile::parse() { - // Usually parse() is thread-safe, but bitcode file is an exception. 
- std::lock_guard Lock(Mu); - Context.enableDebugTypeODRUniquing(); ErrorOr> ModOrErr = LTOModule::createFromBuffer( Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); @@ -346,15 +374,15 @@ StringRef SymName = Saver.save(M->getSymbolName(I)); int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { - SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + SymbolBodies.push_back(Symtab->addUndefined(SymName, this, false)->body()); } else { bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common (Attrs & LTO_SYMBOL_COMDAT) || // comdat (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external (Attrs & LTO_SYMBOL_ALIAS))); - SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName, - Replaceable)); + SymbolBodies.push_back( + Symtab->addBitcode(this, SymName, Replaceable)->body()); } } Index: COFF/MarkLive.cpp =================================================================== --- COFF/MarkLive.cpp +++ COFF/MarkLive.cpp @@ -38,8 +38,8 @@ }; // Add GC root chunks. - for (Undefined *U : Config->GCRoot) - if (auto *D = dyn_cast(U->repl())) + for (SymbolBody *B : Config->GCRoot) + if (auto *D = dyn_cast(B)) Enqueue(D->getChunk()); while (!Worklist.empty()) { @@ -48,7 +48,7 @@ // Mark all symbols listed in the relocation table for this section. for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast(S->repl())) + if (auto *D = dyn_cast(S)) Enqueue(D->getChunk()); // Mark associative sections if any. Index: COFF/SymbolTable.h =================================================================== --- COFF/SymbolTable.h +++ COFF/SymbolTable.h @@ -22,6 +22,7 @@ #endif #include +#include namespace llvm { struct LTOCodeGenerator; @@ -31,8 +32,10 @@ namespace coff { class Chunk; +class CommonChunk; class Defined; class Lazy; +class SectionChunk; class SymbolBody; struct Symbol; @@ -45,7 +48,9 @@ // conflicts. 
For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition -// to replace the lazy symbol. The logic is implemented in resolve(). +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. class SymbolTable { public: void addFile(std::unique_ptr File); @@ -54,9 +59,10 @@ void run(); bool queueEmpty(); - // Print an error message on undefined symbols. If Resolve is true, try to - // resolve any undefined symbols and update the symbol table accordingly. - void reportRemainingUndefines(bool Resolve); + // Try to resolve any undefined symbols and update the symbol table + // accordingly, then print an error message for any remaining undefined + // symbols. + void reportRemainingUndefines(); // Returns a list of chunks of selected symbols. std::vector getChunks(); @@ -69,7 +75,7 @@ // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. - void mangleMaybe(Undefined *U); + void mangleMaybe(SymbolBody *B); StringRef findMangle(StringRef Name); // Print a layout map to OS. @@ -88,37 +94,50 @@ std::vector ObjectFiles; // Creates an Undefined symbol for a given name. 
- Undefined *addUndefined(StringRef Name); - DefinedRelative *addRelative(StringRef Name, uint64_t VA); - DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA); + SymbolBody *addUndefined(StringRef Name); + + Symbol *addRelative(StringRef N, uint64_t VA); + Symbol *addAbsolute(StringRef N, uint64_t VA); + + Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias); + void addLazy(ArchiveFile *F, const Archive::Symbol Sym); + Symbol *addAbsolute(StringRef N, COFFSymbolRef S); + Symbol *addRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C); + Symbol *addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable); + Symbol *addCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); // A list of chunks which to be added to .rdata. std::vector LocalImportChunks; private: - void readArchives(); + void readArchive(); void readObjects(); - void addSymbol(SymbolBody *New); - void addLazy(Lazy *New, std::vector *Accum); - Symbol *insert(SymbolBody *New); + std::pair insert(StringRef Name); StringRef findByPrefix(StringRef Prefix); - void addMemberFile(Lazy *Body); + void addMemberFile(ArchiveFile *F, const Archive::Symbol Sym); void addCombinedLTOObject(ObjectFile *Obj); std::vector createLTOObjects(llvm::LTOCodeGenerator *CG); llvm::DenseMap Symtab; std::vector> Files; - std::vector> ArchiveQueue; - std::vector> ObjectQueue; + std::list ArchiveQueue; + std::vector ObjectQueue; std::vector BitcodeFiles; std::vector> Objs; llvm::BumpPtrAllocator Alloc; }; +extern SymbolTable *Symtab; + } // namespace coff } // namespace lld Index: COFF/SymbolTable.cpp =================================================================== --- COFF/SymbolTable.cpp +++ COFF/SymbolTable.cpp @@ -12,7 +12,6 @@ #include "Error.h" #include "SymbolTable.h" #include "Symbols.h" -#include 
"lld/Core/Parallel.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/Support/Debug.h" @@ -24,22 +23,16 @@ namespace lld { namespace coff { -void SymbolTable::addFile(std::unique_ptr FileP) { -#if LLVM_ENABLE_THREADS - std::launch Policy = std::launch::async; -#else - std::launch Policy = std::launch::deferred; -#endif +SymbolTable *Symtab; +void SymbolTable::addFile(std::unique_ptr FileP) { InputFile *File = FileP.get(); Files.push_back(std::move(FileP)); if (auto *F = dyn_cast(File)) { - ArchiveQueue.push_back( - std::async(Policy, [=]() { F->parse(); return F; })); + ArchiveQueue.push_back(F); return; } - ObjectQueue.push_back( - std::async(Policy, [=]() { File->parse(); return File; })); + ObjectQueue.push_back(File); if (auto *F = dyn_cast(File)) { ObjectFiles.push_back(F); } else if (auto *F = dyn_cast(File)) { @@ -53,7 +46,7 @@ if (queueEmpty()) return; readObjects(); - readArchives(); + readArchive(); } void SymbolTable::run() { @@ -61,26 +54,17 @@ step(); } -void SymbolTable::readArchives() { +void SymbolTable::readArchive() { if (ArchiveQueue.empty()) return; // Add lazy symbols to the symbol table. Lazy symbols that conflict // with existing undefined symbols are accumulated in LazySyms. - std::vector LazySyms; - for (std::future &Future : ArchiveQueue) { - ArchiveFile *File = Future.get(); - if (Config->Verbose) - llvm::outs() << "Reading " << File->getShortName() << "\n"; - for (Lazy &Sym : File->getLazySymbols()) - addLazy(&Sym, &LazySyms); - } - ArchiveQueue.clear(); - - // Add archive member files to ObjectQueue that should resolve - // existing undefined symbols. - for (Symbol *Sym : LazySyms) - addMemberFile(cast(Sym->Body)); + ArchiveFile *File = ArchiveQueue.front(); + ArchiveQueue.pop_front(); + if (Config->Verbose) + llvm::outs() << "Reading " << File->getShortName() << "\n"; + File->parse(); } void SymbolTable::readObjects() { @@ -90,14 +74,12 @@ // Add defined and undefined symbols to the symbol table. 
std::vector Directives; for (size_t I = 0; I < ObjectQueue.size(); ++I) { - InputFile *File = ObjectQueue[I].get(); + InputFile *File = ObjectQueue[I]; if (Config->Verbose) llvm::outs() << "Reading " << File->getShortName() << "\n"; + File->parse(); // Adding symbols may add more files to ObjectQueue // (but not to ArchiveQueue). - for (SymbolBody *Sym : File->getSymbols()) - if (Sym->isExternal()) - addSymbol(Sym); StringRef S = File->getDirectives(); if (!S.empty()) { Directives.push_back(S); @@ -118,119 +100,231 @@ return ArchiveQueue.empty() && ObjectQueue.empty(); } -void SymbolTable::reportRemainingUndefines(bool Resolve) { +void SymbolTable::reportRemainingUndefines() { llvm::SmallPtrSet Undefs; for (auto &I : Symtab) { Symbol *Sym = I.second; - auto *Undef = dyn_cast(Sym->Body); + auto *Undef = dyn_cast(Sym->body()); if (!Undef) continue; + if (!Sym->IsUsedInRegularObj) + continue; StringRef Name = Undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *D = Undef->getWeakAlias()) { - if (Resolve) - Sym->Body = D; + // We resolve weak aliases by replacing the alias's SymbolBody with the + // target's SymbolBody. This causes all SymbolBody pointers referring to + // the old symbol to instead refer to the new symbol. However, we can't + // just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body + // because D may be an internal symbol, and internal symbols are stored as + // "unparented" SymbolBodies. For that reason we need to check which type + // of symbol we are dealing with and copy the correct number of bytes. + if (isa(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular)); + else if (isa(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute)); + else + // No other internal symbols are possible. + Sym->Body = D->symbol()->Body; continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. 
if (Name.startswith("__imp_")) { Symbol *Imp = find(Name.substr(strlen("__imp_"))); - if (Imp && isa(Imp->Body)) { - if (!Resolve) - continue; - auto *D = cast(Imp->Body); - auto *S = new (Alloc) DefinedLocalImport(Name, D); - LocalImportChunks.push_back(S->getChunk()); - Sym->Body = S; + if (Imp && isa(Imp->body())) { + auto *D = cast(Imp->body()); + replaceBody(Sym, Name, D); + LocalImportChunks.push_back( + cast(Sym->body())->getChunk()); continue; } } // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. - if (Config->Force && Resolve) - Sym->Body = new (Alloc) DefinedAbsolute(Name, 0); - Undefs.insert(Sym->Body); + if (Config->Force) + replaceBody(Sym, Name, 0); + Undefs.insert(Sym->body()); } if (Undefs.empty()) return; - for (Undefined *U : Config->GCRoot) - if (Undefs.count(U->repl())) - llvm::errs() << ": undefined symbol: " << U->getName() << "\n"; - for (std::unique_ptr &File : Files) - if (!isa(File.get())) - for (SymbolBody *Sym : File->getSymbols()) - if (Undefs.count(Sym->repl())) - llvm::errs() << File->getShortName() << ": undefined symbol: " - << Sym->getName() << "\n"; + for (SymbolBody *B : Config->GCRoot) + if (Undefs.count(B)) + llvm::errs() << ": undefined symbol: " << B->getName() << "\n"; + for (ObjectFile *File : ObjectFiles) + for (SymbolBody *Sym : File->getSymbols()) + if (Undefs.count(Sym)) + llvm::errs() << File->getShortName() + << ": undefined symbol: " << Sym->getName() << "\n"; if (!Config->Force) error("Link failed"); } -void SymbolTable::addLazy(Lazy *New, std::vector *Accum) { - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - if (isa(Existing)) - return; - if (Lazy *L = dyn_cast(Existing)) - if (L->getFileIndex() < New->getFileIndex()) - return; - Sym->Body = New; - New->setBackref(Sym); - if (isa(Existing)) - Accum->push_back(Sym); +std::pair SymbolTable::insert(StringRef Name) { + Symbol *&Sym = Symtab[Name]; + 
if (Sym) + return {Sym, false}; + Sym = new (Alloc) Symbol; + Sym->IsUsedInRegularObj = false; + return {Sym, true}; } -void SymbolTable::addSymbol(SymbolBody *New) { - // Find an existing symbol or create and insert a new one. - assert(isa(New) || isa(New)); - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - - // If we have an undefined symbol and a lazy symbol, - // let the lazy symbol to read a member file. - if (auto *L = dyn_cast(Existing)) { - // Undefined symbols with weak aliases need not to be resolved, - // since they would be replaced with weak aliases if they remain - // undefined. - if (auto *U = dyn_cast(New)) { - if (!U->WeakAlias) { - addMemberFile(L); - return; - } - } - Sym->Body = New; +Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, + bool IsWeakAlias) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (!F || !isa(F)) + S->IsUsedInRegularObj = true; + if (WasInserted || (isa(S->body()) && IsWeakAlias)) { + replaceBody(S, Name); + return S; + } + if (auto *L = dyn_cast(S->body())) + addMemberFile(L->File, L->Sym); + return S; +} + +void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { + StringRef Name = Sym.getName(); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody(S, F, Sym); return; } + auto *U = dyn_cast(S->body()); + if (!U || U->WeakAlias) + return; + addMemberFile(F, Sym); +} + +void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { + error(Twine("duplicate symbol: ") + Existing->body()->getDebugName() + + " and " + (NewFile ? NewFile->getShortName() : "(internal)")); +} - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. 
- int Comp = Existing->compare(New); - if (Comp == 0) - error(Twine("duplicate symbol: ") + Existing->getDebugName() + " and " + - New->getDebugName()); - if (Comp < 0) - Sym->Body = New; +Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, Sym); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; } -Symbol *SymbolTable::insert(SymbolBody *New) { - Symbol *&Sym = Symtab[New->getName()]; - if (Sym) { - New->setBackref(Sym); - return Sym; +Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, VA); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRelative(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, VA); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRegular(ObjectFile *F, COFFSymbolRef Sym, + SectionChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, F, Sym, C); + else if (auto *R = dyn_cast(S->body())) { + if (!C->isCOMDAT() || !R->isCOMDAT()) + reportDuplicate(S, F); + } else if (auto *B = dyn_cast(S->body())) { + if (B->IsReplaceable) + replaceBody(S, F, Sym, C); + else if (!C->isCOMDAT()) + reportDuplicate(S, F); + } else + replaceBody(S, F, Sym, C); + return S; +} + +Symbol 
*SymbolTable::addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + if (WasInserted || isa(S->body()) || isa(S->body())) { + replaceBody(S, F, N, IsReplaceable); + return S; } - Sym = new (Alloc) Symbol(New); - New->setBackref(Sym); - return Sym; + if (isa(S->body())) + return S; + if (IsReplaceable) + if (isa(S->body()) || isa(S->body())) + return S; + reportDuplicate(S, F); + return S; +} + +Symbol *SymbolTable::addCommon(ObjectFile *F, COFFSymbolRef Sym, + CommonChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || !isa(S->body())) + replaceBody(S, F, Sym, C); + else if (auto *DC = dyn_cast(S->body())) + if (Sym.getValue() > DC->getSize()) + replaceBody(S, F, Sym, C); + return S; +} + +Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, F); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, + uint16_t Machine) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, Name, ID, Machine); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; } // Reads an archive member file pointed by a given symbol. -void SymbolTable::addMemberFile(Lazy *Body) { - std::unique_ptr File = Body->getMember(); +void SymbolTable::addMemberFile(ArchiveFile *F, const Archive::Symbol Sym) { + std::unique_ptr File = F->getMember(&Sym); // getMember returns an empty buffer if the member was already // read from the library. 
@@ -238,7 +332,7 @@ return; if (Config->Verbose) llvm::outs() << "Loaded " << File->getShortName() << " for " - << Body->getName() << "\n"; + << Sym.getName() << "\n"; addFile(std::move(File)); } @@ -275,7 +369,7 @@ StringRef SymbolTable::findMangle(StringRef Name) { if (Symbol *Sym = find(Name)) - if (!isa(Sym->Body)) + if (!isa(Sym->body())) return Name; if (Config->Machine != I386) return findByPrefix(("?" + Name + "@@Y").str()); @@ -289,34 +383,17 @@ return findByPrefix(("?" + Name.substr(1) + "@@Y").str()); } -void SymbolTable::mangleMaybe(Undefined *U) { - if (U->WeakAlias) - return; - if (!isa(U->repl())) +void SymbolTable::mangleMaybe(SymbolBody *B) { + auto *U = dyn_cast(B); + if (!U || U->WeakAlias) return; StringRef Alias = findMangle(U->getName()); if (!Alias.empty()) U->WeakAlias = addUndefined(Alias); } -Undefined *SymbolTable::addUndefined(StringRef Name) { - auto *New = new (Alloc) Undefined(Name); - addSymbol(New); - if (auto *U = dyn_cast(New->repl())) - return U; - return New; -} - -DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedRelative(Name, VA); - addSymbol(New); - return New; -} - -DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) { - auto *New = new (Alloc) DefinedAbsolute(Name, VA); - addSymbol(New); - return New; +SymbolBody *SymbolTable::addUndefined(StringRef Name) { + return addUndefined(Name, nullptr, false)->body(); } void SymbolTable::printMap(llvm::raw_ostream &OS) { @@ -330,57 +407,19 @@ } } -void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { - for (SymbolBody *Body : Obj->getSymbols()) { - if (!Body->isExternal()) - continue; - // We should not see any new undefined symbols at this point, but we'll - // diagnose them later in reportRemainingUndefines(). 
- StringRef Name = Body->getName(); - Symbol *Sym = insert(Body); - SymbolBody *Existing = Sym->Body; - - if (Existing == Body) - continue; - - if (isa(Existing)) { - Sym->Body = Body; - continue; - } - if (auto *L = dyn_cast(Existing)) { - // We may see new references to runtime library symbols such as __chkstk - // here. These symbols must be wholly defined in non-bitcode files. - addMemberFile(L); - continue; - } - - int Comp = Existing->compare(Body); - if (Comp == 0) - error(Twine("LTO: unexpected duplicate symbol: ") + Name); - if (Comp < 0) - Sym->Body = Body; - } -} - void SymbolTable::addCombinedLTOObjects() { if (BitcodeFiles.empty()) return; - // Diagnose any undefined symbols early, but do not resolve weak externals, - // as resolution breaks the invariant that each Symbol points to a unique - // SymbolBody, which we rely on to replace DefinedBitcode symbols correctly. - reportRemainingUndefines(/*Resolve=*/false); - // Create an object file and add it to the symbol table by replacing any // DefinedBitcode symbols with the definitions in the object file. LTOCodeGenerator CG(BitcodeFile::Context); CG.setOptLevel(Config->LTOOptLevel); std::vector Objs = createLTOObjects(&CG); - for (ObjectFile *Obj : Objs) - addCombinedLTOObject(Obj); - size_t NumBitcodeFiles = BitcodeFiles.size(); + for (ObjectFile *Obj : Objs) + Obj->parse(); run(); if (BitcodeFiles.size() != NumBitcodeFiles) error("LTO: late loaded symbol created new bitcode reference"); @@ -389,25 +428,17 @@ // Combine and compile bitcode files and then return the result // as a vector of regular COFF object files. std::vector SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { - // All symbols referenced by non-bitcode objects must be preserved. - for (ObjectFile *File : ObjectFiles) - for (SymbolBody *Body : File->getSymbols()) - if (auto *S = dyn_cast(Body->repl())) - CG->addMustPreserveSymbol(S->getName()); - - // Likewise for bitcode symbols which we initially resolved to non-bitcode. 
+ // All symbols referenced by non-bitcode objects, including GC roots, must be + // preserved. We must also replace bitcode symbols with undefined symbols so + // that they may be replaced with real definitions without conflicting. for (BitcodeFile *File : BitcodeFiles) - for (SymbolBody *Body : File->getSymbols()) - if (isa(Body) && !isa(Body->repl())) + for (SymbolBody *Body : File->getSymbols()) { + if (!isa(Body)) + continue; + if (Body->symbol()->IsUsedInRegularObj) CG->addMustPreserveSymbol(Body->getName()); - - // Likewise for other symbols that must be preserved. - for (Undefined *U : Config->GCRoot) { - if (auto *S = dyn_cast(U->repl())) - CG->addMustPreserveSymbol(S->getName()); - else if (auto *S = dyn_cast_or_null(U->getWeakAlias())) - CG->addMustPreserveSymbol(S->getName()); - } + replaceBody(Body->symbol(), Body->getName()); + } CG->setModule(BitcodeFiles[0]->takeModule()); for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) @@ -437,7 +468,6 @@ auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "")); Files.emplace_back(ObjFile); ObjectFiles.push_back(ObjFile); - ObjFile->parse(); ObjFiles.push_back(ObjFile); } Index: COFF/Symbols.h =================================================================== --- COFF/Symbols.h +++ COFF/Symbols.h @@ -32,15 +32,8 @@ class BitcodeFile; class InputFile; class ObjectFile; -class SymbolBody; - -// A real symbol object, SymbolBody, is usually accessed indirectly -// through a Symbol. There's always one Symbol for each symbol name. -// The resolver updates SymbolBody pointers as it resolves symbols. -struct Symbol { - explicit Symbol(SymbolBody *P) : Body(P) {} - SymbolBody *Body; -}; +struct Symbol; +class SymbolTable; // The base class for real symbol classes. class SymbolBody { @@ -75,25 +68,17 @@ // Returns the symbol name. StringRef getName(); - // A SymbolBody has a backreference to a Symbol. Originally they are - // doubly-linked. A backreference will never change. 
But the pointer - // in the Symbol may be mutated by the resolver. If you have a - // pointer P to a SymbolBody and are not sure whether the resolver - // has chosen the object among other objects having the same name, - // you can access P->Backref->Body to get the resolver's result. - void setBackref(Symbol *P) { Backref = P; } - SymbolBody *repl() { return Backref ? Backref->Body : this; } - - // Decides which symbol should "win" in the symbol table, this or - // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if - // they are duplicate (conflicting) symbols. - int compare(SymbolBody *Other); + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast(this)->symbol(); + } // Returns a name of this symbol including source file name. // Used only for debugging and logging. std::string getDebugName(); protected: + friend SymbolTable; explicit SymbolBody(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), IsReplaceable(false), Name(N) {} @@ -108,7 +93,6 @@ unsigned IsReplaceable : 1; StringRef Name; - Symbol *Backref = nullptr; }; // The base class for any defined symbols, including absolute symbols, @@ -149,8 +133,6 @@ return S->kind() <= LastDefinedCOFFKind; } - int getFileIndex() { return File->Index; } - COFFSymbolRef getCOFFSymbol(); protected: @@ -194,7 +176,7 @@ uint64_t getRVA() { return Data->getRVA(); } private: - friend SymbolBody; + friend SymbolTable; uint64_t getSize() { return Sym->Value; } CommonChunk *Data; }; @@ -253,13 +235,8 @@ static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } - // Returns an object file for this symbol, or a nullptr if the file - // was already returned. - std::unique_ptr getMember(); - - int getFileIndex() { return File->Index; } - private: + friend SymbolTable; ArchiveFile *File; const Archive::Symbol Sym; }; @@ -293,26 +270,22 @@ // table in an output. The former has "__imp_" prefix. 
class DefinedImportData : public Defined { public: - DefinedImportData(StringRef D, StringRef N, StringRef E, - const coff_import_header *H) - : Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) { + DefinedImportData(StringRef N, ImportFile *F) + : Defined(DefinedImportDataKind, N), File(F) { } static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } - uint64_t getRVA() { return Location->getRVA(); } - StringRef getDLLName() { return DLLName; } - StringRef getExternalName() { return ExternalName; } - void setLocation(Chunk *AddressTable) { Location = AddressTable; } - uint16_t getOrdinal() { return Hdr->OrdinalHint; } + uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } + StringRef getExternalName() { return File->ExternalName; } + void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } + uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } private: - StringRef DLLName; - StringRef ExternalName; - const coff_import_header *Hdr; - Chunk *Location = nullptr; + ImportFile *File; }; // This class represents a symbol for a jump table entry which jumps @@ -343,17 +316,17 @@ class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(S) {} + : Defined(DefinedLocalImportKind, N), Data(new LocalImportChunk(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; } - uint64_t getRVA() { return Data.getRVA(); } - Chunk *getChunk() { return &Data; } + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data.get(); } private: - LocalImportChunk Data; + std::unique_ptr Data; }; class DefinedBitcode : public Defined { @@ -361,6 +334,11 @@ public: DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) : Defined(DefinedBitcodeKind, N), File(F) { + // IsReplaceable tracks whether the bitcode symbol may be 
replaced with some + // other (defined, common or bitcode) symbol. This is the case for common, + // comdat and weak external symbols. We try to replace bitcode symbols with + // "real" symbols (see SymbolTable::add{Regular,Bitcode}), and resolve the + // result against the real symbol from the combined LTO object. this->IsReplaceable = IsReplaceable; } @@ -397,6 +375,46 @@ llvm_unreachable("unknown symbol kind"); } +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // True if this symbol was referenced by a regular (non-bitcode) object. + unsigned IsUsedInRegularObj : 1; + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. + llvm::AlignedCharArrayUnion + Body; + + SymbolBody *body() { + return reinterpret_cast(Body.buffer); + } + const SymbolBody *body() const { return const_cast(this)->body(); } +}; + +template +void replaceBody(Symbol *S, ArgT &&... 
Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(llvm::AlignOf::Alignment <= + llvm::AlignOfBody)>::Alignment, + "Body not aligned enough"); + assert(static_cast(static_cast(nullptr)) == nullptr && + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(isExternal()); + return reinterpret_cast(reinterpret_cast(this) - + offsetof(Symbol, Body)); +} + } // namespace coff } // namespace lld Index: COFF/Symbols.cpp =================================================================== --- COFF/Symbols.cpp +++ COFF/Symbols.cpp @@ -15,8 +15,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm::object; -using llvm::sys::fs::identify_magic; -using llvm::sys::fs::file_magic; namespace lld { namespace coff { @@ -36,120 +34,6 @@ return Name; } -// Returns 1, 0 or -1 if this symbol should take precedence -// over the Other, tie or lose, respectively. -int SymbolBody::compare(SymbolBody *Other) { - Kind LK = kind(), RK = Other->kind(); - - // Normalize so that the smaller kind is on the left. - if (LK > RK) - return -Other->compare(this); - - // First handle comparisons between two different kinds. - if (LK != RK) { - if (RK > LastDefinedKind) { - if (LK == LazyKind && cast(Other)->WeakAlias) - return -1; - - // The LHS is either defined or lazy and so it wins. - assert((LK <= LastDefinedKind || LK == LazyKind) && "Bad kind!"); - return 1; - } - - // Bitcode has special complexities. - if (RK == DefinedBitcodeKind) { - auto *RHS = cast(Other); - - switch (LK) { - case DefinedCommonKind: - return 1; - - case DefinedRegularKind: - // As an approximation, regular symbols win over bitcode symbols, - // but we definitely have a conflict if the regular symbol is not - // replaceable and neither is the bitcode symbol. 
We do not - // replicate the rest of the symbol resolution logic here; symbol - // resolution will be done accurately after lowering bitcode symbols - // to regular symbols in addCombinedLTOObject(). - if (cast(this)->isCOMDAT() || RHS->IsReplaceable) - return 1; - - // Fallthrough to the default of a tie otherwise. - default: - return 0; - } - } - - // Either of the object file kind will trump a higher kind. - if (LK <= LastDefinedCOFFKind) - return 1; - - // The remaining kind pairs are ties amongst defined symbols. - return 0; - } - - // Now handle the case where the kinds are the same. - switch (LK) { - case DefinedRegularKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - if (LHS->isCOMDAT() && RHS->isCOMDAT()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return 0; - } - - case DefinedCommonKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - if (LHS->getSize() == RHS->getSize()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return LHS->getSize() > RHS->getSize() ? 1 : -1; - } - - case DefinedBitcodeKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - // If both are non-replaceable, we have a tie. - if (!LHS->IsReplaceable && !RHS->IsReplaceable) - return 0; - - // Non-replaceable symbols win, but even two replaceable symboles don't - // tie. If both symbols are replaceable, choice is arbitrary. - if (RHS->IsReplaceable && LHS->IsReplaceable) - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - return LHS->IsReplaceable ? -1 : 1; - } - - case LazyKind: { - // Don't tie, pick the earliest. - auto *LHS = cast(this); - auto *RHS = cast(Other); - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - } - - case UndefinedKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - // Tie if both undefined symbols have different weak aliases. 
- if (LHS->WeakAlias && RHS->WeakAlias) { - if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName()) - return 0; - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - } - return LHS->WeakAlias ? 1 : -1; - } - - case DefinedLocalImportKind: - case DefinedImportThunkKind: - case DefinedImportDataKind: - case DefinedAbsoluteKind: - case DefinedRelativeKind: - // These all simply tie. - return 0; - } - llvm_unreachable("unknown symbol kind"); -} - std::string SymbolBody::getDebugName() { std::string N = getName().str(); if (auto *D = dyn_cast(this)) { @@ -181,34 +65,10 @@ } } -std::unique_ptr Lazy::getMember() { - MemoryBufferRef MBRef = File->getMember(&Sym); - - // getMember returns an empty buffer if the member was already - // read from the library. - if (MBRef.getBuffer().empty()) - return std::unique_ptr(nullptr); - - file_magic Magic = identify_magic(MBRef.getBuffer()); - if (Magic == file_magic::coff_import_library) - return std::unique_ptr(new ImportFile(MBRef)); - - std::unique_ptr Obj; - if (Magic == file_magic::coff_object) - Obj.reset(new ObjectFile(MBRef)); - else if (Magic == file_magic::bitcode) - Obj.reset(new BitcodeFile(MBRef)); - else - error(Twine(File->getName()) + ": unknown file type"); - - Obj->setParentName(File->getName()); - return Obj; -} - Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. 
for (SymbolBody *A = WeakAlias; A; A = cast(A)->WeakAlias) - if (auto *D = dyn_cast(A->repl())) + if (auto *D = dyn_cast(A)) return D; return nullptr; } Index: COFF/Writer.cpp =================================================================== --- COFF/Writer.cpp +++ COFF/Writer.cpp @@ -293,7 +293,7 @@ if (!File->SEHCompat) return; for (SymbolBody *B : File->SEHandlers) - Handlers.insert(cast(B->repl())); + Handlers.insert(cast(B)); } SEHTable.reset(new SEHTableChunk(Handlers)); createSection(".rdata")->addChunk(SEHTable.get()); @@ -332,7 +332,7 @@ Sec->addChunk(C); } if (!DelayIdata.empty()) { - Defined *Helper = cast(Config->DelayLoadHelper->repl()); + Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) @@ -375,6 +375,10 @@ } Optional Writer::createSymbol(Defined *Def) { + // Relative symbols are unrepresentable in a COFF symbol table. + if (isa(Def)) + return None; + if (auto *D = dyn_cast(Def)) if (!D->getChunk()->isLive()) return None; @@ -401,7 +405,6 @@ switch (Def->kind()) { case SymbolBody::DefinedAbsoluteKind: - case SymbolBody::DefinedRelativeKind: Sym.Value = Def->getRVA(); Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; @@ -434,16 +437,13 @@ Sec->setStringTableOff(addEntryToStringTable(Name)); } + std::set SeenSymbols; for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) for (SymbolBody *B : File->getSymbols()) if (auto *D = dyn_cast(B)) - if (Optional Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - - for (ImportFile *File : Symtab->ImportFiles) - for (SymbolBody *B : File->getSymbols()) - if (Optional Sym = createSymbol(cast(B))) - OutputSymtab.push_back(*Sym); + if (SeenSymbols.insert(D).second) + if (Optional Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. 
@@ -534,7 +534,7 @@ PE->SizeOfImage = SizeOfImage; PE->SizeOfHeaders = SizeOfHeaders; if (!Config->NoEntry) { - Defined *Entry = cast(Config->Entry->repl()); + Defined *Entry = cast(Config->Entry); PE->AddressOfEntryPoint = Entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (Config->Machine == ARMNT) @@ -594,7 +594,7 @@ Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { - if (Defined *B = dyn_cast(Sym->Body)) { + if (Defined *B = dyn_cast(Sym->body())) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); Dir[TLS_TABLE].Size = Config->is64() ? sizeof(object::coff_tls_directory64) @@ -602,7 +602,7 @@ } } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { - if (auto *B = dyn_cast(Sym->Body)) { + if (auto *B = dyn_cast(Sym->body())) { SectionChunk *SC = B->getChunk(); assert(B->getRVA() >= SC->getRVA()); uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); @@ -653,8 +653,10 @@ void Writer::fixSafeSEHSymbols() { if (!SEHTable) return; - Config->SEHTable->setRVA(SEHTable->getRVA()); - Config->SEHCount->setVA(SEHTable->getSize() / 4); + if (auto *T = dyn_cast(Config->SEHTable->body())) + T->setRVA(SEHTable->getRVA()); + if (auto *C = dyn_cast(Config->SEHCount->body())) + C->setVA(SEHTable->getSize() / 4); } // Write section contents to a mmap'ed file. 
Index: test/COFF/conflict.test =================================================================== --- test/COFF/conflict.test +++ test/COFF/conflict.test @@ -8,7 +8,7 @@ # RUN: not lld-link /out:%t.exe %t.lto1.obj %t.lto2.obj >& %t.log # RUN: FileCheck %s < %t.log -# CHECK: duplicate symbol: foo {{.+}}1.obj and foo {{.+}}2.obj +# CHECK: duplicate symbol: foo {{.+}}1.obj and {{.+}}2.obj --- header: Index: test/COFF/include2.test =================================================================== --- test/COFF/include2.test +++ test/COFF/include2.test @@ -9,6 +9,6 @@ CHECK: include2.test.tmp1.obj CHECK: include2.test.tmp2.lib -CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo +CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar Index: test/COFF/order.test =================================================================== --- test/COFF/order.test +++ test/COFF/order.test @@ -11,5 +11,5 @@ CHECK: order.test.tmp1.obj CHECK: order.test.tmp3.obj CHECK: order.test.tmp2.lib -CHECK: order.test.tmp3.lib CHECK: order.test.tmp2.lib(order.test.tmp2.obj) for foo +CHECK: order.test.tmp3.lib Index: test/COFF/symtab.test =================================================================== --- test/COFF/symtab.test +++ test/COFF/symtab.test @@ -36,17 +36,17 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: message -# CHECK-NEXT: Value: 6 -# CHECK-NEXT: Section: .text2 (3) +# CHECK-NEXT: Name: MessageBoxA +# CHECK-NEXT: Value: 80 +# CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: Static (0x3) +# CHECK-NEXT: StorageClass: External (0x2) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: main -# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Name: ExitProcess +# CHECK-NEXT: Value: 64 # CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null 
(0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -54,8 +54,8 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: caption -# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Name: message +# CHECK-NEXT: Value: 6 # CHECK-NEXT: Section: .text2 (3) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -63,26 +63,8 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: abs_symbol -# CHECK-NEXT: Value: 2662186735 -# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE (-1) -# CHECK-NEXT: BaseType: Null (0x0) -# CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) -# CHECK-NEXT: AuxSymbolCount: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: __imp_ExitProcess -# CHECK-NEXT: Value: 64 -# CHECK-NEXT: Section: .idata (4) -# CHECK-NEXT: BaseType: Null (0x0) -# CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) -# CHECK-NEXT: AuxSymbolCount: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: ExitProcess -# CHECK-NEXT: Value: 64 +# CHECK-NEXT: Name: main +# CHECK-NEXT: Value: 0 # CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -90,18 +72,18 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: __imp_MessageBoxA -# CHECK-NEXT: Value: 72 -# CHECK-NEXT: Section: .idata (4) +# CHECK-NEXT: Name: caption +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Section: .text2 (3) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) +# CHECK-NEXT: StorageClass: Static (0x3) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: MessageBoxA -# CHECK-NEXT: Value: 80 -# CHECK-NEXT: Section: .text (2) +# CHECK-NEXT: Name: abs_symbol +# CHECK-NEXT: Value: 2662186735 +# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE (-1) # CHECK-NEXT: BaseType: Null (0x0) # 
CHECK-NEXT: ComplexType: Null (0x0) # CHECK-NEXT: StorageClass: External (0x2)