Index: lld/trunk/test/COFF/Inputs/start-lib1.ll =================================================================== --- lld/trunk/test/COFF/Inputs/start-lib1.ll (revision 0) +++ lld/trunk/test/COFF/Inputs/start-lib1.ll (revision 370816) @@ -0,0 +1,13 @@ +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +declare i32 @bar() + +define i32 @foo() { + %1 = call i32 () @bar() + %2 = add i32 %1, 1 + ret i32 %2 +} + +!llvm.linker.options = !{!0} +!0 = !{!"/INCLUDE:foo"} Index: lld/trunk/test/COFF/Inputs/start-lib2.ll =================================================================== --- lld/trunk/test/COFF/Inputs/start-lib2.ll (revision 0) +++ lld/trunk/test/COFF/Inputs/start-lib2.ll (revision 370816) @@ -0,0 +1,9 @@ +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define i32 @bar() { + ret i32 1 +} + +!llvm.linker.options = !{!0} +!0 = !{!"/INCLUDE:bar"} Index: lld/trunk/test/COFF/start-lib.ll =================================================================== --- lld/trunk/test/COFF/start-lib.ll (revision 0) +++ lld/trunk/test/COFF/start-lib.ll (revision 370816) @@ -0,0 +1,43 @@ +; REQUIRES: x86 +; +; RUN: llc -filetype=obj %s -o %t.obj +; RUN: llc -filetype=obj %p/Inputs/start-lib1.ll -o %t1.obj +; RUN: llc -filetype=obj %p/Inputs/start-lib2.ll -o %t2.obj +; RUN: opt -thinlto-bc %s -o %t.bc +; RUN: opt -thinlto-bc %p/Inputs/start-lib1.ll -o %t1.bc +; RUN: opt -thinlto-bc %p/Inputs/start-lib2.ll -o %t2.bc +; +; RUN: lld-link -out:%t1.exe -entry:main -opt:noref -lldmap:%t1.map \ +; RUN: %t.obj %t1.obj %t2.obj +; RUN: FileCheck --check-prefix=TEST1 %s < %t1.map +; RUN: lld-link -out:%t1.exe -entry:main -opt:noref -lldmap:%t1.thinlto.map \ +; RUN: %t.bc %t1.bc %t2.bc +; RUN: FileCheck --check-prefix=TEST1 %s < %t1.thinlto.map +; TEST1: foo +; TEST1: bar +; +; RUN: lld-link -out:%t2.exe -entry:main -opt:noref -lldmap:%t2.map \ +; RUN: %t.obj -start-lib %t1.obj 
-end-lib %t2.obj +; RUN: FileCheck --check-prefix=TEST2 %s < %t2.map +; RUN: lld-link -out:%t2.exe -entry:main -opt:noref -lldmap:%t2.thinlto.map \ +; RUN: %t.bc -start-lib %t1.bc -end-lib %t2.bc +; RUN: FileCheck --check-prefix=TEST2 %s < %t2.thinlto.map +; TEST2-NOT: Name: foo +; TEST2: bar +; TEST2-NOT: Name: foo +; +; RUN: lld-link -out:%t3.exe -entry:main -opt:noref -lldmap:%t3.map \ +; RUN: %t.obj -start-lib %t1.obj %t2.obj +; RUN: FileCheck --check-prefix=TEST3 %s < %t3.map +; RUN: lld-link -out:%t3.exe -entry:main -opt:noref -lldmap:%t3.thinlto.map \ +; RUN: %t.bc -start-lib %t1.bc %t2.bc +; RUN: FileCheck --check-prefix=TEST3 %s < %t3.thinlto.map +; TEST3-NOT: foo +; TEST3-NOT: bar + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define void @main() { + ret void +} Index: lld/trunk/test/COFF/start-lib-cmd-diagnostics.ll =================================================================== --- lld/trunk/test/COFF/start-lib-cmd-diagnostics.ll (revision 0) +++ lld/trunk/test/COFF/start-lib-cmd-diagnostics.ll (revision 370816) @@ -0,0 +1,19 @@ +; REQUIRES: x86 +; +; We need an input file to lld, so create one. 
+; RUN: llc -filetype=obj %s -o %t.obj + +; RUN: not lld-link %t.obj -end-lib 2>&1 \ +; RUN: | FileCheck --check-prefix=STRAY_END %s +; STRAY_END: stray -end-lib + +; RUN: not lld-link -start-lib -start-lib %t.obj 2>&1 \ +; RUN: | FileCheck --check-prefix=NESTED_START %s +; NESTED_START: nested -start-lib + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define void @main() { + ret void +} Index: lld/trunk/COFF/SymbolTable.h =================================================================== --- lld/trunk/COFF/SymbolTable.h (revision 370815) +++ lld/trunk/COFF/SymbolTable.h (revision 370816) @@ -1,138 +1,139 @@ //===- SymbolTable.h --------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_SYMBOL_TABLE_H #define LLD_COFF_SYMBOL_TABLE_H #include "InputFiles.h" #include "LTO.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/raw_ostream.h" namespace llvm { struct LTOCodeGenerator; } namespace lld { namespace coff { class Chunk; class CommonChunk; class Defined; class DefinedAbsolute; class DefinedRegular; class DefinedRelative; -class Lazy; +class LazyArchive; class SectionChunk; class Symbol; // SymbolTable is a bucket of all known symbols, including defined, // undefined, or lazy symbols (the last one is symbols in archive // files whose archive members are not yet loaded). // // We put all symbols of all files to a SymbolTable, and the // SymbolTable selects the "best" symbols if there are name // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. 
Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition // to replace the lazy symbol. The logic is implemented in the // add*() functions, which are called by input files as they are parsed. // There is one add* function per symbol type. class SymbolTable { public: void addFile(InputFile *file); // Emit errors for symbols that cannot be resolved. void reportUnresolvable(); // Try to resolve any undefined symbols and update the symbol table // accordingly, then print an error message for any remaining undefined // symbols and warn about imported local symbols. void resolveRemainingUndefines(); void loadMinGWAutomaticImports(); bool handleMinGWAutomaticImport(Symbol *sym, StringRef name); // Returns a list of chunks of selected symbols. std::vector getChunks(); // Returns a symbol for a given name. Returns a nullptr if not found. Symbol *find(StringRef name); Symbol *findUnderscore(StringRef name); // Occasionally we have to resolve an undefined symbol to its // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. Symbol *findMangle(StringRef name); // Build a set of COFF objects representing the combined contents of // BitcodeFiles and add them to the symbol table. Called after all files are // added and before the writer writes results to a file. void addCombinedLTOObjects(); std::vector compileBitcodeFiles(); // Creates an Undefined symbol for a given name. 
Symbol *addUndefined(StringRef name); Symbol *addSynthetic(StringRef n, Chunk *c); Symbol *addAbsolute(StringRef n, uint64_t va); Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias); - void addLazy(ArchiveFile *f, const Archive::Symbol &sym); + void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym); + void addLazyObject(LazyObjFile *f, StringRef n); Symbol *addAbsolute(StringRef n, COFFSymbolRef s); Symbol *addRegular(InputFile *f, StringRef n, const llvm::object::coff_symbol_generic *s = nullptr, SectionChunk *c = nullptr); std::pair addComdat(InputFile *f, StringRef n, const llvm::object::coff_symbol_generic *s = nullptr); Symbol *addCommon(InputFile *f, StringRef n, uint64_t size, const llvm::object::coff_symbol_generic *s = nullptr, CommonChunk *c = nullptr); Symbol *addImportData(StringRef n, ImportFile *f); Symbol *addImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); void addLibcall(StringRef name); void reportDuplicate(Symbol *existing, InputFile *newFile); // A list of chunks which to be added to .rdata. std::vector localImportChunks; // Iterates symbols in non-determinstic hash table order. template void forEachSymbol(T callback) { for (auto &pair : symMap) callback(pair.second); } private: /// Given a name without "__imp_" prefix, returns a defined symbol /// with the "__imp_" prefix, if it exists. Defined *impSymbol(StringRef name); /// Inserts symbol if not already present. std::pair insert(StringRef name); /// Same as insert(Name), but also sets isUsedInRegularObj. 
std::pair insert(StringRef name, InputFile *f); std::vector getSymsWithPrefix(StringRef prefix); llvm::DenseMap symMap; std::unique_ptr lto; }; extern SymbolTable *symtab; std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex); } // namespace coff } // namespace lld #endif Index: lld/trunk/COFF/InputFiles.h =================================================================== --- lld/trunk/COFF/InputFiles.h (revision 370815) +++ lld/trunk/COFF/InputFiles.h (revision 370816) @@ -1,323 +1,356 @@ //===- InputFiles.h ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_INPUT_FILES_H #define LLD_COFF_INPUT_FILES_H #include "Config.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" #include #include #include namespace llvm { namespace pdb { class DbiModuleDescriptorBuilder; } } namespace lld { namespace coff { std::vector getArchiveMembers(llvm::object::Archive *file); using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; class Defined; class DefinedImportData; class DefinedImportThunk; class DefinedRegular; class SectionChunk; class Symbol; class Undefined; class TpiSource; // The root class of input files. 
class InputFile { public: - enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + enum Kind { + ArchiveKind, + ObjectKind, + LazyObjectKind, + ImportKind, + BitcodeKind + }; Kind kind() const { return fileKind; } virtual ~InputFile() {} // Returns the filename. StringRef getName() const { return mb.getBufferIdentifier(); } // Reads a file (the constructor doesn't do that). virtual void parse() = 0; // Returns the CPU type this file was compiled to. virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } MemoryBufferRef mb; // An archive file name if this file is created from an archive. StringRef parentName; // Returns .drectve section contents if exist. StringRef getDirectives() { return directives; } protected: InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} StringRef directives; private: const Kind fileKind; }; // .lib or .a file. class ArchiveFile : public InputFile { public: explicit ArchiveFile(MemoryBufferRef m); static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } void parse() override; // Enqueues an archive member load for the given symbol. If we've already // enqueued a load for the same archive member, this function does nothing, // which ensures that we don't load the same member more than once. void addMember(const Archive::Symbol &sym); private: std::unique_ptr file; llvm::DenseSet seen; }; +// .obj or .o file between -start-lib and -end-lib. +class LazyObjFile : public InputFile { +public: + explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {} + static bool classof(const InputFile *f) { + return f->kind() == LazyObjectKind; + } + // Makes this object file part of the link. + void fetch(); + // Adds the symbols in this file to the symbol table as LazyObject symbols. + void parse() override; + +private: + std::vector symbols; +}; + // .obj or .o file. This may be a member of an archive file. 
class ObjFile : public InputFile { public: explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {} + explicit ObjFile(MemoryBufferRef m, std::vector &&symbols) + : InputFile(ObjectKind, m), symbols(std::move(symbols)) {} static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } void parse() override; MachineTypes getMachineType() override; ArrayRef getChunks() { return chunks; } ArrayRef getDebugChunks() { return debugChunks; } ArrayRef getSXDataChunks() { return sXDataChunks; } ArrayRef getGuardFidChunks() { return guardFidChunks; } ArrayRef getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef getSymbols() { return symbols; } ArrayRef getDebugSection(StringRef secName); // Returns a Symbol object for the symbolIndex'th symbol in the // underlying object file. Symbol *getSymbol(uint32_t symbolIndex) { return symbols[symbolIndex]; } // Returns the underlying COFF file. COFFObjectFile *getCOFFObj() { return coffObj.get(); } // Add a symbol for a range extension thunk. Return the new symbol table // index. This index can be used to modify a relocation. uint32_t addRangeThunkSymbol(Symbol *thunk) { symbols.push_back(thunk); return symbols.size() - 1; } void includeResourceChunks(); bool isResourceObjFile() const { return !resourceChunks.empty(); } static std::vector instances; // Flags in the absolute @feat.00 symbol if it is present. These usually // indicate if an object was compiled with certain security features enabled // like stack guard, safeseh, /guard:cf, or other things. uint32_t feat00Flags = 0; // True if this object file is compatible with SEH. COFF-specific and // x86-only. COFF spec 5.10.1. The .sxdata section. bool hasSafeSEH() { return feat00Flags & 0x1; } // True if this file was compiled with /guard:cf. bool hasGuardCF() { return feat00Flags & 0x800; } // Pointer to the PDB module descriptor builder. Various debug info records // will reference object files by "module index", which is here. 
Things like // source files and section contributions are also recorded here. Will be null // if we are not producing a PDB. llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; const coff_section *addrsigSec = nullptr; // When using Microsoft precompiled headers, this is the PCH's key. // The same key is used by both the precompiled object, and objects using the // precompiled object. Any difference indicates out-of-date objects. llvm::Optional pchSignature; // Whether this file was compiled with /hotpatch. bool hotPatchable = false; // Whether the object was already merged into the final PDB. bool mergedIntoPDB = false; // If the OBJ has a .debug$T stream, this tells how it will be handled. TpiSource *debugTypesObj = nullptr; // The .debug$T stream if there's one. llvm::Optional debugTypes; private: const coff_section* getSection(uint32_t i); const coff_section *getSection(COFFSymbolRef sym) { return getSection(sym.getSectionNumber()); } void initializeChunks(); void initializeSymbols(); void initializeFlags(); void initializeDependencies(); SectionChunk * readSection(uint32_t sectionNumber, const llvm::object::coff_aux_section_definition *def, StringRef leaderName); void readAssociativeDefinition( COFFSymbolRef coffSym, const llvm::object::coff_aux_section_definition *def); void readAssociativeDefinition( COFFSymbolRef coffSym, const llvm::object::coff_aux_section_definition *def, uint32_t parentSection); void recordPrevailingSymbolForMingw( COFFSymbolRef coffSym, llvm::DenseMap &prevailingSectionMap); void maybeAssociateSEHForMingw( COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, const llvm::DenseMap &prevailingSectionMap); // Given a new symbol Sym with comdat selection Selection, if the new // symbol is not (yet) Prevailing and the existing comdat leader set to // Leader, emits a diagnostic if the new symbol and its selection doesn't // match the existing symbol and its selection. 
If either old or new // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace // the existing leader. In that case, Prevailing is set to true. void handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, bool &prevailing, DefinedRegular *leader); llvm::Optional createDefined(COFFSymbolRef sym, std::vector &comdatDefs, bool &prevailingComdat); Symbol *createRegular(COFFSymbolRef sym); Symbol *createUndefined(COFFSymbolRef sym); std::unique_ptr coffObj; // List of all chunks defined by this file. This includes both section // chunks and non-section chunks for common symbols. std::vector chunks; std::vector resourceChunks; // CodeView debug info sections. std::vector debugChunks; // Chunks containing symbol table indices of exception handlers. Only used for // 32-bit x86. std::vector sXDataChunks; // Chunks containing symbol table indices of address taken symbols and longjmp // targets. These are not linked into the final binary when /guard:cf is set. std::vector guardFidChunks; std::vector guardLJmpChunks; // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. // (Because section number is 1-based, the first slot is always a // null pointer.) std::vector sparseChunks; // This vector contains a list of all symbols defined or referenced by this // file. They are indexed such that you can get a Symbol by symbol // index. Nonexistent indices (which are occupied by auxiliary // symbols in the real symbol table) are filled with null pointers. std::vector symbols; }; // This type represents import library members that contain DLL names // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 // for details about the format. 
class ImportFile : public InputFile { public: explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {} static bool classof(const InputFile *f) { return f->kind() == ImportKind; } static std::vector instances; Symbol *impSym = nullptr; Symbol *thunkSym = nullptr; std::string dllName; private: void parse() override; public: StringRef externalName; const coff_import_header *hdr; Chunk *location = nullptr; // We want to eliminate dllimported symbols if no one actually refers them. // These "Live" bits are used to keep track of which import library members // are actually in use. // // If the Live bit is turned off by MarkLive, Writer will ignore dllimported // symbols provided by this import library member. We also track whether the // imported symbol is used separately from whether the thunk is used in order // to avoid creating unnecessary thunks. bool live = !config->doGC; bool thunkLive = !config->doGC; }; // Used for LTO. class BitcodeFile : public InputFile { public: BitcodeFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive); + uint64_t offsetInArchive) + : BitcodeFile(mb, archiveName, offsetInArchive, {}) {} + explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName, + uint64_t offsetInArchive, + std::vector &&symbols); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } ArrayRef getSymbols() { return symbols; } MachineTypes getMachineType() override; static std::vector instances; std::unique_ptr obj; private: void parse() override; std::vector symbols; }; +inline bool isBitcode(MemoryBufferRef mb) { + return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; +} + std::string replaceThinLTOSuffix(StringRef path); } // namespace coff std::string toString(const coff::InputFile *file); } // namespace lld #endif Index: lld/trunk/COFF/Symbols.cpp =================================================================== --- lld/trunk/COFF/Symbols.cpp (revision 370815) +++ lld/trunk/COFF/Symbols.cpp 
(revision 370816) @@ -1,142 +1,144 @@ //===- Symbols.cpp --------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "InputFiles.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::object; using namespace lld::coff; namespace lld { static_assert(sizeof(SymbolUnion) <= 48, "symbols should be optimized for memory usage"); // Returns a symbol name for an error message. static std::string demangle(StringRef symName) { if (config->demangle) { if (Optional s = demangleMSVC(symName)) return *s; if (config->mingw) { StringRef demangleInput = symName; std::string prefix; if (demangleInput.consume_front("__imp_")) prefix = "__declspec(dllimport) "; if (config->machine == I386) demangleInput.consume_front("_"); if (Optional s = demangleItanium(demangleInput)) return prefix + *s; } } return symName; } std::string toString(coff::Symbol &b) { return demangle(b.getName()); } std::string toCOFFString(const Archive::Symbol &b) { return demangle(b.getName()); } namespace coff { StringRef Symbol::getName() { // COFF symbol names are read lazily for a performance reason. // Non-external symbol names are never used by the linker except for logging // or debugging. Their internal references are resolved not by name but by // symbol index. And because they are not external, no one can refer them by // name. Object files contain lots of non-external symbols, and creating // StringRefs for them (which involves lots of strlen() on the string table) // is a waste of time. 
if (nameData == nullptr) { auto *d = cast(this); StringRef nameStr; cast(d->file)->getCOFFObj()->getSymbolName(d->sym, nameStr); nameData = nameStr.data(); nameSize = nameStr.size(); assert(nameSize == nameStr.size() && "name length truncated"); } return StringRef(nameData, nameSize); } InputFile *Symbol::getFile() { if (auto *sym = dyn_cast(this)) return sym->file; - if (auto *sym = dyn_cast(this)) + if (auto *sym = dyn_cast(this)) + return sym->file; + if (auto *sym = dyn_cast(this)) return sym->file; return nullptr; } bool Symbol::isLive() const { if (auto *r = dyn_cast(this)) return r->getChunk()->live; if (auto *imp = dyn_cast(this)) return imp->file->live; if (auto *imp = dyn_cast(this)) return imp->wrappedSym->file->thunkLive; // Assume any other kind of symbol is live. return true; } // MinGW specific. void Symbol::replaceKeepingName(Symbol *other, size_t size) { StringRef origName = getName(); memcpy(this, other, size); nameData = origName.data(); nameSize = origName.size(); } COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t symSize = cast(file)->getCOFFObj()->getSymbolTableEntrySize(); if (symSize == sizeof(coff_symbol16)) return COFFSymbolRef(reinterpret_cast(sym)); assert(symSize == sizeof(coff_symbol32)); return COFFSymbolRef(reinterpret_cast(sym)); } uint16_t DefinedAbsolute::numOutputSections; static Chunk *makeImportThunk(DefinedImportData *s, uint16_t machine) { if (machine == AMD64) return make(s); if (machine == I386) return make(s); if (machine == ARM64) return make(s); assert(machine == ARMNT); return make(s); } DefinedImportThunk::DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine) : Defined(DefinedImportThunkKind, name), wrappedSym(s), data(makeImportThunk(s, machine)) {} Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. 
for (Symbol *a = weakAlias; a; a = cast(a)->weakAlias) if (auto *d = dyn_cast(a)) return d; return nullptr; } -MemoryBufferRef Lazy::getMemberBuffer() { +MemoryBufferRef LazyArchive::getMemberBuffer() { Archive::Child c = CHECK(sym.getMember(), "could not get the member for symbol " + toCOFFString(sym)); return CHECK(c.getMemoryBufferRef(), "could not get the buffer for the member defining symbol " + toCOFFString(sym)); } } // namespace coff } // namespace lld Index: lld/trunk/COFF/Driver.h =================================================================== --- lld/trunk/COFF/Driver.h (revision 370815) +++ lld/trunk/COFF/Driver.h (revision 370816) @@ -1,207 +1,208 @@ //===- Driver.h -------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_DRIVER_H #define LLD_COFF_DRIVER_H #include "Config.h" #include "SymbolTable.h" #include "lld/Common/LLVM.h" #include "lld/Common/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/TarWriter.h" #include #include #include namespace lld { namespace coff { class LinkerDriver; extern LinkerDriver *driver; using llvm::COFF::MachineTypes; using llvm::COFF::WindowsSubsystem; using llvm::Optional; class COFFOptTable : public llvm::opt::OptTable { public: COFFOptTable(); }; class ArgParser { public: // Concatenate LINK environment variable and given arguments and parse them. llvm::opt::InputArgList parseLINK(std::vector args); // Tokenizes a given string and then parses as command line options. 
llvm::opt::InputArgList parse(StringRef s) { return parse(tokenize(s)); } // Tokenizes a given string and then parses as command line options in // .drectve section. /EXPORT options are returned in second element // to be processed in fastpath. std::pair> parseDirectives(StringRef s); private: // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef args); std::vector tokenize(StringRef s); COFFOptTable table; }; class LinkerDriver { public: void link(llvm::ArrayRef args); // Used by the resolver to parse .drectve section contents. void parseDirectives(InputFile *file); // Used by ArchiveFile to enqueue members. void enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym, StringRef parentName); MemoryBufferRef takeBuffer(std::unique_ptr mb); - void enqueuePath(StringRef path, bool wholeArchive); + void enqueuePath(StringRef path, bool wholeArchive, bool lazy); private: std::unique_ptr tar; // for /linkrepro // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef path); // Searches a file from search paths. Optional findFile(StringRef filename); Optional findLib(StringRef filename); StringRef doFindFile(StringRef filename); StringRef doFindLib(StringRef filename); StringRef doFindLibMinGW(StringRef filename); // Parses LIB environment which contains a list of search paths. void addLibSearchPaths(); // Library search path. The first element is always "" (current directory). std::vector searchPaths; // Convert resource files and potentially merge input resource object // trees into one resource tree. void convertResources(); void maybeExportMinGWSymbols(const llvm::opt::InputArgList &args); // We don't want to add the same file more than once. // Files are uniquified by their filesystem and file number. std::set visitedFiles; std::set visitedLibs; Symbol *addUndefined(StringRef sym); StringRef mangleMaybe(Symbol *s); // Windows specific -- "main" is not the only main function in Windows. 
// You can choose one from these four -- {w,}{WinMain,main}. // There are four different entry point functions for them, // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to // choose the right one depending on which "main" function is defined. // This function looks up the symbol table and resolve corresponding // entry point name. StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); - void addBuffer(std::unique_ptr mb, bool wholeArchive); + void addBuffer(std::unique_ptr mb, bool wholeArchive, + bool lazy); void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName, StringRef parentName, uint64_t offsetInArchive); void enqueueTask(std::function task); bool run(); std::list> taskQueue; std::vector filePaths; std::vector resources; llvm::StringSet<> directivesExports; }; // Functions below this line are defined in DriverUtils.cpp. void printHelp(const char *argv0); // Parses a string in the form of "[,]". void parseNumbers(StringRef arg, uint64_t *addr, uint64_t *size = nullptr); void parseGuard(StringRef arg); // Parses a string in the form of "[.]". // Minor's default value is 0. void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor); // Parses a string in the form of "[,[.]]". void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major, uint32_t *minor); void parseAlternateName(StringRef); void parseMerge(StringRef); void parseSection(StringRef); void parseAligncomm(StringRef); // Parses a string in the form of "[:]" void parseFunctionPadMin(llvm::opt::Arg *a, llvm::COFF::MachineTypes machine); // Parses a string in the form of "EMBED[,=]|NO". void parseManifest(StringRef arg); // Parses a string in the form of "level=|uiAccess=" void parseManifestUAC(StringRef arg); // Parses a string in the form of "cd|net[,(cd|net)]*" void parseSwaprun(StringRef arg); // Create a resource file containing a manifest XML. std::unique_ptr createManifestRes(); void createSideBySideManifest(); // Used for dllexported symbols. 
Export parseExport(StringRef arg); void fixupExports(); void assignExportOrdinals(); // Parses a string in the form of "key=value" and check // if value matches previous values for the key. // This feature used in the directive section to reject // incompatible objects. void checkFailIfMismatch(StringRef arg, InputFile *source); // Convert Windows resource files (.res files) to a .obj file. MemoryBufferRef convertResToCOFF(ArrayRef mbs, ArrayRef objs); void runMSVCLinker(std::string rsp, ArrayRef objects); // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, #define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, #include "Options.inc" #undef OPTION }; } // namespace coff } // namespace lld #endif Index: lld/trunk/COFF/DebugTypes.cpp =================================================================== --- lld/trunk/COFF/DebugTypes.cpp (revision 370815) +++ lld/trunk/COFF/DebugTypes.cpp (revision 370816) @@ -1,268 +1,268 @@ //===- DebugTypes.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "DebugTypes.h" #include "Driver.h" #include "InputFiles.h" #include "lld/Common/ErrorHandler.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/Support/Path.h" using namespace lld; using namespace lld::coff; using namespace llvm; using namespace llvm::codeview; namespace { // The TypeServerSource class represents a PDB type server, a file referenced by // OBJ files compiled with MSVC /Zi. 
A single PDB can be shared by several OBJ // files, therefore there must be only once instance per OBJ lot. The file path // is discovered from the dependent OBJ's debug type stream. The // TypeServerSource object is then queued and loaded by the COFF Driver. The // debug type stream for such PDB files will be merged first in the final PDB, // before any dependent OBJ. class TypeServerSource : public TpiSource { public: explicit TypeServerSource(MemoryBufferRef m, llvm::pdb::NativeSession *s) : TpiSource(PDB, nullptr), session(s), mb(m) {} // Queue a PDB type server for loading in the COFF Driver static void enqueue(const ObjFile *dependentFile, const TypeServer2Record &ts); // Create an instance static Expected getInstance(MemoryBufferRef m); // Fetch the PDB instance loaded for a corresponding dependent OBJ. static Expected findFromFile(const ObjFile *dependentFile); static std::map> instances; // The interface to the PDB (if it was opened successfully) std::unique_ptr session; private: MemoryBufferRef mb; }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). class UseTypeServerSource : public TpiSource { public: UseTypeServerSource(const ObjFile *f, const TypeServer2Record *ts) : TpiSource(UsingPDB, f), typeServerDependency(*ts) {} // Information about the PDB type server dependency, that needs to be loaded // in before merging this OBJ. TypeServer2Record typeServerDependency; }; // This class represents the debug type stream of a Microsoft precompiled // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output // PDB, before any other OBJs that depend on this. Note that only MSVC generate // such files, clang does not. class PrecompSource : public TpiSource { public: PrecompSource(const ObjFile *f) : TpiSource(PCH, f) {} }; // This class represents the debug type stream of an OBJ file that depends on a // Microsoft precompiled headers OBJ (see PrecompSource). 
class UsePrecompSource : public TpiSource {
public:
  // Copies the Precomp record, so the source does not depend on the lifetime
  // of *precomp.
  UsePrecompSource(const ObjFile *f, const PrecompRecord *precomp)
      : TpiSource(UsingPCH, f), precompDependency(*precomp) {}

  // Information about the Precomp OBJ dependency, that needs to be loaded in
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
} // namespace

// Owns every TpiSource ever constructed: the TpiSource constructor below
// registers `this` here, which is why the make*() factories can hand out raw
// pointers obtained from `new`.
// NOTE(review): the element type's template arguments are missing in this
// text - confirm against the upstream file.
static std::vector> GC;

TpiSource::TpiSource(TpiKind k, const ObjFile *f) : kind(k), file(f) {
  GC.push_back(std::unique_ptr(this));
}

// Factory for the debug type stream of a regular OBJ.
TpiSource *lld::coff::makeTpiSource(const ObjFile *f) {
  return new TpiSource(TpiSource::Regular, f);
}

// Factory for an OBJ that depends on a PDB type server. The PDB is queued
// for loading first, then the dependent source is created.
TpiSource *lld::coff::makeUseTypeServerSource(const ObjFile *f,
                                              const TypeServer2Record *ts) {
  TypeServerSource::enqueue(f, *ts);
  return new UseTypeServerSource(f, ts);
}

// Factory for a precompiled headers OBJ.
TpiSource *lld::coff::makePrecompSource(const ObjFile *f) {
  return new PrecompSource(f);
}

// Factory for an OBJ that depends on a precompiled headers OBJ.
TpiSource *lld::coff::makeUsePrecompSource(const ObjFile *f,
                                           const PrecompRecord *precomp) {
  return new UsePrecompSource(f, precomp);
}

namespace lld {
namespace coff {
// Returns the Precomp record stored in a UsingPCH source. The kind is only
// checked by an assert; the downcast itself is unchecked.
template <>
const PrecompRecord &retrieveDependencyInfo(const TpiSource *source) {
  assert(source->kind == TpiSource::UsingPCH);
  return ((const UsePrecompSource *)source)->precompDependency;
}

// Returns the TypeServer2 record stored in a UsingPDB source. The kind is
// only checked by an assert; the downcast itself is unchecked.
template <>
const TypeServer2Record &retrieveDependencyInfo(const TpiSource *source) {
  assert(source->kind == TpiSource::UsingPDB);
  return ((const UseTypeServerSource *)source)->typeServerDependency;
}
} // namespace coff
} // namespace lld

// Definition of the static registry declared in TypeServerSource.
std::map> TypeServerSource::instances;

// Make a PDB path assuming the PDB is in the same folder as the OBJ
static std::string getPdbBaseName(const ObjFile *file, StringRef tSPath) {
  // Prefer the parent's name when the OBJ has one, else the OBJ's own name.
  StringRef localPath =
      !file->parentName.empty() ? file->parentName : file->getName();
  SmallString<128> path = sys::path::parent_path(localPath);

  // Currently, type server PDBs are only created by MSVC cl, which only runs
  // on Windows, so we can assume type server paths are Windows style.
  sys::path::append(path, sys::path::filename(tSPath, sys::path::Style::windows));
  return path.str();
}

// The casing of the PDB path stamped in the OBJ can differ from the actual path
// on disk. With this, we ensure to always use lowercase as a key for the
// TypeServerSource::instances map, at least on Windows.
static std::string normalizePdbPath(StringRef path) {
#if defined(_WIN32)
  return path.lower();
#else // !_WIN32
  return path;
#endif
}

// If existing, return the actual PDB path on disk.
// The path recorded in the OBJ is tried first, then a file of the same name
// next to the OBJ (see getPdbBaseName). The result is normalized.
static Optional findPdbPath(StringRef pdbPath, const ObjFile *dependentFile) {
  // Ensure the file exists before anything else. In some cases, if the path
  // points to a removable device, Driver::enqueuePath() would fail with an
  // error (EAGAIN, "resource unavailable try again") which we want to skip
  // silently.
  if (llvm::sys::fs::exists(pdbPath))
    return normalizePdbPath(pdbPath);
  std::string ret = getPdbBaseName(dependentFile, pdbPath);
  if (llvm::sys::fs::exists(ret))
    return normalizePdbPath(ret);
  return None;
}

// Fetch the PDB instance that was already loaded by the COFF Driver.
// Returns an error when the PDB cannot be found on disk, when loading it
// failed earlier (the stored message is reported), or when its GUID does not
// match the one in the OBJ's TypeServer2 record.
Expected TypeServerSource::findFromFile(const ObjFile *dependentFile) {
  const TypeServer2Record &ts =
      retrieveDependencyInfo(dependentFile->debugTypesObj);

  Optional p = findPdbPath(ts.Name, dependentFile);
  if (!p)
    return createFileError(ts.Name, errorCodeToError(std::error_code(
                                        ENOENT, std::generic_category())));

  auto it = TypeServerSource::instances.find(*p);
  // The PDB file exists on disk, at this point we expect it to have been
  // inserted in the map by TypeServerSource::enqueue() and filled in by
  // lld::coff::loadTypeServerSource()
  assert(it != TypeServerSource::instances.end());

  std::pair &pdb = it->second;

  // A null instance means the load failed; pdb.first holds the message.
  if (!pdb.second)
    return createFileError(
        *p, createStringError(inconvertibleErrorCode(), pdb.first.c_str()));

  pdb::PDBFile &pdbFile = (pdb.second)->session->getPDBFile();
  pdb::InfoStream &info = cantFail(pdbFile.getPDBInfoStream());

  // Just because a file with a matching name was found doesn't mean it can be
  // used. The GUID must match between the PDB header and the OBJ
  // TypeServer2 record. The 'Age' is used by MSVC incremental compilation.
  if (info.getGuid() != ts.getGuid())
    return createFileError(
        ts.Name,
        make_error(pdb::pdb_error_code::signature_out_of_date));

  return pdb.second;
}

// FIXME: Temporary interface until PDBLinker::maybeMergeTypeServerPDB() is
// moved here.
// Returns the session of the type server found for f, or forwards the error
// from TypeServerSource::findFromFile().
Expected lld::coff::findTypeServerSource(const ObjFile *f) {
  Expected ts = TypeServerSource::findFromFile(f);
  if (!ts)
    return ts.takeError();
  return ts.get()->session.get();
}

// Queue a PDB type server for loading in the COFF Driver
void TypeServerSource::enqueue(const ObjFile *dependentFile,
                               const TypeServer2Record &ts) {
  // Start by finding where the PDB is located (either the record path or next
  // to the OBJ file)
  Optional p = findPdbPath(ts.Name, dependentFile);
  if (!p)
    return;
  // Reserve the map slot up front; emplace() reports whether it was new.
  auto it = TypeServerSource::instances.emplace(
      *p, std::pair{});
  if (!it.second)
    return; // another OBJ already scheduled this PDB for load

- driver->enqueuePath(*p, false);
+ driver->enqueuePath(*p, false, false);
}

// Create an instance of TypeServerSource or an error string if the PDB couldn't
// be loaded. The error message will be displayed later, when the referring OBJ
// will be merged in. NOTE - a PDB load failure is not a link error: some
// debug info will simply be missing from the final PDB - that is the default
// accepted behavior.
// Records the outcome of loading the PDB held in m under its normalized
// buffer identifier: {error message, nullptr} on failure, {empty string,
// instance} on success.
void lld::coff::loadTypeServerSource(llvm::MemoryBufferRef m) {
  std::string path = normalizePdbPath(m.getBufferIdentifier());

  Expected ts = TypeServerSource::getInstance(m);
  if (!ts)
    TypeServerSource::instances[path] = {toString(ts.takeError()), nullptr};
  else
    TypeServerSource::instances[path] = {{}, *ts};
}

// Opens m as a native PDB session and wraps it in a new TypeServerSource.
// Returns the error from opening the session, or from fetching the PDB Info
// stream, when either step fails.
// NOTE(review): the template arguments of Expected, std::unique_ptr and
// static_cast are missing in this text - confirm against the upstream file.
Expected TypeServerSource::getInstance(MemoryBufferRef m) {
  std::unique_ptr iSession;

  Error err = pdb::NativeSession::createFromPdb(
      MemoryBuffer::getMemBuffer(m, false), iSession);
  if (err)
    return std::move(err);

  // Take over the session under its concrete type.
  std::unique_ptr session(
      static_cast(iSession.release()));

  pdb::PDBFile &pdbFile = session->getPDBFile();
  Expected info = pdbFile.getPDBInfoStream();
  // All PDB Files should have an Info stream.
  if (!info)
    return info.takeError();
  return new TypeServerSource(m, session.release());
}
Index: lld/trunk/COFF/Writer.cpp
===================================================================
--- lld/trunk/COFF/Writer.cpp (revision 370815)
+++ lld/trunk/COFF/Writer.cpp (revision 370816)
@@ -1,1945 +1,1946 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Writer.h"
#include "Config.h"
#include "DLL.h"
#include "InputFiles.h"
#include "MapFile.h"
#include "PDB.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Timer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/xxhash.h"
// NOTE(review): the five directives below have lost their header names in
// this text - restore them from the upstream file.
#include #include #include #include #include

using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::coff;

/* To re-generate DOSProgram:
$ cat > /tmp/DOSProgram.asm
org 0
        ; Copy cs to ds.
        push cs
        pop ds
        ; Point ds:dx at the $-terminated string.
        mov dx, str
        ; Int 21/AH=09h: Write string to standard output.
        mov ah, 0x9
        int 0x21
        ; Int 21/AH=4Ch: Exit with return code (in AL).
        mov ax, 0x4C01
        int 0x21
str:
        db 'This program cannot be run in DOS mode.$'
align 8, db 0
$ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin
$ xxd -i /tmp/DOSProgram.bin
*/
// Assembled bytes of the program above: the code, the '$'-terminated
// message, then zero padding.
static unsigned char dosProgram[] = {
  0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c,
  0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72,
  0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65,
  0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20,
  0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00
};
static_assert(sizeof(dosProgram) % 8 == 0,
              "DOSProgram size must be multiple of 8");

// Size of the DOS header plus the DOS program that follows it.
static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram);
static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8");

static const int numberOfDataDirectory = 16;

// Global vector of all output sections. After output sections are finalized,
// this can be indexed by Chunk::getOutputSection.
// NOTE(review): the element type's template argument is missing in this text.
static std::vector outputSections;

// osidx is 1-based; 0 means the chunk has no output section.
OutputSection *Chunk::getOutputSection() const {
  return osidx == 0 ? nullptr : outputSections[osidx - 1];
}

namespace {

// Chunk holding the debug directory: one debug_directory entry per record,
// plus one extra entry when writeRepro is set.
class DebugDirectoryChunk : public NonSectionChunk {
public:
  // Keeps a reference to r (not a copy), so r must outlive this chunk.
  DebugDirectoryChunk(const std::vector &r, bool writeRepro)
      : records(r), writeRepro(writeRepro) {}

  size_t getSize() const override {
    return (records.size() + int(writeRepro)) * sizeof(debug_directory);
  }

  // Fills one CodeView entry per record, then the optional repro entry.
  void writeTo(uint8_t *b) const override {
    auto *d = reinterpret_cast(b);

    for (const Chunk *record : records) {
      OutputSection *os = record->getOutputSection();
      // File offset of the record = section file offset + offset of the
      // record within its section.
      uint64_t offs = os->getFileOff() + (record->getRVA() - os->getRVA());
      fillEntry(d, COFF::IMAGE_DEBUG_TYPE_CODEVIEW, record->getSize(),
                record->getRVA(), offs);
      ++d;
    }

    if (writeRepro) {
      // FIXME: The COFF spec allows either a 0-sized entry to just say
      // "the timestamp field is really a hash", or a 4-byte size field
      // followed by that many bytes containing a longer hash (with the
      // lowest 4 bytes usually being the timestamp in little-endian order).
      // Consider storing the full 8 bytes computed by xxHash64 here.
      fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0);
    }
  }

  // Patches the timestamp of every entry written so far; the field addresses
  // were remembered by fillEntry().
  void setTimeDateStamp(uint32_t timeDateStamp) {
    for (support::ulittle32_t *tds : timeDateStamps)
      *tds = timeDateStamp;
  }

private:
  // Writes one directory entry with a zero timestamp and records where that
  // timestamp lives so setTimeDateStamp() can fill it in later.
  void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size,
                 uint64_t rva, uint64_t offs) const {
    d->Characteristics = 0;
    d->TimeDateStamp = 0;
    d->MajorVersion = 0;
    d->MinorVersion = 0;
    d->Type = debugType;
    d->SizeOfData = size;
    d->AddressOfRawData = rva;
    d->PointerToRawData = offs;

    timeDateStamps.push_back(&d->TimeDateStamp);
  }

  // mutable because it is filled from the const writeTo() path.
  mutable std::vector timeDateStamps;
  const std::vector &records;
  bool writeRepro;
};

// Chunk holding a codeview::DebugInfo header followed by the NUL-terminated
// config->pdbAltPath string.
class CVDebugRecordChunk : public NonSectionChunk {
public:
  size_t getSize() const override {
    return sizeof(codeview::DebugInfo) + config->pdbAltPath.size() + 1;
  }

  void writeTo(uint8_t *b) const override {
    // Save off the DebugInfo entry to backfill the file signature (build id)
    // in Writer::writeBuildId
    buildId = reinterpret_cast(b);

    // variable sized field (PDB Path)
    char *p = reinterpret_cast(b + sizeof(*buildId));
    if (!config->pdbAltPath.empty())
      memcpy(p, config->pdbAltPath.data(), config->pdbAltPath.size());
    p[config->pdbAltPath.size()] = '\0';
  }

  // Points into the output buffer once writeTo() has run.
  mutable codeview::DebugInfo *buildId = nullptr;
};

// PartialSection represents a group of chunks that contribute to an
// OutputSection. Collating a collection of PartialSections of same name and
// characteristics constitutes the OutputSection.
class PartialSectionKey {
public:
  StringRef name;
  unsigned characteristics;

  // Orders keys by name first, then by characteristics.
  // NOTE(review): the `c == 1` test relies on compare() returning exactly 1
  // for "greater" - confirm against the StringRef documentation.
  bool operator<(const PartialSectionKey &other) const {
    int c = name.compare(other.name);
    if (c == 1)
      return false;
    if (c == 0)
      return characteristics < other.characteristics;
    return true;
  }
};

// The writer writes a SymbolTable result to a file.
// NOTE(review): many declarations in this class (template, ArrayRef,
// std::vector, std::map, std::unique_ptr, llvm::Optional) carry no template
// argument list in this text; they look truncated - confirm against the
// upstream file before building.
class Writer {
public:
  // Binds to the error handler's output buffer slot.
  Writer() : buffer(errorHandler().outputBuffer) {}
  void run();

private:
  void createSections();
  void createMiscChunks();
  void createImportTables();
  void appendImportThunks();
  void locateImportTables();
  void createExportTable();
  void mergeSections();
  void removeUnusedSections();
  void assignAddresses();
  void finalizeAddresses();
  void removeEmptySections();
  void assignOutputSectionIndices();
  void createSymbolAndStringTable();
  void openFile(StringRef outputPath);
  template void writeHeader();
  void createSEHTable();
  void createRuntimePseudoRelocs();
  void insertCtorDtorSymbols();
  void createGuardCFTables();
  void markSymbolsForRVATable(ObjFile *file,
                              ArrayRef symIdxChunks,
                              SymbolRVASet &tableSymbols);
  void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
                        StringRef countSym);
  void setSectionPermissions();
  void writeSections();
  void writeBuildId();
  void sortExceptionTable();
  void sortCRTSectionChunks(std::vector &chunks);
  void addSyntheticIdata();
  void fixPartialSectionChars(StringRef name, uint32_t chars);
  bool fixGnuImportChunks();
  PartialSection *createPartialSection(StringRef name, uint32_t outChars);
  PartialSection *findPartialSection(StringRef name, uint32_t outChars);

  llvm::Optional createSymbol(Defined *d);
  size_t addEntryToStringTable(StringRef str);

  OutputSection *findSection(StringRef name);
  void addBaserels();
  void addBaserelBlocks(std::vector &v);

  uint32_t getSizeOfInitializedData();

  // Reference to the buffer owned by the error handler (see constructor).
  std::unique_ptr &buffer;
  std::map partialSections;
  std::vector strtab;
  std::vector outputSymtab;
  IdataContents idata;
  // Start chunk and byte size of the import directory table and of the IAT;
  // computed by locateImportTables().
  Chunk *importTableStart = nullptr;
  uint64_t importTableSize = 0;
  Chunk *edataStart = nullptr;
  Chunk *edataEnd = nullptr;
  Chunk *iatStart = nullptr;
  uint64_t iatSize = 0;
  DelayLoadContents delayIdata;
  EdataContents edata;
  bool setNoSEHCharacteristic = false;

  DebugDirectoryChunk *debugDirectory = nullptr;
  std::vector debugRecords;
  CVDebugRecordChunk *buildId = nullptr;
  ArrayRef sectionTable;

  // NOTE(review): fileSize, sizeOfImage, sizeOfHeaders and the section
  // pointers below have no initializer here; presumably they are assigned
  // before use - verify.
  uint64_t fileSize;
  uint32_t pointerToSymbolTable = 0;
  uint64_t sizeOfImage;
  uint64_t sizeOfHeaders;

  OutputSection *textSec;
  OutputSection *rdataSec;
  OutputSection *buildidSec;
  OutputSection *dataSec;
  OutputSection *pdataSec;
  OutputSection *idataSec;
  OutputSection *edataSec;
  OutputSection *didatSec;
  OutputSection *rsrcSec;
  OutputSection *relocSec;
  OutputSection *ctorsSec;
  OutputSection *dtorsSec;

  // The first and last .pdata sections in the output file.
  //
  // We need to keep track of the location of .pdata in whichever section it
  // gets merged into so that we can sort its contents and emit a correct data
  // directory entry for the exception table. This is also the case for some
  // other sections (such as .edata) but because the contents of those sections
  // are entirely linker-generated we can keep track of their locations using
  // the chunks that the linker creates. All .pdata chunks come from input
  // files, so we need to keep track of them separately.
  Chunk *firstPdata = nullptr;
  Chunk *lastPdata;
};
} // anonymous namespace

namespace lld {
namespace coff {

static Timer codeLayoutTimer("Code Layout", Timer::root());
static Timer diskCommitTimer("Commit Output File", Timer::root());

// Entry point: builds a temporary Writer and runs it.
void writeResult() { Writer().run(); }

// Appends c after the section's existing chunks.
void OutputSection::addChunk(Chunk *c) {
  chunks.push_back(c);
}

// Places c before the section's existing chunks.
void OutputSection::insertChunkAtStart(Chunk *c) {
  chunks.insert(chunks.begin(), c);
}

// Replaces the bits selected by permMask with c; other characteristic bits
// are left as they were.
void OutputSection::setPermissions(uint32_t c) {
  header.Characteristics &= ~permMask;
  header.Characteristics |= c;
}

// Moves all chunks and contributing partial sections of `other` to the end
// of this section's lists, leaving `other` empty.
void OutputSection::merge(OutputSection *other) {
  chunks.insert(chunks.end(), other->chunks.begin(), other->chunks.end());
  other->chunks.clear();
  contribSections.insert(contribSections.end(), other->contribSections.begin(),
                         other->contribSections.end());
  other->contribSections.clear();
}

// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *buf) { auto *hdr = reinterpret_cast(buf); *hdr = header; if (stringTableOff) { // If name is too long, write offset into the string table as a name. sprintf(hdr->Name, "/%d", stringTableOff); } else { assert(!config->debug || name.size() <= COFF::NameSize || (hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0); strncpy(hdr->Name, name.data(), std::min(name.size(), (size_t)COFF::NameSize)); } } void OutputSection::addContributingPartialSection(PartialSection *sec) { contribSections.push_back(sec); } } // namespace coff } // namespace lld // Check whether the target address S is in range from a relocation // of type relType at address P. static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { if (config->machine == ARMNT) { int64_t diff = AbsoluteDifference(s, p + 4) + margin; switch (relType) { case IMAGE_REL_ARM_BRANCH20T: return isInt<21>(diff); case IMAGE_REL_ARM_BRANCH24T: case IMAGE_REL_ARM_BLX23T: return isInt<25>(diff); default: return true; } } else if (config->machine == ARM64) { int64_t diff = AbsoluteDifference(s, p) + margin; switch (relType) { case IMAGE_REL_ARM64_BRANCH26: return isInt<28>(diff); case IMAGE_REL_ARM64_BRANCH19: return isInt<21>(diff); case IMAGE_REL_ARM64_BRANCH14: return isInt<16>(diff); default: return true; } } else { llvm_unreachable("Unexpected architecture"); } } // Return the last thunk for the given target if it is in range, // or create a new one. 
// The returned flag is true when a new thunk was created, false when the
// cached one was reused. Thunks are cached per target RVA in lastThunks.
// NOTE(review): the template arguments of std::pair, DenseMap and make() are
// missing throughout this text - confirm against the upstream file.
static std::pair getThunk(DenseMap &lastThunks, Defined *target, uint64_t p,
                          uint16_t type, int margin) {
  // operator[] creates a null slot on first use; the reference lets the new
  // thunk be stored back below.
  Defined *&lastThunk = lastThunks[target->getRVA()];
  if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
    return {lastThunk, false};
  Chunk *c;
  switch (config->machine) {
  case ARMNT:
    c = make(target);
    break;
  case ARM64:
    c = make(target);
    break;
  default:
    llvm_unreachable("Unexpected architecture");
  }
  Defined *d = make("", c);
  lastThunk = d;
  return {d, true};
}

// This checks all relocations, and for any relocation which isn't in range
// it adds a thunk after the section chunk that contains the relocation.
// If the latest thunk for the specific target is in range, that is used
// instead of creating a new thunk. All range checks are done with the
// specified margin, to make sure that relocations that originally are in
// range, but only barely, also get thunks - in case other added thunks makes
// the target go out of range.
//
// After adding thunks, we verify that all relocations are in range (with
// no extra margin requirements). If this failed, we restart (throwing away
// the previously created thunks) and retry with a wider margin.
//
// Returns true when at least one thunk chunk was inserted into os.
static bool createThunks(OutputSection *os, int margin) {
  bool addressesChanged = false;
  DenseMap lastThunks;
  DenseMap, uint32_t> thunkSymtabIndices;
  size_t thunksSize = 0;
  // Recheck Chunks.size() each iteration, since we can insert more
  // elements into it.
  for (size_t i = 0; i != os->chunks.size(); ++i) {
    SectionChunk *sc = dyn_cast_or_null(os->chunks[i]);
    if (!sc)
      continue;
    size_t thunkInsertionSpot = i + 1;

    // Try to get a good enough estimate of where new thunks will be placed.
    // Offset this by the size of the new thunks added so far, to make the
    // estimate slightly better.
    size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize;
    ObjFile *file = sc->file;
    // (relocation index, replacement symbol table index) pairs, collected in
    // increasing relocation order.
    std::vector> relocReplacements;
    ArrayRef originalRelocs =
        file->getCOFFObj()->getRelocations(sc->header);
    for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) {
      const coff_relocation &rel = originalRelocs[j];
      Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex);

      // The estimate of the source address P should be pretty accurate,
      // but we don't know whether the target Symbol address should be
      // offset by thunksSize or not (or by some of thunksSize but not all of
      // it), giving us some uncertainty once we have added one thunk.
      uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize;

      Defined *sym = dyn_cast_or_null(relocTarget);
      if (!sym)
        continue;

      uint64_t s = sym->getRVA();

      if (isInRange(rel.Type, s, p, margin))
        continue;

      // If the target isn't in range, hook it up to an existing or new
      // thunk.
      Defined *thunk;
      bool wasNew;
      std::tie(thunk, wasNew) = getThunk(lastThunks, sym, p, rel.Type, margin);
      if (wasNew) {
        Chunk *thunkChunk = thunk->getChunk();
        thunkChunk->setRVA(
            thunkInsertionRVA); // Estimate of where it will be located.
        os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk);
        thunkInsertionSpot++;
        thunksSize += thunkChunk->getSize();
        thunkInsertionRVA += thunkChunk->getSize();
        addressesChanged = true;
      }

      // To redirect the relocation, add a symbol to the parent object file's
      // symbol table, and replace the relocation symbol table index with the
      // new index.
      auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U});
      uint32_t &thunkSymbolIndex = insertion.first->second;
      if (insertion.second)
        thunkSymbolIndex = file->addRangeThunkSymbol(thunk);
      relocReplacements.push_back({j, thunkSymbolIndex});
    }

    // Get a writable copy of this section's relocations so they can be
    // modified. If the relocations point into the object file, allocate new
    // memory. Otherwise, this must be previously allocated memory that can be
    // modified in place.
    ArrayRef curRelocs = sc->getRelocs();
    MutableArrayRef newRelocs;
    if (originalRelocs.data() == curRelocs.data()) {
      newRelocs = makeMutableArrayRef(
          bAlloc.Allocate(originalRelocs.size()),
          originalRelocs.size());
    } else {
      newRelocs = makeMutableArrayRef(
          const_cast(curRelocs.data()), curRelocs.size());
    }

    // Copy each relocation, but replace the symbol table indices which need
    // thunks.
    auto nextReplacement = relocReplacements.begin();
    auto endReplacement = relocReplacements.end();
    // Note: this `i` shadows the chunk index of the enclosing loop.
    for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) {
      newRelocs[i] = originalRelocs[i];
      if (nextReplacement != endReplacement && nextReplacement->first == i) {
        newRelocs[i].SymbolTableIndex = nextReplacement->second;
        ++nextReplacement;
      }
    }

    sc->setRelocs(newRelocs);
  }
  return addressesChanged;
}

// Verify that all relocations are in range, with no extra margin requirements.
// NOTE(review): `chunks` is taken by value, so each call copies the vector;
// a const reference looks sufficient - confirm and change once the stripped
// template argument is restored.
static bool verifyRanges(const std::vector chunks) {
  for (Chunk *c : chunks) {
    SectionChunk *sc = dyn_cast_or_null(c);
    if (!sc)
      continue;

    ArrayRef relocs = sc->getRelocs();
    for (size_t j = 0, e = relocs.size(); j < e; ++j) {
      const coff_relocation &rel = relocs[j];
      Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex);

      Defined *sym = dyn_cast_or_null(relocTarget);
      if (!sym)
        continue;

      uint64_t p = sc->getRVA() + rel.VirtualAddress;
      uint64_t s = sym->getRVA();

      if (!isInRange(rel.Type, s, p, 0))
        return false;
    }
  }
  return true;
}

// Assign addresses and add thunks if necessary.
// Thunks are only considered for ARMNT and ARM64. The loop starts with a
// margin of 100 KiB, doubles it on every retry, and gives up with a fatal
// error once 10 passes have been made without all ranges verifying.
void Writer::finalizeAddresses() {
  assignAddresses();
  if (config->machine != ARMNT && config->machine != ARM64)
    return;

  // Snapshot the chunk lists so a failed pass can be rolled back.
  size_t origNumChunks = 0;
  for (OutputSection *sec : outputSections) {
    sec->origChunks = sec->chunks;
    origNumChunks += sec->chunks.size();
  }

  int pass = 0;
  int margin = 1024 * 100;
  while (true) {
    // First check whether we need thunks at all, or if the previous pass of
    // adding them turned out ok.
    bool rangesOk = true;
    size_t numChunks = 0;
    for (OutputSection *sec : outputSections) {
      if (!verifyRanges(sec->chunks)) {
        rangesOk = false;
        break;
      }
      numChunks += sec->chunks.size();
    }
    if (rangesOk) {
      if (pass > 0)
        log("Added " + Twine(numChunks - origNumChunks) + " thunks with " +
            "margin " + Twine(margin) + " in " + Twine(pass) + " passes");
      return;
    }

    if (pass >= 10)
      fatal("adding thunks hasn't converged after " + Twine(pass) + " passes");

    if (pass > 0) {
      // If the previous pass didn't work out, reset everything back to the
      // original conditions before retrying with a wider margin. This should
      // ideally never happen under real circumstances.
      for (OutputSection *sec : outputSections)
        sec->chunks = sec->origChunks;
      margin *= 2;
    }

    // Try adding thunks everywhere where it is needed, with a margin
    // to avoid things going out of range due to the added thunks.
    bool addressesChanged = false;
    for (OutputSection *sec : outputSections)
      addressesChanged |= createThunks(sec, margin);
    // If the verification above thought we needed thunks, we should have
    // added some.
    assert(addressesChanged);

    // Recalculate the layout for the whole image (and verify the ranges at
    // the start of the next round).
    assignAddresses();

    pass++;
  }
}

// The main function of the writer.
// Lays out the image, writes headers and sections into the output buffer,
// optionally produces the PDB, and finally commits the buffer. The commit is
// skipped when errors have been reported.
void Writer::run() {
  ScopedTimer t1(codeLayoutTimer);

  createImportTables();
  createSections();
  createMiscChunks();
  appendImportThunks();
  createExportTable();
  mergeSections();
  removeUnusedSections();
  finalizeAddresses();
  removeEmptySections();
  assignOutputSectionIndices();
  setSectionPermissions();
  createSymbolAndStringTable();

  if (fileSize > UINT32_MAX)
    fatal("image size (" + Twine(fileSize) + ") " +
        "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");

  openFile(config->outputFile);
  // NOTE(review): both branches read identically here because the template
  // argument of writeHeader is missing in this text; presumably they select
  // the 64-bit and 32-bit header types - confirm against the upstream file.
  if (config->is64()) {
    writeHeader();
  } else {
    writeHeader();
  }
  writeSections();
  sortExceptionTable();

  // Stop the layout timer before the PDB is created.
  t1.stop();

  if (!config->pdbPath.empty() && config->debug) {
    assert(buildId);
    createPDB(symtab, outputSections, sectionTable, buildId->buildId);
  }
  writeBuildId();

  writeMapFile(outputSections);

  if (errorCount())
    return;

  ScopedTimer t2(diskCommitTimer);
  if (auto e = buffer->commit())
    fatal("failed to write the output file: " + toString(std::move(e)));
}

// Maps an input section name to its output section name: everything from the
// first '$' on is dropped, then everything from the first '.' found at index
// 1 or later.
static StringRef getOutputSectionName(StringRef name) {
  StringRef s = name.split('$').first;

  // Treat a later period as a separator for MinGW, for sections like
  // ".ctors.01234".
  return s.substr(0, s.find('.', 1));
}

// For /order.
// Stable-sorts chunks by the priority that config->order gives to the name of
// the chunk's section symbol; chunks without such a symbol get priority 0.
static void sortBySectionOrder(std::vector &chunks) {
  auto getPriority = [](const Chunk *c) {
    if (auto *sec = dyn_cast(c))
      if (sec->sym)
        return config->order.lookup(sec->sym->getName());
    return 0;
  };

  llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) {
    return getPriority(a) < getPriority(b);
  });
}

// Change the characteristics of existing PartialSections that belong to the
// section Name to Chars.
// A PartialSection belongs to `name` when its own name is exactly `name` or
// `name` followed by a '$' suffix. Its chunks are moved to the PartialSection
// with the same name and the requested characteristics, and the old one is
// left empty.
void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) {
  for (auto it : partialSections) {
    PartialSection *pSec = it.second;
    StringRef curName = pSec->name;
    // consume_front() strips the prefix; whatever remains must be empty or a
    // '$' suffix.
    if (!curName.consume_front(name) ||
        (!curName.empty() && !curName.startswith("$")))
      continue;
    if (pSec->characteristics == chars)
      continue;
    // NOTE(review): createPartialSection() presumably adds to
    // partialSections while it is being iterated here - verify that this is
    // safe for the container in use.
    PartialSection *destSec = createPartialSection(pSec->name, chars);
    destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(),
                           pSec->chunks.end());
    pSec->chunks.clear();
  }
}

// Sort concrete section chunks from GNU import libraries.
//
// GNU binutils doesn't use short import files, but instead produces import
// libraries that consist of object files, with section chunks for the .idata$*
// sections. These are linked just as regular static libraries. Each import
// library consists of one header object, one object file for every imported
// symbol, and one trailer object. In order for the .idata tables/lists to
// be formed correctly, the section chunks within each .idata$* section need
// to be grouped by library, and sorted alphabetically within each library
// (which makes sure the header comes first and the trailer last).
//
// Returns true when any .idata* PartialSection holds at least one chunk.
bool Writer::fixGnuImportChunks() {
  uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;

  // Make sure all .idata$* section chunks are mapped as RDATA in order to
  // be sorted into the same sections as our own synthesized .idata chunks.
  fixPartialSectionChars(".idata", rdata);

  bool hasIdata = false;
  // Sort all .idata$* chunks, grouping chunks from the same library,
  // with alphabetical ordering of the object files within a library.
  for (auto it : partialSections) {
    PartialSection *pSec = it.second;
    if (!pSec->name.startswith(".idata"))
      continue;

    if (!pSec->chunks.empty())
      hasIdata = true;
    llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) {
      SectionChunk *sc1 = dyn_cast_or_null(s);
      SectionChunk *sc2 = dyn_cast_or_null(t);
      if (!sc1 || !sc2) {
        // if SC1, order them ascending. If SC2 or both null,
        // S is not less than T.
        return sc1 != nullptr;
      }
      // Make a string with "libraryname/objectfile" for sorting, achieving
      // both grouping by library and sorting of objects within a library,
      // at once.
      std::string key1 =
          (sc1->file->parentName + "/" + sc1->file->getName()).str();
      std::string key2 =
          (sc2->file->parentName + "/" + sc2->file->getName()).str();
      return key1 < key2;
    });
  }
  return hasIdata;
}

// Add generated idata chunks, for imported symbols and DLLs, and a
// terminator in .idata$2.
void Writer::addSyntheticIdata() {
  uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
  idata.create();

  // Add the .idata content in the right section groups, to allow
  // chunks from other linked in object files to be grouped together.
  // See Microsoft PE/COFF spec 5.4 for details.
  // The helper appends v after whatever the PartialSection already holds.
  auto add = [&](StringRef n, std::vector &v) {
    PartialSection *pSec = createPartialSection(n, rdata);
    pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end());
  };

  // The loader assumes a specific order of data.
  // Add each type in the correct order.
  add(".idata$2", idata.dirs);
  add(".idata$4", idata.lookups);
  add(".idata$5", idata.addresses);
  add(".idata$6", idata.hints);
  add(".idata$7", idata.dllNames);
}

// Locate the first Chunk and size of the import directory list and the
// IAT.
// The import directory list is taken from the .idata$2 PartialSection and the
// IAT from .idata$5, both looked up with read-only initialized-data
// characteristics. The sizes are accumulated into importTableSize and iatSize
// with +=, so they build on whatever those members already hold.
void Writer::locateImportTables() {
  uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;

  if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) {
    if (!importDirs->chunks.empty())
      importTableStart = importDirs->chunks.front();
    for (Chunk *c : importDirs->chunks)
      importTableSize += c->getSize();
  }

  if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) {
    if (!importAddresses->chunks.empty())
      iatStart = importAddresses->chunks.front();
    for (Chunk *c : importAddresses->chunks)
      iatSize += c->getSize();
  }
}

// Return whether a SectionChunk's suffix (the dollar and any trailing
// suffix) should be removed and sorted into the main suffixless
// PartialSection.
static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) {
  // On MinGW, comdat groups are formed by putting the comdat group name
  // after the '$' in the section name. For .eh_frame$<symbol>, that must
  // still be sorted before the .eh_frame trailer from crtend.o, thus just
  // strip the section name trailer. For other sections, such as
  // .tls$$<symbol> (where non-comdat .tls symbols are otherwise stored in
  // ".tls$"), they must be strictly sorted after .tls. And for the
  // hypothetical case of comdat .CRT$XCU, we definitely need to keep the
  // suffix for sorting. Thus, to play it safe, only strip the suffix for
  // the standard sections.
  if (!config->mingw)
    return false;
  // Only COMDAT section chunks qualify; a null chunk never does.
  if (!sc || !sc->isCOMDAT())
    return false;
  return name.startswith(".text$") || name.startswith(".data$") ||
         name.startswith(".rdata$") || name.startswith(".pdata$") ||
         name.startswith(".xdata$") || name.startswith(".eh_frame$");
}

// Create output section objects and add them to OutputSections.
void Writer::createSections() {
  // First, create the builtin sections.
const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA; const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const uint32_t code = IMAGE_SCN_CNT_CODE; const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE; const uint32_t r = IMAGE_SCN_MEM_READ; const uint32_t w = IMAGE_SCN_MEM_WRITE; const uint32_t x = IMAGE_SCN_MEM_EXECUTE; SmallDenseMap, OutputSection *> sections; auto createSection = [&](StringRef name, uint32_t outChars) { OutputSection *&sec = sections[{name, outChars}]; if (!sec) { sec = make(name, outChars); outputSections.push_back(sec); } return sec; }; // Try to match the section order used by link.exe. textSec = createSection(".text", code | r | x); createSection(".bss", bss | r | w); rdataSec = createSection(".rdata", data | r); buildidSec = createSection(".buildid", data | r); dataSec = createSection(".data", data | r | w); pdataSec = createSection(".pdata", data | r); idataSec = createSection(".idata", data | r); edataSec = createSection(".edata", data | r); didatSec = createSection(".didat", data | r); rsrcSec = createSection(".rsrc", data | r); relocSec = createSection(".reloc", data | discardable | r); ctorsSec = createSection(".ctors", data | r | w); dtorsSec = createSection(".dtors", data | r | w); // Then bin chunks by name and output characteristics. for (Chunk *c : symtab->getChunks()) { auto *sc = dyn_cast(c); if (sc && !sc->live) { if (config->verbose) sc->printDiscardedMessage(); continue; } StringRef name = c->getSectionName(); if (shouldStripSectionSuffix(sc, name)) name = name.split('$').first; PartialSection *pSec = createPartialSection(name, c->getOutputCharacteristics()); pSec->chunks.push_back(c); } fixPartialSectionChars(".rsrc", data | r); fixPartialSectionChars(".edata", data | r); // Even in non MinGW cases, we might need to link against GNU import // libraries. bool hasIdata = fixGnuImportChunks(); if (!idata.empty()) hasIdata = true; if (hasIdata) addSyntheticIdata(); // Process an /order option. 
if (!config->order.empty()) for (auto it : partialSections) sortBySectionOrder(it.second->chunks); if (hasIdata) locateImportTables(); // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef name = getOutputSectionName(pSec->name); uint32_t outChars = pSec->characteristics; if (name == ".CRT") { // In link.exe, there is a special case for the I386 target where .CRT // sections are treated as if they have output characteristics DATA | R if // their characteristics are DATA | R | W. This implements the same // special case for all architectures. outChars = data | r; log("Processing section " + pSec->name + " -> " + name); sortCRTSectionChunks(pSec->chunks); } OutputSection *sec = createSection(name, outChars); for (Chunk *c : pSec->chunks) sec->addChunk(c); sec->addContributingPartialSection(pSec); } // Finally, move some output sections to the end. auto sectionOrder = [&](const OutputSection *s) { // Move DISCARDABLE (or non-memory-mapped) sections to the end of file // because the loader cannot handle holes. Stripping can remove other // discardable ones than .reloc, which is first of them (created early). if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) return 2; // .rsrc should come at the end of the non-discardable sections because its // size may change by the Win32 UpdateResources() function, causing // subsequent sections to move (see https://crbug.com/827082). 
if (s == rsrcSec) return 1; return 0; }; llvm::stable_sort(outputSections, [&](const OutputSection *s, const OutputSection *t) { return sectionOrder(s) < sectionOrder(t); }); } void Writer::createMiscChunks() { for (MergeChunk *p : MergeChunk::instances) { if (p) { p->finalizeContents(); rdataSec->addChunk(p); } } // Create thunks for locally-dllimported symbols. if (!symtab->localImportChunks.empty()) { for (Chunk *c : symtab->localImportChunks) rdataSec->addChunk(c); } // Create Debug Information Chunks OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec; if (config->debug || config->repro) { debugDirectory = make(debugRecords, config->repro); debugInfoSec->addChunk(debugDirectory); } if (config->debug) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even // if we're ultimately not going to write CodeView data to the PDB. buildId = make(); debugRecords.push_back(buildId); for (Chunk *c : debugRecords) debugInfoSec->addChunk(c); } // Create SEH table. x86-only. if (config->safeSEH) createSEHTable(); // Create /guard:cf tables if requested. if (config->guardCF != GuardCFLevel::Off) createGuardCFTables(); if (config->mingw) { createRuntimePseudoRelocs(); insertCtorDtorSymbols(); } } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) 
for (ImportFile *file : ImportFile::instances) { if (!file->live) continue; std::string dll = StringRef(file->dllName).lower(); if (config->dllOrder.count(dll) == 0) config->dllOrder[dll] = config->dllOrder.size(); if (file->impSym && !isa(file->impSym)) fatal(toString(*file->impSym) + " was replaced"); DefinedImportData *impSym = cast_or_null(file->impSym); if (config->delayLoads.count(StringRef(file->dllName).lower())) { if (!file->thunkSym) fatal("cannot delay-load " + toString(file) + " due to import of data: " + toString(*impSym)); delayIdata.add(impSym); } else { idata.add(impSym); } } } void Writer::appendImportThunks() { if (ImportFile::instances.empty()) return; for (ImportFile *file : ImportFile::instances) { if (!file->live) continue; if (!file->thunkSym) continue; if (!isa(file->thunkSym)) fatal(toString(*file->thunkSym) + " was replaced"); DefinedImportThunk *thunk = cast(file->thunkSym); if (file->thunkLive) textSec->addChunk(thunk->getChunk()); } if (!delayIdata.empty()) { Defined *helper = cast(config->delayLoadHelper); delayIdata.create(helper); for (Chunk *c : delayIdata.getChunks()) didatSec->addChunk(c); for (Chunk *c : delayIdata.getDataChunks()) dataSec->addChunk(c); for (Chunk *c : delayIdata.getCodeChunks()) textSec->addChunk(c); } } void Writer::createExportTable() { if (!edataSec->chunks.empty()) { // Allow using a custom built export table from input object files, instead // of having the linker synthesize the tables. if (config->hadExplicitExports) warn("literal .edata sections override exports"); } else if (!config->exports.empty()) { for (Chunk *c : edata.chunks) edataSec->addChunk(c); } if (!edataSec->chunks.empty()) { edataStart = edataSec->chunks.front(); edataEnd = edataSec->chunks.back(); } } void Writer::removeUnusedSections() { // Remove sections that we can be sure won't get content, to avoid // allocating space for their section headers. 
auto isUnused = [this](OutputSection *s) { if (s == relocSec) return false; // This section is populated later. // MergeChunks have zero size at this point, as their size is finalized // later. Only remove sections that have no Chunks at all. return s->chunks.empty(); }; outputSections.erase( std::remove_if(outputSections.begin(), outputSections.end(), isUnused), outputSections.end()); } // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; }; outputSections.erase( std::remove_if(outputSections.begin(), outputSections.end(), isEmpty), outputSections.end()); } void Writer::assignOutputSectionIndices() { // Assign final output section indices, and assign each chunk to its output // section. uint32_t idx = 1; for (OutputSection *os : outputSections) { os->sectionIndex = idx; for (Chunk *c : os->chunks) c->setOutputSectionIdx(idx); ++idx; } // Merge chunks are containers of chunks, so assign those an output section // too. for (MergeChunk *mc : MergeChunk::instances) if (mc) for (SectionChunk *sc : mc->sections) if (sc && sc->live) sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } size_t Writer::addEntryToStringTable(StringRef str) { assert(str.size() > COFF::NameSize); size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field strtab.insert(strtab.end(), str.begin(), str.end()); strtab.push_back('\0'); return offsetOfEntry; } Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { case Symbol::DefinedAbsoluteKind: sym.Value = def->getRVA(); sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; case Symbol::DefinedSyntheticKind: // Relative symbols are unrepresentable in a COFF symbol table. return None; default: { // Don't write symbols that won't be written to the output to the symbol // table. 
Chunk *c = def->getChunk(); if (!c) return None; OutputSection *os = c->getOutputSection(); if (!os) return None; sym.Value = def->getRVA() - os->getRVA(); sym.SectionNumber = os->sectionIndex; break; } } // Symbols that are runtime pseudo relocations don't point to the actual // symbol data itself (as they are imported), but points to the IAT entry // instead. Avoid emitting them to the symbol table, as they can confuse // debuggers. if (def->isRuntimePseudoReloc) return None; StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; sym.Name.Offset.Offset = addEntryToStringTable(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); } if (auto *d = dyn_cast(def)) { COFFSymbolRef ref = d->getCOFFSymbol(); sym.Type = ref.getType(); sym.StorageClass = ref.getStorageClass(); } else { sym.Type = IMAGE_SYM_TYPE_NULL; sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } sym.NumberOfAuxSymbols = 0; return sym; } void Writer::createSymbolAndStringTable() { // PE/COFF images are limited to 8 byte section names. Longer names can be // supported by writing a non-standard string table, but this string table is // not mapped at runtime and the long names will therefore be inaccessible. // link.exe always truncates section names to 8 bytes, whereas binutils always // preserves long section names via the string table. LLD adopts a hybrid // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. 
for (OutputSection *sec : outputSections) { if (sec->name.size() <= COFF::NameSize) continue; if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; sec->setStringTableOff(addEntryToStringTable(sec->name)); } if (config->debugDwarf || config->debugSymtab) { for (ObjFile *file : ObjFile::instances) { for (Symbol *b : file->getSymbols()) { auto *d = dyn_cast_or_null(b); if (!d || d->writtenToSymtab) continue; d->writtenToSymtab = true; if (Optional sym = createSymbol(d)) outputSymtab.push_back(*sym); } } } if (outputSymtab.empty() && strtab.empty()) return; // We position the symbol table to be adjacent to the end of the last section. uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); fileOff += 4 + strtab.size(); fileSize = alignTo(fileOff, config->fileAlign); } void Writer::mergeSections() { if (!pdataSec->chunks.empty()) { firstPdata = pdataSec->chunks.front(); lastPdata = pdataSec->chunks.back(); } for (auto &p : config->merge) { StringRef toName = p.second; if (p.first == toName) continue; StringSet<> names; while (1) { if (!names.insert(toName).second) fatal("/merge: cycle found for section '" + p.first + "'"); auto i = config->merge.find(toName); if (i == config->merge.end()) break; toName = i->second; } OutputSection *from = findSection(p.first); OutputSection *to = findSection(toName); if (!from) continue; if (!to) { from->name = toName; continue; } to->merge(from); } } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * numberOfDataDirectory + sizeof(coff_section) * outputSections.size(); sizeOfHeaders += config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign); fileSize = sizeOfHeaders; // The first page is kept unmapped. 
uint64_t rva = alignTo(sizeOfHeaders, config->align); for (OutputSection *sec : outputSections) { if (sec == relocSec) addBaserels(); uint64_t rawSize = 0, virtualSize = 0; sec->header.VirtualAddress = rva; // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. const bool isCodeSection = (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); uint32_t padding = isCodeSection ? config->functionPadMin : 0; for (Chunk *c : sec->chunks) { if (padding && c->isHotPatchable()) virtualSize += padding; virtualSize = alignTo(virtualSize, c->getAlignment()); c->setRVA(rva + virtualSize); virtualSize += c->getSize(); if (c->hasData) rawSize = alignTo(virtualSize, config->fileAlign); } if (virtualSize > UINT32_MAX) error("section larger than 4 GiB: " + sec->name); sec->header.VirtualSize = virtualSize; sec->header.SizeOfRawData = rawSize; if (rawSize != 0) sec->header.PointerToRawData = fileSize; rva += alignTo(virtualSize, config->align); fileSize += alignTo(rawSize, config->fileAlign); } sizeOfImage = alignTo(rva, config->align); // Assign addresses to sections in MergeChunks. for (MergeChunk *mc : MergeChunk::instances) if (mc) mc->assignSubsectionRVAs(); } template void Writer::writeHeader() { // Write DOS header. For backwards compatibility, the first part of a PE/COFF // executable consists of an MS-DOS MZ executable. If the executable is run // under DOS, that program gets run (usually to just print an error message). // When run under Windows, the loader looks at AddressOfNewExeHeader and uses // the PE header instead. 
uint8_t *buf = buffer->getBufferStart(); auto *dos = reinterpret_cast(buf); buf += sizeof(dos_header); dos->Magic[0] = 'M'; dos->Magic[1] = 'Z'; dos->UsedBytesInTheLastPage = dosStubSize % 512; dos->FileSizeInPages = divideCeil(dosStubSize, 512); dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16; dos->AddressOfRelocationTable = sizeof(dos_header); dos->AddressOfNewExeHeader = dosStubSize; // Write DOS program. memcpy(buf, dosProgram, sizeof(dosProgram)); buf += sizeof(dosProgram); // Write PE magic memcpy(buf, PEMagic, sizeof(PEMagic)); buf += sizeof(PEMagic); // Write COFF header auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); coff->Machine = config->machine; coff->NumberOfSections = outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (config->largeAddressAware) coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; if (!config->is64()) coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE; if (config->dll) coff->Characteristics |= IMAGE_FILE_DLL; if (!config->relocatable) coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; if (config->swaprunCD) coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; if (config->swaprunNet) coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP; coff->SizeOfOptionalHeader = sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory; // Write PE header auto *pe = reinterpret_cast(buf); buf += sizeof(*pe); pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32; // If {Major,Minor}LinkerVersion is left at 0.0, then for some // reason signing the resulting PE file with Authenticode produces a // signature that fails to validate on Windows 7 (but is OK on 10). // Set it to 14.0, which is what VS2015 outputs, and which avoids // that problem. 
pe->MajorLinkerVersion = 14; pe->MinorLinkerVersion = 0; pe->ImageBase = config->imageBase; pe->SectionAlignment = config->align; pe->FileAlignment = config->fileAlign; pe->MajorImageVersion = config->majorImageVersion; pe->MinorImageVersion = config->minorImageVersion; pe->MajorOperatingSystemVersion = config->majorOSVersion; pe->MinorOperatingSystemVersion = config->minorOSVersion; pe->MajorSubsystemVersion = config->majorOSVersion; pe->MinorSubsystemVersion = config->minorOSVersion; pe->Subsystem = config->subsystem; pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; if (!config->noEntry) { Defined *entry = cast(config->entry); pe->AddressOfEntryPoint = entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (config->machine == ARMNT) pe->AddressOfEntryPoint |= 1; } pe->SizeOfStackReserve = config->stackReserve; pe->SizeOfStackCommit = config->stackCommit; pe->SizeOfHeapReserve = config->heapReserve; pe->SizeOfHeapCommit = config->heapCommit; if (config->appContainer) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER; if (config->dynamicBase) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; if (config->highEntropyVA) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; if (!config->allowBind) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; if (config->nxCompat) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!config->allowIsolation) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; if (config->guardCF != GuardCFLevel::Off) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF; if (config->integrityCheck) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY; if (setNoSEHCharacteristic) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (config->terminalServerAware) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; pe->NumberOfRvaAndSize = numberOfDataDirectory; if 
(textSec->getVirtualSize()) { pe->BaseOfCode = textSec->getRVA(); pe->SizeOfCode = textSec->getRawSize(); } pe->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *dir = reinterpret_cast(buf); buf += sizeof(*dir) * numberOfDataDirectory; if (edataStart) { dir[EXPORT_TABLE].RelativeVirtualAddress = edataStart->getRVA(); dir[EXPORT_TABLE].Size = edataEnd->getRVA() + edataEnd->getSize() - edataStart->getRVA(); } if (importTableStart) { dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA(); dir[IMPORT_TABLE].Size = importTableSize; } if (iatStart) { dir[IAT].RelativeVirtualAddress = iatStart->getRVA(); dir[IAT].Size = iatSize; } if (rsrcSec->getVirtualSize()) { dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA(); dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize(); } if (firstPdata) { dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA(); dir[EXCEPTION_TABLE].Size = lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA(); } if (relocSec->getVirtualSize()) { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize(); } if (Symbol *sym = symtab->findUnderscore("_tls_used")) { if (Defined *b = dyn_cast(sym)) { dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA(); dir[TLS_TABLE].Size = config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } if (debugDirectory) { dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA(); dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize(); } if (Symbol *sym = symtab->findUnderscore("_load_config_used")) { if (auto *b = dyn_cast(sym)) { SectionChunk *sc = b->getChunk(); assert(b->getRVA() >= sc->getRVA()); uint64_t offsetInChunk = b->getRVA() - sc->getRVA(); if (!sc->hasData || offsetInChunk + 4 > sc->getSize()) fatal("_load_config_used is malformed"); ArrayRef secContents = sc->getContents(); uint32_t loadConfigSize = *reinterpret_cast(&secContents[offsetInChunk]); if (offsetInChunk + loadConfigSize > sc->getSize()) fatal("_load_config_used is too large"); dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = b->getRVA(); dir[LOAD_CONFIG_TABLE].Size = loadConfigSize; } } if (!delayIdata.empty()) { dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = delayIdata.getDirRVA(); dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize(); } // Write section table for (OutputSection *sec : outputSections) { sec->writeHeaderTo(buf); buf += sizeof(coff_section); } sectionTable = ArrayRef( buf - outputSections.size() * sizeof(coff_section), buf); if (outputSymtab.empty() && strtab.empty()) return; coff->PointerToSymbolTable = pointerToSymbolTable; uint32_t numberOfSymbols = outputSymtab.size(); coff->NumberOfSymbols = numberOfSymbols; auto *symbolTable = reinterpret_cast( buffer->getBufferStart() + coff->PointerToSymbolTable); for (size_t i = 0; i != numberOfSymbols; ++i) symbolTable[i] = outputSymtab[i]; // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. 
buf = reinterpret_cast(&symbolTable[numberOfSymbols]); write32le(buf, strtab.size() + 4); if (!strtab.empty()) memcpy(buf + 4, strtab.data(), strtab.size()); } void Writer::openFile(StringRef path) { buffer = CHECK( FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable), "failed to open " + path); } void Writer::createSEHTable() { SymbolRVASet handlers; for (ObjFile *file : ObjFile::instances) { if (!file->hasSafeSEH()) error("/safeseh: " + file->getName() + " is not compatible with SEH"); markSymbolsForRVATable(file, file->getSXDataChunks(), handlers); } // Set the "no SEH" characteristic if there really were no handlers, or if // there is no load config object to point to the table of handlers. setNoSEHCharacteristic = handlers.empty() || !symtab->findUnderscore("_load_config_used"); maybeAddRVATable(std::move(handlers), "__safe_se_handler_table", "__safe_se_handler_count"); } // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the // symbol's offset into that Chunk. static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { Chunk *c = s->getChunk(); if (auto *sc = dyn_cast(c)) c = sc->repl; // Look through ICF replacement. uint32_t off = s->getRVA() - (c ? c->getRVA() : 0); rvaSet.insert({c, off}); } // Given a symbol, add it to the GFIDs table if it is a live, defined, function // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, Symbol *s) { if (!s) return; switch (s->kind()) { case Symbol::DefinedLocalImportKind: case Symbol::DefinedImportDataKind: // Defines an __imp_ pointer, so it is data, so it is ignored. break; case Symbol::DefinedCommonKind: // Common is always data, so it is ignored. break; case Symbol::DefinedAbsoluteKind: case Symbol::DefinedSyntheticKind: // Absolute is never code, synthetic generally isn't and usually isn't // determinable. 
break; - case Symbol::LazyKind: + case Symbol::LazyArchiveKind: + case Symbol::LazyObjectKind: case Symbol::UndefinedKind: // Undefined symbols resolve to zero, so they don't have an RVA. Lazy // symbols shouldn't have relocations. break; case Symbol::DefinedImportThunkKind: // Thunks are always code, include them. addSymbolToRVASet(addressTakenSyms, cast(s)); break; case Symbol::DefinedRegularKind: { // This is a regular, defined, symbol from a COFF file. Mark the symbol as // address taken if the symbol type is function and it's in an executable // section. auto *d = cast(s); if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { SectionChunk *sc = dyn_cast(d->getChunk()); if (sc && sc->live && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) addSymbolToRVASet(addressTakenSyms, d); } break; } } } // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. static void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols) { for (Chunk *c : file->getChunks()) { // We only care about live section chunks. Common chunks and other chunks // don't generally contain relocations. SectionChunk *sc = dyn_cast(c); if (!sc || !sc->live) continue; for (const coff_relocation &reloc : sc->getRelocs()) { if (config->machine == I386 && reloc.Type == COFF::IMAGE_REL_I386_REL32) // Ignore relative relocations on x86. On x86_64 they can't be ignored // since they're also used to compute absolute addresses. continue; Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex); maybeAddAddressTakenFunction(usedSymbols, ref); } } } // Create the guard function id table. This is a table of RVAs of all // address-taken functions. It is sorted and uniqued, just like the safe SEH // table. 
void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; SymbolRVASet longJmpTargets; for (ObjFile *file : ObjFile::instances) { // If the object was compiled with /guard:cf, the address taken symbols // are in .gfids$y sections, and the longjmp targets are in .gljmp$y // sections. If the object was not compiled with /guard:cf, we assume there // were no setjmp targets, and that all code symbols with relocations are // possibly address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); } } // Mark the image entry as address-taken. if (config->entry) maybeAddAddressTakenFunction(addressTakenSyms, config->entry); // Mark exported symbols in executable sections as address-taken. for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) c.inputChunk->setAlignment(16); maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); // Add the longjmp target table unless the user told us not to. if (config->guardCF == GuardCFLevel::Full) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", "__guard_longjmp_count"); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. uint32_t guardFlags = uint32_t(coff_guard_flags::CFInstrumented) | uint32_t(coff_guard_flags::HasFidTable); if (config->guardCF == GuardCFLevel::Full) guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable); Symbol *flagSym = symtab->findUnderscore("__guard_flags"); cast(flagSym)->setVA(guardFlags); } // Take a list of input sections containing symbol table indices and add those // symbols to an RVA table. 
The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. // In this case, the associated gfids section is discarded, and we don't // mark the virtual member functions as address-taken by the vtable. if (!c->live) continue; // Validate that the contents look like symbol table indices. ArrayRef data = c->getContents(); if (data.size() % 4 != 0) { warn("ignoring " + c->getSectionName() + " symbol table index section in object " + toString(file)); continue; } // Read each symbol table index and check if that symbol was included in the // final link. If so, add it to the table symbol set. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); for (uint32_t symIndex : symIndices) { if (symIndex >= objSymbols.size()) { warn("ignoring invalid symbol table index in section " + c->getSectionName() + " in object " + toString(file)); continue; } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) addSymbolToRVASet(tableSymbols, cast(s)); } } } } // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym) { if (tableSymbols.empty()) return; RVATableChunk *tableChunk = make(std::move(tableSymbols)); rdataSec->addChunk(tableChunk); Symbol *t = symtab->findUnderscore(tableSym); Symbol *c = symtab->findUnderscore(countSym); replaceSymbol(t, t->getName(), tableChunk); cast(c)->setVA(tableChunk->getSize() / 4); } // MinGW specific. 
Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the // runtime uses for finding the table. void Writer::createRuntimePseudoRelocs() { std::vector rels; for (Chunk *c : symtab->getChunks()) { auto *sc = dyn_cast(c); if (!sc || !sc->live) continue; sc->getRuntimePseudoRelocs(rels); } if (!rels.empty()) log("Writing " + Twine(rels.size()) + " runtime pseudo relocations"); PseudoRelocTableChunk *table = make(rels); rdataSec->addChunk(table); EmptyChunk *endOfList = make(); rdataSec->addChunk(endOfList); Symbol *headSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__"); Symbol *endSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__"); replaceSymbol(headSym, headSym->getName(), table); replaceSymbol(endSym, endSym->getName(), endOfList); } // MinGW specific. // The MinGW .ctors and .dtors lists have sentinels at each end; // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ // and __DTOR_LIST__ respectively. void Writer::insertCtorDtorSymbols() { AbsolutePointerChunk *ctorListHead = make(-1); AbsolutePointerChunk *ctorListEnd = make(0); AbsolutePointerChunk *dtorListHead = make(-1); AbsolutePointerChunk *dtorListEnd = make(0); ctorsSec->insertChunkAtStart(ctorListHead); ctorsSec->addChunk(ctorListEnd); dtorsSec->insertChunkAtStart(dtorListHead); dtorsSec->addChunk(dtorListEnd); Symbol *ctorListSym = symtab->findUnderscore("__CTOR_LIST__"); Symbol *dtorListSym = symtab->findUnderscore("__DTOR_LIST__"); replaceSymbol(ctorListSym, ctorListSym->getName(), ctorListHead); replaceSymbol(dtorListSym, dtorListSym->getName(), dtorListHead); } // Handles /section options to allow users to overwrite // section attributes. 
void Writer::setSectionPermissions() { for (auto &p : config->section) { StringRef name = p.first; uint32_t perm = p.second; for (OutputSection *sec : outputSections) if (sec->name == name) sec->setPermissions(perm); } } // Write section contents to a mmap'ed file. void Writer::writeSections() { // Record the number of sections to apply section index relocations // against absolute symbols. See applySecIdx in Chunks.cpp.. DefinedAbsolute::numOutputSections = outputSections.size(); uint8_t *buf = buffer->getBufferStart(); for (OutputSection *sec : outputSections) { uint8_t *secBuf = buf + sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); } } void Writer::writeBuildId() { // There are two important parts to the build ID. // 1) If building with debug info, the COFF debug directory contains a // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the // PE contents. if (config->debug) { assert(buildId && "BuildId is not set!"); // BuildId->BuildId was filled in when the PDB was written. } // At this point the only fields in the COFF file which remain unset are the // "timestamp" in the COFF file header, and the ones in the coff debug // directory. Now we can hash the file and write that hash to the various // timestamp fields in the file. 
StringRef outputFileData( reinterpret_cast(buffer->getBufferStart()), buffer->getBufferSize()); uint32_t timestamp = config->timestamp; uint64_t hash = 0; bool generateSyntheticBuildId = config->mingw && config->debug && config->pdbPath.empty(); if (config->repro || generateSyntheticBuildId) hash = xxHash64(outputFileData); if (config->repro) timestamp = static_cast(hash); if (generateSyntheticBuildId) { // For MinGW builds without a PDB file, we still generate a build id // to allow associating a crash dump to the executable. buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70; buildId->buildId->PDB70.Age = 1; memcpy(buildId->buildId->PDB70.Signature, &hash, 8); // xxhash only gives us 8 bytes, so put some fixed data in the other half. memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8); } if (debugDirectory) debugDirectory->setTimeDateStamp(timestamp); uint8_t *buf = buffer->getBufferStart(); buf += dosStubSize + sizeof(PEMagic); object::coff_file_header *coffHeader = reinterpret_cast(buf); coffHeader->TimeDateStamp = timestamp; } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { if (!firstPdata) return; // We assume .pdata contains function table entries only. 
auto bufAddr = [&](Chunk *c) { OutputSection *os = c->getOutputSection(); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; uint8_t *begin = bufAddr(firstPdata); uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function // pointers that initialize every global variable that is not trivially // constructed. The CRT calls them one after the other prior to invoking // main(). // // As per C++ spec, 3.6.2/2.3, // "Variables with ordered initialization defined within a single // translation unit shall be initialized in the order of their definitions // in the translation unit" // // It is therefore critical to sort the chunks containing the function // pointers in the order that they are listed in the object file (top to // bottom), otherwise global objects might not be initialized in the // correct order. 
void Writer::sortCRTSectionChunks(std::vector &chunks) { auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) { auto sa = dyn_cast(a); auto sb = dyn_cast(b); assert(sa && sb && "Non-section chunks in CRT section!"); StringRef sAObj = sa->file->mb.getBufferIdentifier(); StringRef sBObj = sb->file->mb.getBufferIdentifier(); return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber(); }; llvm::stable_sort(chunks, sectionChunkOrder); if (config->verbose) { for (auto &c : chunks) { auto sc = dyn_cast(c); log(" " + sc->file->mb.getBufferIdentifier().str() + ", SectionID: " + Twine(sc->getSectionNumber())); } } } OutputSection *Writer::findSection(StringRef name) { for (OutputSection *sec : outputSections) if (sec->name == name) return sec; return nullptr; } uint32_t Writer::getSizeOfInitializedData() { uint32_t res = 0; for (OutputSection *s : outputSections) if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) res += s->getRawSize(); return res; } // Add base relocations to .reloc section. void Writer::addBaserels() { if (!config->relocatable) return; relocSec->chunks.clear(); std::vector v; for (OutputSection *sec : outputSections) { if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. for (Chunk *c : sec->chunks) c->getBaserels(&v); // Add the addresses to .reloc section. if (!v.empty()) addBaserelBlocks(v); v.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. 
void Writer::addBaserelBlocks(std::vector &v) { const uint32_t mask = ~uint32_t(pageSize - 1); uint32_t page = v[0].rva & mask; size_t i = 0, j = 1; for (size_t e = v.size(); j < e; ++j) { uint32_t p = v[j].rva & mask; if (p == page) continue; relocSec->addChunk(make(page, &v[i], &v[0] + j)); i = j; page = p; } if (i == j) return; relocSec->addChunk(make(page, &v[i], &v[0] + j)); } PartialSection *Writer::createPartialSection(StringRef name, uint32_t outChars) { PartialSection *&pSec = partialSections[{name, outChars}]; if (pSec) return pSec; pSec = make(name, outChars); return pSec; } PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { auto it = partialSections.find({name, outChars}); if (it != partialSections.end()) return it->second; return nullptr; } Index: lld/trunk/COFF/SymbolTable.cpp =================================================================== --- lld/trunk/COFF/SymbolTable.cpp (revision 370815) +++ lld/trunk/COFF/SymbolTable.cpp (revision 370816) @@ -1,694 +1,725 @@ //===- SymbolTable.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "Config.h" #include "Driver.h" #include "LTO.h" #include "PDB.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace lld { namespace coff { static Timer ltoTimer("LTO", Timer::root()); SymbolTable *symtab; void SymbolTable::addFile(InputFile *file) { log("Reading " + toString(file)); file->parse(); MachineTypes mt = file->getMachineType(); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { config->machine = mt; } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(config->machine)); return; } if (auto *f = dyn_cast(file)) { ObjFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { BitcodeFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { ImportFile::instances.push_back(f); } driver->parseDirectives(file); } static void errorOrWarn(const Twine &s) { if (config->forceUnresolved) warn(s); else error(s); } +// Causes the file associated with a lazy symbol to be linked in. +static void forceLazy(Symbol *s) { + s->pendingArchiveLoad = true; + switch (s->kind()) { + case Symbol::Kind::LazyArchiveKind: { + auto *l = cast(s); + l->file->addMember(l->sym); + break; + } + case Symbol::Kind::LazyObjectKind: + cast(s)->file->fetch(); + break; + default: + llvm_unreachable( + "symbol passed to forceLazy is not a LazyArchive or LazyObject"); + } +} + // Returns the symbol in SC whose value is <= Addr that is closest to Addr. // This is generally the global variable or function whose definition contains // Addr. 
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { DefinedRegular *candidate = nullptr; for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); if (!d || !d->data || d->file != sc->file || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; candidate = d; } return candidate; } static std::vector getSymbolLocations(BitcodeFile *file) { std::string res("\n>>> referenced by "); StringRef source = file->obj->getSourceFileName(); if (!source.empty()) res += source.str() + "\n>>> "; res += toString(file); return {res}; } // Given a file and the index of a symbol in that file, returns a description // of all references to that symbol from that file. If no debug information is // available, returns just the name of the file, else one string per actual // reference as described in the debug info. std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex) { struct Location { Symbol *sym; std::pair fileLine; }; std::vector locations; for (Chunk *c : file->getChunks()) { auto *sc = dyn_cast(c); if (!sc) continue; for (const coff_relocation &r : sc->getRelocs()) { if (r.SymbolTableIndex != symIndex) continue; std::pair fileLine = getFileLine(sc, r.VirtualAddress); Symbol *sym = getSymbol(sc, r.VirtualAddress); if (!fileLine.first.empty() || sym) locations.push_back({sym, fileLine}); } } if (locations.empty()) return std::vector({"\n>>> referenced by " + toString(file)}); std::vector symbolLocations(locations.size()); size_t i = 0; for (Location loc : locations) { llvm::raw_string_ostream os(symbolLocations[i++]); os << "\n>>> referenced by "; if (!loc.fileLine.first.empty()) os << loc.fileLine.first << ":" << loc.fileLine.second << "\n>>> "; os << toString(file); if (loc.sym) os << ":(" << toString(*loc.sym) << ')'; } return symbolLocations; } std::vector getSymbolLocations(InputFile *file, uint32_t symIndex) { if (auto *o = dyn_cast(file)) return getSymbolLocations(o, symIndex); if 
(auto *b = dyn_cast(file)) return getSymbolLocations(b); llvm_unreachable("unsupported file type passed to getSymbolLocations"); return {}; } // For an undefined symbol, stores all files referencing it and the index of // the undefined symbol in each file. struct UndefinedDiag { Symbol *sym; struct File { InputFile *file; uint32_t symIndex; }; std::vector files; }; static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { std::string out; llvm::raw_string_ostream os(out); os << "undefined symbol: " << toString(*undefDiag.sym); const size_t maxUndefReferences = 10; size_t i = 0, numRefs = 0; for (const UndefinedDiag::File &ref : undefDiag.files) { std::vector symbolLocations = getSymbolLocations(ref.file, ref.symIndex); numRefs += symbolLocations.size(); for (const std::string &s : symbolLocations) { if (i >= maxUndefReferences) break; os << s; i++; } } if (i < numRefs) os << "\n>>> referenced " << numRefs - i << " more times"; errorOrWarn(os.str()); } void SymbolTable::loadMinGWAutomaticImports() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; if (undef->getWeakAlias()) continue; StringRef name = undef->getName(); if (name.startswith("__imp_")) continue; - // If we have an undefined symbol, but we have a Lazy representing a - // symbol we could load from file, make sure to load that. - Lazy *l = dyn_cast_or_null(find(("__imp_" + name).str())); - if (!l || l->pendingArchiveLoad) + // If we have an undefined symbol, but we have a lazy symbol we could + // load, load it. 
+ Symbol *l = find(("__imp_" + name).str()); + if (!l || l->pendingArchiveLoad || !l->isLazy()) continue; - log("Loading lazy " + l->getName() + " from " + l->file->getName() + + log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() + " for automatic import"); - l->pendingArchiveLoad = true; - l->file->addMember(l->sym); + forceLazy(l); } } Defined *SymbolTable::impSymbol(StringRef name) { if (name.startswith("__imp_")) return nullptr; return dyn_cast_or_null(find(("__imp_" + name).str())); } bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { Defined *imp = impSymbol(name); if (!imp) return false; // Replace the reference directly to a variable with a reference // to the import address table instead. This obviously isn't right, // but we mark the symbol as isRuntimePseudoReloc, and a later pass // will add runtime pseudo relocations for every relocation against // this Symbol. The runtime pseudo relocation framework expects the // reference itself to point at the IAT entry. size_t impSize = 0; if (isa(imp)) { log("Automatically importing " + name + " from " + cast(imp)->getDLLName()); impSize = sizeof(DefinedImportData); } else if (isa(imp)) { log("Automatically importing " + name + " from " + toString(cast(imp)->file)); impSize = sizeof(DefinedRegular); } else { warn("unable to automatically import " + name + " from " + imp->getName() + " from " + toString(cast(imp)->file) + "; unexpected symbol type"); return false; } sym->replaceKeepingName(imp, impSize); sym->isRuntimePseudoReloc = true; // There may exist symbols named .refptr. which only consist // of a single pointer to . If it turns out is // automatically imported, we don't need to keep the .refptr. // pointer at all, but redirect all accesses to it to the IAT entry // for __imp_ instead, and drop the whole .refptr. chunk. DefinedRegular *refptr = dyn_cast_or_null(find((".refptr." 
+ name).str())); if (refptr && refptr->getChunk()->getSize() == config->wordsize) { SectionChunk *sc = dyn_cast_or_null(refptr->getChunk()); if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) { log("Replacing .refptr." + name + " with " + imp->getName()); refptr->getChunk()->live = false; refptr->replaceKeepingName(imp, impSize); } } return true; } /// Helper function for reportUnresolvable and resolveRemainingUndefines. /// This function emits an "undefined symbol" diagnostic for each symbol in /// undefs. If localImports is not nullptr, it also emits a "locally /// defined symbol imported" diagnostic for symbols in localImports. /// objFiles and bitcodeFiles (if not nullptr) are used to report where /// undefined symbols are referenced. static void reportProblemSymbols(const SmallPtrSetImpl &undefs, const DenseMap *localImports, const std::vector objFiles, const std::vector *bitcodeFiles) { // Return early if there is nothing to report (which should be // the common case). 
if (undefs.empty() && (!localImports || localImports->empty())) return; for (Symbol *b : config->gcroot) { if (undefs.count(b)) errorOrWarn(": undefined symbol: " + toString(*b)); if (localImports) if (Symbol *imp = localImports->lookup(b)) warn(": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); } std::vector undefDiags; DenseMap firstDiag; auto processFile = [&](InputFile *file, ArrayRef symbols) { uint32_t symIndex = (uint32_t)-1; for (Symbol *sym : symbols) { ++symIndex; if (!sym) continue; if (undefs.count(sym)) { auto it = firstDiag.find(sym); if (it == firstDiag.end()) { firstDiag[sym] = undefDiags.size(); undefDiags.push_back({sym, {{file, symIndex}}}); } else { undefDiags[it->second].files.push_back({file, symIndex}); } } if (localImports) if (Symbol *imp = localImports->lookup(sym)) warn(toString(file) + ": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); } }; for (ObjFile *file : objFiles) processFile(file, file->getSymbols()); if (bitcodeFiles) for (BitcodeFile *file : *bitcodeFiles) processFile(file, file->getSymbols()); for (const UndefinedDiag &undefDiag : undefDiags) reportUndefinedSymbol(undefDiag); } void SymbolTable::reportUnresolvable() { SmallPtrSet undefs; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (undef->getWeakAlias()) continue; StringRef name = undef->getName(); if (name.startswith("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); if (imp && isa(imp)) continue; } if (name.contains("_PchSym_")) continue; if (config->mingw && impSymbol(name)) continue; undefs.insert(sym); } reportProblemSymbols(undefs, /* localImports */ nullptr, ObjFile::instances, &BitcodeFile::instances); } void SymbolTable::resolveRemainingUndefines() { SmallPtrSet undefs; DenseMap localImports; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = 
dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *d = undef->getWeakAlias()) { // We want to replace Sym with D. However, we can't just blindly // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an // internal symbol, and internal symbols are stored as "unparented" // Symbols. For that reason we need to check which type of symbol we // are dealing with and copy the correct number of bytes. if (isa(d)) memcpy(sym, d, sizeof(DefinedRegular)); else if (isa(d)) memcpy(sym, d, sizeof(DefinedAbsolute)); else memcpy(sym, d, sizeof(SymbolUnion)); continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (name.startswith("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, name, d); localImportChunks.push_back(cast(sym)->getChunk()); localImports[sym] = d; continue; } } // We don't want to report missing Microsoft precompiled headers symbols. // A proper message will be emitted instead in PDBLinker::aquirePrecompObj if (name.contains("_PchSym_")) continue; if (config->mingw && handleMinGWAutomaticImport(sym, name)) continue; // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. if (config->forceUnresolved) replaceSymbol(sym, name, 0); undefs.insert(sym); } reportProblemSymbols( undefs, config->warnLocallyDefinedImported ? 
&localImports : nullptr, ObjFile::instances, /* bitcode files no longer needed */ nullptr); } std::pair SymbolTable::insert(StringRef name) { bool inserted = false; Symbol *&sym = symMap[CachedHashStringRef(name)]; if (!sym) { sym = reinterpret_cast(make()); sym->isUsedInRegularObj = false; sym->pendingArchiveLoad = false; inserted = true; } return {sym, inserted}; } std::pair SymbolTable::insert(StringRef name, InputFile *file) { std::pair result = insert(name); if (!file || !isa(file)) result.first->isUsedInRegularObj = true; return result; } Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f, bool isWeakAlias) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, f); - if (wasInserted || (isa(s) && isWeakAlias)) { + if (wasInserted || (s->isLazy() && isWeakAlias)) { replaceSymbol(s, name); return s; } - if (auto *l = dyn_cast(s)) { - if (!s->pendingArchiveLoad) { - s->pendingArchiveLoad = true; - l->file->addMember(l->sym); - } - } + if (s->isLazy()) + forceLazy(s); return s; } -void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) { +void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) { StringRef name = sym.getName(); Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); if (wasInserted) { - replaceSymbol(s, f, sym); + replaceSymbol(s, f, sym); return; } auto *u = dyn_cast(s); if (!u || u->weakAlias || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; f->addMember(sym); } +void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(n, f); + if (wasInserted) { + replaceSymbol(s, f, n); + return; + } + auto *u = dyn_cast(s); + if (!u || u->weakAlias || s->pendingArchiveLoad) + return; + s->pendingArchiveLoad = true; + f->fetch(); +} + void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) { std::string msg = "duplicate symbol: " + toString(*existing) + " in " + 
toString(existing->getFile()) + " and in " + toString(newFile); if (config->forceMultiple) warn(msg); else error(msg); } Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; - if (wasInserted || isa(s) || isa(s)) + if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, n, sym); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; - if (wasInserted || isa(s) || isa(s)) + if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, n, va); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; - if (wasInserted || isa(s) || isa(s)) + if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, n, c); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addRegular(InputFile *f, StringRef n, const coff_symbol_generic *sym, SectionChunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, /*IsCOMDAT*/ false, /*IsExternal*/ true, sym, c); else reportDuplicate(s, f); return s; } std::pair SymbolTable::addComdat(InputFile *f, StringRef n, const coff_symbol_generic *sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) { replaceSymbol(s, f, n, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym, nullptr); return {cast(s), true}; } auto *existingSymbol = cast(s); if (!existingSymbol->isCOMDAT) reportDuplicate(s, f); return {existingSymbol, false}; } Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *sym, CommonChunk *c) { Symbol *s; 
bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, size, sym, c); else if (auto *dc = dyn_cast(s)) if (size > dc->getSize()) replaceSymbol(s, f, n, size, sym, c); return s; } Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; - if (wasInserted || isa(s) || isa(s)) { + if (wasInserted || isa(s) || s->isLazy()) { replaceSymbol(s, n, f); return s; } reportDuplicate(s, f); return nullptr; } Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, uint16_t machine) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, nullptr); s->isUsedInRegularObj = true; - if (wasInserted || isa(s) || isa(s)) { + if (wasInserted || isa(s) || s->isLazy()) { replaceSymbol(s, name, id, machine); return s; } reportDuplicate(s, id->file); return nullptr; } void SymbolTable::addLibcall(StringRef name) { Symbol *sym = findUnderscore(name); if (!sym) return; - if (Lazy *l = dyn_cast(sym)) { + if (auto *l = dyn_cast(sym)) { MemoryBufferRef mb = l->getMemberBuffer(); - if (identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode) + if (isBitcode(mb)) + addUndefined(sym->getName()); + } else if (LazyObject *o = dyn_cast(sym)) { + if (isBitcode(o->file->mb)) addUndefined(sym->getName()); } } std::vector SymbolTable::getChunks() { std::vector res; for (ObjFile *file : ObjFile::instances) { ArrayRef v = file->getChunks(); res.insert(res.end(), v.begin(), v.end()); } return res; } Symbol *SymbolTable::find(StringRef name) { return symMap.lookup(CachedHashStringRef(name)); } Symbol *SymbolTable::findUnderscore(StringRef name) { if (config->machine == I386) return find(("_" + name).str()); return find(name); } // Return all symbols that start with Prefix, possibly ignoring the first // character of Prefix or the first character symbol. 
std::vector SymbolTable::getSymsWithPrefix(StringRef prefix) { std::vector syms; for (auto pair : symMap) { StringRef name = pair.first.val(); if (name.startswith(prefix) || name.startswith(prefix.drop_front()) || name.drop_front().startswith(prefix) || name.drop_front().startswith(prefix.drop_front())) { syms.push_back(pair.second); } } return syms; } Symbol *SymbolTable::findMangle(StringRef name) { if (Symbol *sym = find(name)) if (!isa(sym)) return sym; // Efficient fuzzy string lookup is impossible with a hash table, so iterate // the symbol table once and collect all possibly matching symbols into this // vector. Then compare each possibly matching symbol with each possible // mangling. std::vector syms = getSymsWithPrefix(name); auto findByPrefix = [&syms](const Twine &t) -> Symbol * { std::string prefix = t.str(); for (auto *s : syms) if (s->getName().startswith(prefix)) return s; return nullptr; }; // For non-x86, just look for C++ functions. if (config->machine != I386) return findByPrefix("?" + name + "@@Y"); if (!name.startswith("_")) return nullptr; // Search for x86 stdcall function. if (Symbol *s = findByPrefix(name + "@")) return s; // Search for x86 fastcall function. if (Symbol *s = findByPrefix("@" + name.substr(1) + "@")) return s; // Search for x86 vectorcall function. if (Symbol *s = findByPrefix(name.substr(1) + "@@")) return s; // Search for x86 C++ non-member function. return findByPrefix("?" 
+ name.substr(1) + "@@Y"); } Symbol *SymbolTable::addUndefined(StringRef name) { return addUndefined(name, nullptr, false); } std::vector SymbolTable::compileBitcodeFiles() { lto.reset(new BitcodeCompiler); for (BitcodeFile *f : BitcodeFile::instances) lto->add(*f); return lto->compile(); } void SymbolTable::addCombinedLTOObjects() { if (BitcodeFile::instances.empty()) return; ScopedTimer t(ltoTimer); for (StringRef object : compileBitcodeFiles()) { auto *obj = make(MemoryBufferRef(object, "lto.tmp")); obj->parse(); ObjFile::instances.push_back(obj); } } } // namespace coff } // namespace lld Index: lld/trunk/COFF/Options.td =================================================================== --- lld/trunk/COFF/Options.td (revision 370815) +++ lld/trunk/COFF/Options.td (revision 370816) @@ -1,228 +1,232 @@ include "llvm/Option/OptParser.td" // link.exe accepts options starting with either a dash or a slash. // Flag that takes no arguments. class F : Flag<["/", "-", "/?", "-?"], name>; // Flag that takes one argument after ":". class P : Joined<["/", "-", "/?", "-?"], name#":">, HelpText; // Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the // flag on and using it suffixed by ":no" turns it off. 
multiclass B<string name, string help_on, string help_off> {
  // The unsuffixed record is the "on" spelling; "_no" is the ":no" spelling.
  def "" : F<name>, HelpText<help_on>;
  def _no : F<name#":no">, HelpText<help_off>;
}

def align   : P<"align", "Section alignment">;
def aligncomm : P<"aligncomm", "Set common symbol alignment">;
def alternatename : P<"alternatename", "Define weak alias">;
def base    : P<"base", "Base address of the program">;
def color_diagnostics: Flag<["--"], "color-diagnostics">,
    HelpText<"Use colors in diagnostics">;
def color_diagnostics_eq: Joined<["--"], "color-diagnostics=">,
    HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">;
def defaultlib : P<"defaultlib", "Add the library to the list of input files">;
def delayload : P<"delayload", "Delay loaded DLL name">;
def entry   : P<"entry", "Name of entry point symbol">;
def errorlimit : P<"errorlimit",
    "Maximum number of errors to emit before stopping (0 = no limit)">;
def export  : P<"export", "Export a function">;
// No help text because /failifmismatch is not intended to be used by the user.
def failifmismatch : P<"failifmismatch", "">;
def filealign : P<"filealign", "Section alignment in the output file">;
def functionpadmin : F<"functionpadmin">;
def functionpadmin_opt : P<"functionpadmin",
    "Prepares an image for hotpatching">;
def guard   : P<"guard", "Control flow guard">;
def heap    : P<"heap", "Size of the heap">;
def ignore : P<"ignore", "Specify warning codes to ignore">;
def implib  : P<"implib", "Import library name">;
def lib : F<"lib">,
    HelpText<"Act like lib.exe; must be first argument if present">;
def libpath : P<"libpath", "Additional library search path">;
def linkrepro : P<"linkrepro",
    "Dump linker invocation and input files for debugging">;
def lldltocache : P<"lldltocache",
    "Path to ThinLTO cached object file directory">;
def lldltocachepolicy : P<"lldltocachepolicy",
    "Pruning policy for the ThinLTO cache">;
def lldsavetemps : F<"lldsavetemps">,
    HelpText<"Save temporary files instead of deleting them">;
def machine : P<"machine", "Specify target platform">;
def merge   : P<"merge", "Combine sections">;
def mllvm   : P<"mllvm", "Options to pass to LLVM">;
def nodefaultlib : P<"nodefaultlib", "Remove a default library">;
def opt     : P<"opt", "Control optimizations">;
def order   : P<"order", "Put functions in order">;
def out     : P<"out", "Path to file to write output">;
def natvis : P<"natvis", "Path to natvis file to embed in the PDB">;
def no_color_diagnostics: F<"no-color-diagnostics">,
    HelpText<"Do not use colors in diagnostics">;
def pdb : P<"pdb", "PDB file path">;
def pdbaltpath : P<"pdbaltpath", "PDB file path to embed in the image">;
def section : P<"section", "Specify section attributes">;
def stack   : P<"stack", "Size of the stack">;
def stub    : P<"stub", "Specify DOS stub file">;
def subsystem : P<"subsystem", "Specify subsystem">;
def timestamp : P<"timestamp", "Specify the PE header timestamp">;
def version : P<"version", "Specify a version number in the PE header">;
def wholearchive_file : P<"wholearchive",
    "Include all object files from this archive">;

def disallowlib : Joined<["/", "-", "/?", "-?"], "disallowlib:">,
    Alias<nodefaultlib>;

def manifest : F<"manifest">, HelpText<"Create .manifest file">;
def manifest_colon : P<
    "manifest",
    "NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image">;
def manifestuac : P<"manifestuac", "User access control">;
def manifestfile : P<"manifestfile", "Manifest output path, with /manifest">;
def manifestdependency : P<
    "manifestdependency",
    "Attributes for <dependency> element in manifest file; implies /manifest">;
def manifestinput : P<
    "manifestinput",
    "Additional manifest inputs; only valid with /manifest:embed">;

// We cannot use multiclass P because class name "incl" is different
// from its command line option name. We do this because "include" is
// a reserved keyword in tablegen.
def incl : Joined<["/", "-", "/?", "-?"], "include:">,
    HelpText<"Force symbol to be added to symbol table as undefined one">;

// "def" is also a keyword.
def deffile : Joined<["/", "-", "/?", "-?"], "def:">, HelpText<"Use module-definition file">; def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; def debug_opt : P<"debug", "Embed a symbol table in the image with option">; def debugtype : P<"debugtype", "Debug Info Options">; def dll : F<"dll">, HelpText<"Create a DLL">; def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">; def nodefaultlib_all : F<"nodefaultlib">, HelpText<"Remove all default libraries">; def noentry : F<"noentry">, HelpText<"Don't add reference to DllMainCRTStartup; only valid with /dll">; def profile : F<"profile">; def repro : F<"Brepro">, HelpText<"Use a hash of the executable as the PE header timestamp">; def swaprun : P<"swaprun", "Comma-separated list of 'cd' or 'net'">; def swaprun_cd : F<"swaprun:cd">, Alias, AliasArgs<["cd"]>, HelpText<"Make loader run output binary from swap instead of from CD">; def swaprun_net : F<"swaprun:net">, Alias, AliasArgs<["net"]>, HelpText<"Make loader run output binary from swap instead of from network">; def verbose : F<"verbose">; def wholearchive_flag : F<"wholearchive">; def force : F<"force">, HelpText<"Allow undefined and multiply defined symbols when creating executables">; def force_unresolved : F<"force:unresolved">, HelpText<"Allow undefined symbols when creating executables">; def force_multiple : F<"force:multiple">, HelpText<"Allow multiply defined symbols when creating executables">; def force_multipleres : F<"force:multipleres">, HelpText<"Allow multiply defined resources when creating executables">; defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">; defm allowbind : B<"allowbind", "Enable DLL binding (default)", "Disable DLL binding">; defm allowisolation : B<"allowisolation", "Enable DLL isolation (default)", "Disable DLL isolation">; defm appcontainer : B<"appcontainer", "Image can only be run in an app container", "Image can run outside an app container (default)">; defm 
dynamicbase : B<"dynamicbase", "Enable ASLR (default unless /fixed)", "Disable ASLR (default when /fixed)">; defm fixed : B<"fixed", "Disable base relocations", "Enable base relocations (default)">; defm highentropyva : B<"highentropyva", "Enable 64-bit ASLR (default on 64-bit)", "Disable 64-bit ASLR">; defm incremental : B<"incremental", "Keep original import library if contents are unchanged", "Overwrite import library even if contents are unchanged">; defm integritycheck : B<"integritycheck", "Set FORCE_INTEGRITY bit in PE header", "No effect (default)">; defm largeaddressaware : B<"largeaddressaware", "Enable large addresses (default on 64-bit)", "Disable large addresses (default on 32-bit)">; defm nxcompat : B<"nxcompat", "Enable data execution prevention (default)", "Disable data execution provention">; defm safeseh : B<"safeseh", "Produce an image with Safe Exception Handler (only for x86)", "Don't produce an image with Safe Exception Handler">; defm tsaware : B<"tsaware", "Create Terminal Server aware executable (default)", "Create non-Terminal Server aware executable">; def help : F<"help">; // /?? and -?? must be before /? and -? to not confuse lib/Options. 
def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias; // LLD extensions +def end_lib : F<"end-lib">, + HelpText<"End a grouping of objects that should be treated as if they were together in an archive">; def exclude_all_symbols : F<"exclude-all-symbols">; def export_all_symbols : F<"export-all-symbols">; defm demangle : B<"demangle", "Demangle symbols in output (default)", "Do not demangle symbols in output">; def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">, HelpText<"Add symbol as undefined, but allow it to remain undefined">; def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">; def pdb_source_path : P<"pdbsourcepath", "Base path used to make relative source file path absolute in PDB">; def rsp_quoting : Joined<["--"], "rsp-quoting=">, HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">; +def start_lib : F<"start-lib">, + HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">; def thinlto_emit_imports_files : F<"thinlto-emit-imports-files">, HelpText<"Emit .imports files with -thinlto-index-only">; def thinlto_index_only : F<"thinlto-index-only">, HelpText<"Instead of linking, emit ThinLTO index files">; def thinlto_index_only_arg : P< "thinlto-index-only", "-thinlto-index-only and also write native module names to file">; def thinlto_object_suffix_replace : P< "thinlto-object-suffix-replace", "'old;new' replace old suffix with new suffix in ThinLTO index">; def thinlto_prefix_replace: P< "thinlto-prefix-replace", "'old;new' replace old prefix with new prefix in ThinLTO outputs">; def lto_obj_path : P< "lto-obj-path", "output native object for merged LTO unit to this path">; def dash_dash_version : Flag<["--"], "version">, HelpText<"Print version information">; defm threads: B<"threads", "Run the linker multi-threaded (default)", "Do not run the linker multi-threaded">; // Flags for debugging def 
lldmap : F<"lldmap">; def lldmap_file : Joined<["/", "-", "/?", "-?"], "lldmap:">; def show_timing : F<"time">; def summary : F<"summary">; //============================================================================== // The flags below do nothing. They are defined only for link.exe compatibility. //============================================================================== class QF : Joined<["/", "-", "/?", "-?"], name#":">; def ignoreidl : F<"ignoreidl">; def nologo : F<"nologo">; def throwingnew : F<"throwingnew">; def editandcontinue : F<"editandcontinue">; def fastfail : F<"fastfail">; def delay : QF<"delay">; def errorreport : QF<"errorreport">; def idlout : QF<"idlout">; def maxilksize : QF<"maxilksize">; def tlbid : QF<"tlbid">; def tlbout : QF<"tlbout">; def verbose_all : QF<"verbose">; def guardsym : QF<"guardsym">; Index: lld/trunk/COFF/InputFiles.cpp =================================================================== --- lld/trunk/COFF/InputFiles.cpp (revision 370815) +++ lld/trunk/COFF/InputFiles.cpp (revision 370816) @@ -1,883 +1,929 @@ //===- InputFiles.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Chunks.h" #include "Config.h" #include "DebugTypes.h" #include "Driver.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm-c/lto.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Target/TargetOptions.h" #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::codeview; using namespace llvm::object; using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; namespace lld { namespace coff { std::vector ObjFile::instances; std::vector ImportFile::instances; std::vector BitcodeFile::instances; /// Checks that Source is compatible with being a weak alias to Target. /// If Source is Undefined and has no weak alias set, makes it a weak /// alias to Target. static void checkAndSetWeakAlias(SymbolTable *symtab, InputFile *f, Symbol *source, Symbol *target) { if (auto *u = dyn_cast(source)) { if (u->weakAlias && u->weakAlias != target) { // Weak aliases as produced by GCC are named in the form // .weak.., where is the name // of another symbol emitted near the weak symbol. // Just use the definition from the first object file that defined // this weak symbol. 
if (config->mingw) return; symtab->reportDuplicate(source, f); } u->weakAlias = target; } } +static bool ignoredSymbolName(StringRef name) { + return name == "@feat.00" || name == "@comp.id"; +} + ArchiveFile::ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {} void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. file = CHECK(Archive::create(mb), this); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &sym : file->symbols()) - symtab->addLazy(this, sym); + symtab->addLazyArchive(this, sym); } // Returns a buffer pointing to a member file containing a given symbol. void ArchiveFile::addMember(const Archive::Symbol &sym) { const Archive::Child &c = CHECK(sym.getMember(), "could not get the member for symbol " + toCOFFString(sym)); // Return an empty buffer if we have already returned the same buffer. if (!seen.insert(c.getChildOffset()).second) return; driver->enqueueArchiveMember(c, sym, getName()); } std::vector getArchiveMembers(Archive *file) { std::vector v; Error err = Error::success(); for (const ErrorOr &cOrErr : file->children(err)) { Archive::Child c = CHECK(cOrErr, file->getFileName() + ": could not get the child of the archive"); MemoryBufferRef mbref = CHECK(c.getMemoryBufferRef(), file->getFileName() + ": could not get the buffer for a child of the archive"); v.push_back(mbref); } if (err) fatal(file->getFileName() + ": Archive::children failed: " + toString(std::move(err))); return v; } +void LazyObjFile::fetch() { + if (mb.getBuffer().empty()) + return; + + InputFile *file; + if (isBitcode(mb)) + file = make(mb, "", 0, std::move(symbols)); + else + file = make(mb, std::move(symbols)); + mb = {}; + symtab->addFile(file); +} + +void LazyObjFile::parse() { + if (isBitcode(this->mb)) { + // Bitcode file. 
+ std::unique_ptr obj = + CHECK(lto::InputFile::create(this->mb), this); + for (const lto::InputFile::Symbol &sym : obj->symbols()) { + if (!sym.isUndefined()) + symtab->addLazyObject(this, sym.getName()); + } + return; + } + + // Native object file. + std::unique_ptr coffObjPtr = CHECK(createBinary(mb), this); + COFFObjectFile *coffObj = cast(coffObjPtr.get()); + uint32_t numSymbols = coffObj->getNumberOfSymbols(); + for (uint32_t i = 0; i < numSymbols; ++i) { + COFFSymbolRef coffSym = check(coffObj->getSymbol(i)); + if (coffSym.isUndefined() || !coffSym.isExternal() || + coffSym.isWeakExternal()) + continue; + StringRef name; + coffObj->getSymbolName(coffSym, name); + if (coffSym.isAbsolute() && ignoredSymbolName(name)) + continue; + symtab->addLazyObject(this, name); + i += coffSym.getNumberOfAuxSymbols(); + } +} + void ObjFile::parse() { // Parse a memory buffer as a COFF file. std::unique_ptr bin = CHECK(createBinary(mb), this); if (auto *obj = dyn_cast(bin.get())) { bin.release(); coffObj.reset(obj); } else { fatal(toString(this) + " is not a COFF file"); } // Read section and symbol tables. initializeChunks(); initializeSymbols(); initializeFlags(); initializeDependencies(); } const coff_section* ObjFile::getSection(uint32_t i) { const coff_section *sec; if (auto ec = coffObj->getSection(i, sec)) fatal("getSection failed: #" + Twine(i) + ": " + ec.message()); return sec; } // We set SectionChunk pointers in the SparseChunks vector to this value // temporarily to mark comdat sections as having an unknown resolution. As we // walk the object file's symbol table, once we visit either a leader symbol or // an associative section definition together with the parent comdat's leader, // we set the pointer to either nullptr (to mark the section as discarded) or a // valid SectionChunk for that section. 
static SectionChunk *const pendingComdat = reinterpret_cast(1); void ObjFile::initializeChunks() { uint32_t numSections = coffObj->getNumberOfSections(); chunks.reserve(numSections); sparseChunks.resize(numSections + 1); for (uint32_t i = 1; i < numSections + 1; ++i) { const coff_section *sec = getSection(i); if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT) sparseChunks[i] = pendingComdat; else sparseChunks[i] = readSection(i, nullptr, ""); } } SectionChunk *ObjFile::readSection(uint32_t sectionNumber, const coff_aux_section_definition *def, StringRef leaderName) { const coff_section *sec = getSection(sectionNumber); StringRef name; if (Expected e = coffObj->getSectionName(sec)) name = *e; else fatal("getSectionName failed: #" + Twine(sectionNumber) + ": " + toString(e.takeError())); if (name == ".drectve") { ArrayRef data; cantFail(coffObj->getSectionContents(sec, data)); directives = StringRef((const char *)data.data(), data.size()); return nullptr; } if (name == ".llvm_addrsig") { addrsigSec = sec; return nullptr; } // Object files may have DWARF debug info or MS CodeView debug info // (or both). // // DWARF sections don't need any special handling from the perspective // of the linker; they are just a data section containing relocations. // We can just link them to complete debug info. // // CodeView needs linker support. We need to interpret debug info, // and then write it to a separate .pdb file. // Ignore DWARF debug info unless /debug is given. if (!config->debug && name.startswith(".debug_")) return nullptr; if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) return nullptr; auto *c = make(this, sec); if (def) c->checksum = def->CheckSum; // CodeView sections are stored to a different vector because they are not // linked in the regular manner. 
if (c->isCodeView()) debugChunks.push_back(c); else if (name == ".gfids$y") guardFidChunks.push_back(c); else if (name == ".gljmp$y") guardLJmpChunks.push_back(c); else if (name == ".sxdata") sXDataChunks.push_back(c); else if (config->tailMerge && sec->NumberOfRelocations == 0 && name == ".rdata" && leaderName.startswith("??_C@")) // COFF sections that look like string literal sections (i.e. no // relocations, in .rdata, leader symbol name matches the MSVC name mangling // for string literals) are subject to string tail merging. MergeChunk::addSection(c); else if (name == ".rsrc" || name.startswith(".rsrc$")) resourceChunks.push_back(c); else chunks.push_back(c); return c; } void ObjFile::includeResourceChunks() { chunks.insert(chunks.end(), resourceChunks.begin(), resourceChunks.end()); } void ObjFile::readAssociativeDefinition( COFFSymbolRef sym, const coff_aux_section_definition *def) { readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj())); } void ObjFile::readAssociativeDefinition(COFFSymbolRef sym, const coff_aux_section_definition *def, uint32_t parentIndex) { SectionChunk *parent = sparseChunks[parentIndex]; int32_t sectionNumber = sym.getSectionNumber(); auto diag = [&]() { StringRef name, parentName; coffObj->getSymbolName(sym, name); const coff_section *parentSec = getSection(parentIndex); if (Expected e = coffObj->getSectionName(parentSec)) parentName = *e; error(toString(this) + ": associative comdat " + name + " (sec " + Twine(sectionNumber) + ") has invalid reference to section " + parentName + " (sec " + Twine(parentIndex) + ")"); }; if (parent == pendingComdat) { // This can happen if an associative comdat refers to another associative // comdat that appears after it (invalid per COFF spec) or to a section // without any symbols. diag(); return; } // Check whether the parent is prevailing. If it is, so are we, and we read // the section; otherwise mark it as discarded. 
if (parent) { SectionChunk *c = readSection(sectionNumber, def, ""); sparseChunks[sectionNumber] = c; if (c) { c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE; parent->addAssociative(c); } } else { sparseChunks[sectionNumber] = nullptr; } } void ObjFile::recordPrevailingSymbolForMingw( COFFSymbolRef sym, DenseMap &prevailingSectionMap) { // For comdat symbols in executable sections, where this is the copy // of the section chunk we actually include instead of discarding it, // add the symbol to a map to allow using it for implicitly // associating .[px]data$ sections to it. int32_t sectionNumber = sym.getSectionNumber(); SectionChunk *sc = sparseChunks[sectionNumber]; if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { StringRef name; coffObj->getSymbolName(sym, name); if (getMachineType() == I386) name.consume_front("_"); prevailingSectionMap[name] = sectionNumber; } } void ObjFile::maybeAssociateSEHForMingw( COFFSymbolRef sym, const coff_aux_section_definition *def, const DenseMap &prevailingSectionMap) { StringRef name; coffObj->getSymbolName(sym, name); if (name.consume_front(".pdata$") || name.consume_front(".xdata$") || name.consume_front(".eh_frame$")) { // For MinGW, treat .[px]data$ and .eh_frame$ as implicitly // associative to the symbol . auto parentSym = prevailingSectionMap.find(name); if (parentSym != prevailingSectionMap.end()) readAssociativeDefinition(sym, def, parentSym->second); } } Symbol *ObjFile::createRegular(COFFSymbolRef sym) { SectionChunk *sc = sparseChunks[sym.getSectionNumber()]; if (sym.isExternal()) { StringRef name; coffObj->getSymbolName(sym, name); if (sc) return symtab->addRegular(this, name, sym.getGeneric(), sc); // For MinGW symbols named .weak.* that point to a discarded section, // don't create an Undefined symbol. If nothing ever refers to the symbol, // everything should be fine. If something actually refers to the symbol // (e.g. 
the undefined weak alias), linking will fail due to undefined // references at the end. if (config->mingw && name.startswith(".weak.")) return nullptr; return symtab->addUndefined(name, this, false); } if (sc) return make(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, sym.getGeneric(), sc); return nullptr; } void ObjFile::initializeSymbols() { uint32_t numSymbols = coffObj->getNumberOfSymbols(); symbols.resize(numSymbols); SmallVector, 8> weakAliases; std::vector pendingIndexes; pendingIndexes.reserve(numSymbols); DenseMap prevailingSectionMap; std::vector comdatDefs( coffObj->getNumberOfSections() + 1); for (uint32_t i = 0; i < numSymbols; ++i) { COFFSymbolRef coffSym = check(coffObj->getSymbol(i)); bool prevailingComdat; if (coffSym.isUndefined()) { symbols[i] = createUndefined(coffSym); } else if (coffSym.isWeakExternal()) { symbols[i] = createUndefined(coffSym); uint32_t tagIndex = coffSym.getAux()->TagIndex; weakAliases.emplace_back(symbols[i], tagIndex); } else if (Optional optSym = createDefined(coffSym, comdatDefs, prevailingComdat)) { symbols[i] = *optSym; if (config->mingw && prevailingComdat) recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap); } else { // createDefined() returns None if a symbol belongs to a section that // was pending at the point when the symbol was read. This can happen in // two cases: // 1) section definition symbol for a comdat leader; // 2) symbol belongs to a comdat section associated with another section. // In both of these cases, we can expect the section to be resolved by // the time we finish visiting the remaining symbols in the symbol // table. So we postpone the handling of this symbol until that time. 
pendingIndexes.push_back(i); } i += coffSym.getNumberOfAuxSymbols(); } for (uint32_t i : pendingIndexes) { COFFSymbolRef sym = check(coffObj->getSymbol(i)); if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(sym, def); else if (config->mingw) maybeAssociateSEHForMingw(sym, def, prevailingSectionMap); } if (sparseChunks[sym.getSectionNumber()] == pendingComdat) { StringRef name; coffObj->getSymbolName(sym, name); log("comdat section " + name + " without leader and unassociated, discarding"); continue; } symbols[i] = createRegular(sym); } for (auto &kv : weakAliases) { Symbol *sym = kv.first; uint32_t idx = kv.second; checkAndSetWeakAlias(symtab, this, sym, symbols[idx]); } } Symbol *ObjFile::createUndefined(COFFSymbolRef sym) { StringRef name; coffObj->getSymbolName(sym, name); return symtab->addUndefined(name, this, sym.isWeakExternal()); } void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection, bool &prevailing, DefinedRegular *leader) { if (prevailing) return; // There's already an existing comdat for this symbol: `Leader`. // Use the comdats's selection field to determine if the new // symbol in `Sym` should be discarded, produce a duplicate symbol // error, etc. SectionChunk *leaderChunk = nullptr; COMDATType leaderSelection = IMAGE_COMDAT_SELECT_ANY; if (leader->data) { leaderChunk = leader->getChunk(); leaderSelection = leaderChunk->selection; } else { // FIXME: comdats from LTO files don't know their selection; treat them // as "any". selection = leaderSelection; } if ((selection == IMAGE_COMDAT_SELECT_ANY && leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) || (selection == IMAGE_COMDAT_SELECT_LARGEST && leaderSelection == IMAGE_COMDAT_SELECT_ANY)) { // cl.exe picks "any" for vftables when building with /GR- and // "largest" when building with /GR. 
To be able to link object files // compiled with each flag, "any" and "largest" are merged as "largest". leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST; } // Other than that, comdat selections must match. This is a bit more // strict than link.exe which allows merging "any" and "largest" if "any" // is the first symbol the linker sees, and it allows merging "largest" // with everything (!) if "largest" is the first symbol the linker sees. // Making this symmetric independent of which selection is seen first // seems better though. // (This behavior matches ModuleLinker::getComdatResult().) if (selection != leaderSelection) { log(("conflicting comdat type for " + toString(*leader) + ": " + Twine((int)leaderSelection) + " in " + toString(leader->getFile()) + " and " + Twine((int)selection) + " in " + toString(this)) .str()); symtab->reportDuplicate(leader, this); return; } switch (selection) { case IMAGE_COMDAT_SELECT_NODUPLICATES: symtab->reportDuplicate(leader, this); break; case IMAGE_COMDAT_SELECT_ANY: // Nothing to do. break; case IMAGE_COMDAT_SELECT_SAME_SIZE: if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) symtab->reportDuplicate(leader, this); break; case IMAGE_COMDAT_SELECT_EXACT_MATCH: { SectionChunk newChunk(this, getSection(sym)); // link.exe only compares section contents here and doesn't complain // if the two comdat sections have e.g. different alignment. // Match that. if (leaderChunk->getContents() != newChunk.getContents()) symtab->reportDuplicate(leader, this); break; } case IMAGE_COMDAT_SELECT_ASSOCIATIVE: // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE. // (This means lld-link doesn't produce duplicate symbol errors for // associative comdats while link.exe does, but associate comdats // are never extern in practice.) 
llvm_unreachable("createDefined not called for associative comdats"); case IMAGE_COMDAT_SELECT_LARGEST: if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) { // Replace the existing comdat symbol with the new one. StringRef name; coffObj->getSymbolName(sym, name); // FIXME: This is incorrect: With /opt:noref, the previous sections // make it into the final executable as well. Correct handling would // be to undo reading of the whole old section that's being replaced, // or doing one pass that determines what the final largest comdat // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading // only the largest one. replaceSymbol(leader, this, name, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym.getGeneric(), nullptr); prevailing = true; } break; case IMAGE_COMDAT_SELECT_NEWEST: llvm_unreachable("should have been rejected earlier"); } } Optional ObjFile::createDefined( COFFSymbolRef sym, std::vector &comdatDefs, bool &prevailing) { prevailing = false; auto getName = [&]() { StringRef s; coffObj->getSymbolName(sym, s); return s; }; if (sym.isCommon()) { auto *c = make(sym); chunks.push_back(c); return symtab->addCommon(this, getName(), sym.getValue(), sym.getGeneric(), c); } if (sym.isAbsolute()) { StringRef name = getName(); - // Skip special symbols. - if (name == "@comp.id") - return nullptr; - if (name == "@feat.00") { + if (name == "@feat.00") feat00Flags = sym.getValue(); + // Skip special symbols. 
+ if (ignoredSymbolName(name)) return nullptr; - } if (sym.isExternal()) return symtab->addAbsolute(name, sym); return make(name, sym); } int32_t sectionNumber = sym.getSectionNumber(); if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) return nullptr; if (llvm::COFF::isReservedSectionNumber(sectionNumber)) fatal(toString(this) + ": " + getName() + " should not refer to special section " + Twine(sectionNumber)); if ((uint32_t)sectionNumber >= sparseChunks.size()) fatal(toString(this) + ": " + getName() + " should not refer to non-existent section " + Twine(sectionNumber)); // Comdat handling. // A comdat symbol consists of two symbol table entries. // The first symbol entry has the name of the section (e.g. .text), fixed // values for the other fields, and one auxilliary record. // The second symbol entry has the name of the comdat symbol, called the // "comdat leader". // When this function is called for the first symbol entry of a comdat, // it sets comdatDefs and returns None, and when it's called for the second // symbol entry it reads comdatDefs and then sets it back to nullptr. // Handle comdat leader. if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) { comdatDefs[sectionNumber] = nullptr; DefinedRegular *leader; if (sym.isExternal()) { std::tie(leader, prevailing) = symtab->addComdat(this, getName(), sym.getGeneric()); } else { leader = make(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, sym.getGeneric()); prevailing = true; } if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES || // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either. 
def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) { fatal("unknown comdat type " + std::to_string((int)def->Selection) + " for " + getName() + " in " + toString(this)); } COMDATType selection = (COMDATType)def->Selection; if (leader->isCOMDAT) handleComdatSelection(sym, selection, prevailing, leader); if (prevailing) { SectionChunk *c = readSection(sectionNumber, def, getName()); sparseChunks[sectionNumber] = c; c->sym = cast(leader); c->selection = selection; cast(leader)->data = &c->repl; } else { sparseChunks[sectionNumber] = nullptr; } return leader; } // Prepare to handle the comdat leader symbol by setting the section's // ComdatDefs pointer if we encounter a non-associative comdat. if (sparseChunks[sectionNumber] == pendingComdat) { if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE) comdatDefs[sectionNumber] = def; } return None; } return createRegular(sym); } MachineTypes ObjFile::getMachineType() { if (coffObj) return static_cast(coffObj->getMachine()); return IMAGE_FILE_MACHINE_UNKNOWN; } ArrayRef ObjFile::getDebugSection(StringRef secName) { if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName)) return sec->consumeDebugMagic(); return {}; } // OBJ files systematically store critical informations in a .debug$S stream, // even if the TU was compiled with no debug info. At least two records are // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is // currently used to initialize the hotPatchable member. 
void ObjFile::initializeFlags() { ArrayRef data = getDebugSection(".debug$S"); if (data.empty()) return; DebugSubsectionArray subsections; BinaryStreamReader reader(data, support::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, data.size())); for (const DebugSubsectionRecord &ss : subsections) { if (ss.kind() != DebugSubsectionKind::Symbols) continue; unsigned offset = 0; // Only parse the first two records. We are only looking for S_OBJNAME // and S_COMPILE3, and they usually appear at the beginning of the // stream. for (unsigned i = 0; i < 2; ++i) { Expected sym = readSymbolFromStream(ss.getRecordData(), offset); if (!sym) { consumeError(sym.takeError()); return; } if (sym->kind() == SymbolKind::S_COMPILE3) { auto cs = cantFail(SymbolDeserializer::deserializeAs(sym.get())); hotPatchable = (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None; } if (sym->kind() == SymbolKind::S_OBJNAME) { auto objName = cantFail(SymbolDeserializer::deserializeAs( sym.get())); pchSignature = objName.Signature; } offset += sym->length(); } } } // Depending on the compilation flags, OBJs can refer to external files, // necessary to merge this OBJ into the final PDB. We currently support two // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu. // And PDB type servers, when compiling with /Zi. This function extracts these // dependencies and makes them available as a TpiSource interface (see // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular // output even with /Yc and /Yu and with /Zi. 
void ObjFile::initializeDependencies() { if (!config->debug) return; bool isPCH = false; ArrayRef data = getDebugSection(".debug$P"); if (!data.empty()) isPCH = true; else data = getDebugSection(".debug$T"); if (data.empty()) return; CVTypeArray types; BinaryStreamReader reader(data, support::little); cantFail(reader.readArray(types, reader.getLength())); CVTypeArray::Iterator firstType = types.begin(); if (firstType == types.end()) return; debugTypes.emplace(types); if (isPCH) { debugTypesObj = makePrecompSource(this); return; } if (firstType->kind() == LF_TYPESERVER2) { TypeServer2Record ts = cantFail( TypeDeserializer::deserializeAs(firstType->data())); debugTypesObj = makeUseTypeServerSource(this, &ts); return; } if (firstType->kind() == LF_PRECOMP) { PrecompRecord precomp = cantFail( TypeDeserializer::deserializeAs(firstType->data())); debugTypesObj = makeUsePrecompSource(this, &precomp); return; } debugTypesObj = makeTpiSource(this); } StringRef ltrim1(StringRef s, const char *chars) { if (!s.empty() && strchr(chars, s[0])) return s.substr(1); return s; } void ImportFile::parse() { const char *buf = mb.getBufferStart(); const auto *hdr = reinterpret_cast(buf); // Check if the total size is valid. if (mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData) fatal("broken import library"); // Read names and create an __imp_ symbol. 
StringRef name = saver.save(StringRef(buf + sizeof(*hdr))); StringRef impName = saver.save("__imp_" + name); const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1; dllName = StringRef(nameStart); StringRef extName; switch (hdr->getNameType()) { case IMPORT_ORDINAL: extName = ""; break; case IMPORT_NAME: extName = name; break; case IMPORT_NAME_NOPREFIX: extName = ltrim1(name, "?@_"); break; case IMPORT_NAME_UNDECORATE: extName = ltrim1(name, "?@_"); extName = extName.substr(0, extName.find('@')); break; } this->hdr = hdr; externalName = extName; impSym = symtab->addImportData(impName, this); // If this was a duplicate, we logged an error but may continue; // in this case, impSym is nullptr. if (!impSym) return; if (hdr->getType() == llvm::COFF::IMPORT_CONST) static_cast(symtab->addImportData(name, this)); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (hdr->getType() == llvm::COFF::IMPORT_CODE) thunkSym = symtab->addImportThunk( name, cast_or_null(impSym), hdr->Machine); } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) - : InputFile(BitcodeKind, mb) { + uint64_t offsetInArchive, + std::vector &&symbols) + : InputFile(BitcodeKind, mb), symbols(std::move(symbols)) { std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) path = replaceThinLTOSuffix(mb.getBufferIdentifier()); // ThinLTO assumes that all MemoryBufferRefs given to it have a unique // name. If two archives define two members with the same name, this // causes a collision which result in only one of the objects being taken // into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). So we append file offset to make // filename unique. MemoryBufferRef mbref( mb.getBuffer(), saver.save(archiveName + path + (archiveName.empty() ? 
"" : utostr(offsetInArchive)))); obj = check(lto::InputFile::create(mbref)); } void BitcodeFile::parse() { std::vector> comdat(obj->getComdatTable().size()); for (size_t i = 0; i != obj->getComdatTable().size(); ++i) // FIXME: lto::InputFile doesn't keep enough data to do correct comdat // selection handling. comdat[i] = symtab->addComdat(this, saver.save(obj->getComdatTable()[i])); for (const lto::InputFile::Symbol &objSym : obj->symbols()) { StringRef symName = saver.save(objSym.getName()); int comdatIndex = objSym.getComdatIndex(); Symbol *sym; if (objSym.isUndefined()) { sym = symtab->addUndefined(symName, this, false); } else if (objSym.isCommon()) { sym = symtab->addCommon(this, symName, objSym.getCommonSize()); } else if (objSym.isWeak() && objSym.isIndirect()) { // Weak external. sym = symtab->addUndefined(symName, this, true); std::string fallback = objSym.getCOFFWeakExternalFallback(); Symbol *alias = symtab->addUndefined(saver.save(fallback)); checkAndSetWeakAlias(symtab, this, sym, alias); } else if (comdatIndex != -1) { if (symName == obj->getComdatTable()[comdatIndex]) sym = comdat[comdatIndex].first; else if (comdat[comdatIndex].second) sym = symtab->addRegular(this, symName); else sym = symtab->addUndefined(symName, this, false); } else { sym = symtab->addRegular(this, symName); } symbols.push_back(sym); if (objSym.isUsed()) config->gcroot.push_back(sym); } directives = obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { switch (Triple(obj->getTargetTriple()).getArch()) { case Triple::x86_64: return AMD64; case Triple::x86: return I386; case Triple::arm: return ARMNT; case Triple::aarch64: return ARM64; default: return IMAGE_FILE_MACHINE_UNKNOWN; } } std::string replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; if (path.consume_back(suffix)) return (path + repl).str(); return path; } } // namespace coff } // namespace 
lld // Returns the last element of a path, which is supposed to be a filename. static StringRef getBasename(StringRef path) { return sys::path::filename(path, sys::path::Style::windows); } // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". std::string lld::toString(const coff::InputFile *file) { if (!file) return ""; if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) return file->getName(); return (getBasename(file->parentName) + "(" + getBasename(file->getName()) + ")") .str(); } Index: lld/trunk/COFF/Driver.cpp =================================================================== --- lld/trunk/COFF/Driver.cpp (revision 370815) +++ lld/trunk/COFF/Driver.cpp (revision 370816) @@ -1,1928 +1,1960 @@ //===- Driver.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Driver.h" #include "Config.h" #include "DebugTypes.h" #include "ICF.h" #include "InputFiles.h" #include "MarkLive.h" #include "MinGW.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Common/Args.h" #include "lld/Common/Driver.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include #include #include using namespace llvm; using namespace llvm::object; using namespace llvm::COFF; using llvm::sys::Process; namespace lld { namespace coff { static Timer inputFileTimer("Input File Reading", Timer::root()); Configuration *config; LinkerDriver *driver; bool link(ArrayRef args, bool canExitEarly, raw_ostream &diag) { errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorOS = &diag; errorHandler().errorLimitExceededMsg = "too many errors emitted, stopping now" " (use /errorlimit:0 to see all errors)"; errorHandler().exitEarly = canExitEarly; enableColors(diag.has_colors()); config = make(); symtab = make(); driver = make(); driver->link(args); // Call exit() if we can to avoid calling destructors. if (canExitEarly) exitLld(errorCount() ? 1 : 0); freeArena(); ObjFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); return !errorCount(); } // Parse options of the form "old;new". static std::pair getOldNewOptions(opt::InputArgList &args, unsigned id) { auto *arg = args.getLastArg(id); if (!arg) return {"", ""}; StringRef s = arg->getValue(); std::pair ret = s.split(';'); if (ret.second.empty()) error(arg->getSpelling() + " expects 'old;new' format, but got " + s); return ret; } // Drop directory components and replace extension with ".exe" or ".dll". static std::string getOutputPath(StringRef path) { auto p = path.find_last_of("\\/"); StringRef s = (p == StringRef::npos) ? path : path.substr(p + 1); const char* e = config->dll ? ".dll" : ".exe"; return (s.substr(0, s.rfind('.')) + e).str(); } // Returns true if S matches /crtend.?\.o$/. 
static bool isCrtend(StringRef s) { if (!s.endswith(".o")) return false; s = s.drop_back(2); if (s.endswith("crtend")) return true; return !s.empty() && s.drop_back().endswith("crtend"); } // ErrorOr is not default constructible, so it cannot be used as the type // parameter of a future. // FIXME: We could open the file in createFutureForFile and avoid needing to // return an error here, but for the moment that would cost us a file descriptor // (a limited resource on Windows) for the duration that the future is pending. using MBErrPair = std::pair, std::error_code>; // Create a std::future that opens and maps a file using the best strategy for // the host platform. static std::future createFutureForFile(std::string path) { #if _WIN32 // On Windows, file I/O is relatively slow so it is best to do this // asynchronously. auto strategy = std::launch::async; #else auto strategy = std::launch::deferred; #endif return std::async(strategy, [=]() { auto mbOrErr = MemoryBuffer::getFile(path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!mbOrErr) return MBErrPair{nullptr, mbOrErr.getError()}; return MBErrPair{std::move(*mbOrErr), std::error_code()}; }); } // Symbol names are mangled by prepending "_" on x86. 
static StringRef mangle(StringRef sym) { assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN); if (config->machine == I386) return saver.save("_" + sym); return sym; } static bool findUnderscoreMangle(StringRef sym) { Symbol *s = symtab->findMangle(mangle(sym)); return s && !isa(s); } MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr mb) { MemoryBufferRef mbref = *mb; make>(std::move(mb)); // take ownership if (driver->tar) driver->tar->append(relativeToRoot(mbref.getBufferIdentifier()), mbref.getBuffer()); return mbref; } void LinkerDriver::addBuffer(std::unique_ptr mb, - bool wholeArchive) { + bool wholeArchive, bool lazy) { StringRef filename = mb->getBufferIdentifier(); MemoryBufferRef mbref = takeBuffer(std::move(mb)); filePaths.push_back(filename); // File type is detected by contents, not by file extension. switch (identify_magic(mbref.getBuffer())) { case file_magic::windows_resource: resources.push_back(mbref); break; case file_magic::archive: if (wholeArchive) { std::unique_ptr file = CHECK(Archive::create(mbref), filename + ": failed to parse archive"); Archive *archive = file.get(); make>(std::move(file)); // take ownership for (MemoryBufferRef m : getArchiveMembers(archive)) addArchiveBuffer(m, "", filename, 0); return; } symtab->addFile(make(mbref)); break; case file_magic::bitcode: - symtab->addFile(make(mbref, "", 0)); + if (lazy) + symtab->addFile(make(mbref)); + else + symtab->addFile(make(mbref, "", 0)); break; case file_magic::coff_object: case file_magic::coff_import_library: - symtab->addFile(make(mbref)); + if (lazy) + symtab->addFile(make(mbref)); + else + symtab->addFile(make(mbref)); break; case file_magic::pdb: loadTypeServerSource(mbref); break; case file_magic::coff_cl_gl_object: error(filename + ": is not a native COFF file. Recompile without /GL"); break; case file_magic::pecoff_executable: if (filename.endswith_lower(".dll")) { error(filename + ": bad file type. 
Did you specify a DLL instead of an " "import library?"); break; } LLVM_FALLTHROUGH; default: error(mbref.getBufferIdentifier() + ": unknown file type"); break; } } -void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive) { +void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) { auto future = std::make_shared>(createFutureForFile(path)); std::string pathStr = path; enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) { std::string msg = "could not open '" + pathStr + "': " + mbOrErr.second.message(); // Check if the filename is a typo for an option flag. OptTable thinks // that all args that are not known options and that start with / are // filenames, but e.g. `/nodefaultlibs` is more likely a typo for // the option `/nodefaultlib` than a reference to a file in the root // directory. std::string nearest; if (COFFOptTable().findNearest(pathStr, nearest) > 1) error(msg); else error(msg + "; did you mean '" + nearest + "'"); } else - driver->addBuffer(std::move(mbOrErr.first), wholeArchive); + driver->addBuffer(std::move(mbOrErr.first), wholeArchive, lazy); }); } void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName, StringRef parentName, uint64_t offsetInArchive) { file_magic magic = identify_magic(mb.getBuffer()); if (magic == file_magic::coff_import_library) { InputFile *imp = make(mb); imp->parentName = parentName; symtab->addFile(imp); return; } InputFile *obj; if (magic == file_magic::coff_object) { obj = make(mb); } else if (magic == file_magic::bitcode) { obj = make(mb, parentName, offsetInArchive); } else { error("unknown file type: " + mb.getBufferIdentifier()); return; } obj->parentName = parentName; symtab->addFile(obj); log("Loaded " + toString(obj) + " for " + symName); } void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym, StringRef parentName) { auto reportBufferError = [=](Error &&e, StringRef childName) { fatal("could not get the buffer for 
the member defining symbol " + toCOFFString(sym) + ": " + parentName + "(" + childName + "): " + toString(std::move(e))); }; if (!c.getParent()->isThin()) { uint64_t offsetInArchive = c.getChildOffset(); Expected mbOrErr = c.getMemoryBufferRef(); if (!mbOrErr) reportBufferError(mbOrErr.takeError(), check(c.getFullName())); MemoryBufferRef mb = mbOrErr.get(); enqueueTask([=]() { driver->addArchiveBuffer(mb, toCOFFString(sym), parentName, offsetInArchive); }); return; } std::string childName = CHECK( c.getFullName(), "could not get the filename for the member defining symbol " + toCOFFString(sym)); auto future = std::make_shared>( createFutureForFile(childName)); enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) reportBufferError(errorCodeToError(mbOrErr.second), childName); driver->addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)), toCOFFString(sym), parentName, /*OffsetInArchive=*/0); }); } static bool isDecorated(StringRef sym) { return sym.startswith("@") || sym.contains("@@") || sym.startswith("?") || (!config->mingw && sym.contains('@')); } // Parses .drectve section contents and returns a list of files // specified by /defaultlib. void LinkerDriver::parseDirectives(InputFile *file) { StringRef s = file->getDirectives(); if (s.empty()) return; log("Directives: " + toString(file) + ": " + s); ArgParser parser; // .drectve is always tokenized using Windows shell rules. // /EXPORT: option can appear too many times, processing in fastpath. opt::InputArgList args; std::vector exports; std::tie(args, exports) = parser.parseDirectives(s); for (StringRef e : exports) { // If a common header file contains dllexported function // declarations, many object files may end up with having the // same /EXPORT options. In order to save cost of parsing them, // we dedup them first. 
if (!directivesExports.insert(e).second) continue; Export exp = parseExport(e); if (config->machine == I386 && config->mingw) { if (!isDecorated(exp.name)) exp.name = saver.save("_" + exp.name); if (!exp.extName.empty() && !isDecorated(exp.extName)) exp.extName = saver.save("_" + exp.extName); } exp.directives = true; config->exports.push_back(exp); } for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_aligncomm: parseAligncomm(arg->getValue()); break; case OPT_alternatename: parseAlternateName(arg->getValue()); break; case OPT_defaultlib: if (Optional path = findLib(arg->getValue())) - enqueuePath(*path, false); + enqueuePath(*path, false, false); break; case OPT_entry: config->entry = addUndefined(mangle(arg->getValue())); break; case OPT_failifmismatch: checkFailIfMismatch(arg->getValue(), file); break; case OPT_incl: addUndefined(arg->getValue()); break; case OPT_merge: parseMerge(arg->getValue()); break; case OPT_nodefaultlib: config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); break; case OPT_section: parseSection(arg->getValue()); break; case OPT_subsystem: parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); break; // Only add flags here that link.exe accepts in // `#pragma comment(linker, "/flag")`-generated sections. case OPT_editandcontinue: case OPT_guardsym: case OPT_throwingnew: break; default: error(arg->getSpelling() + " is not allowed in .drectve"); } } } // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. 
StringRef LinkerDriver::doFindFile(StringRef filename) {
  // A name that already contains a path separator is returned untouched; the
  // search paths are not consulted for it.
  bool hasPathSep = (filename.find_first_of("/\\") != StringRef::npos);
  if (hasPathSep)
    return filename;
  bool hasExt = filename.contains('.');
  for (StringRef dir : searchPaths) {
    SmallString<128> path = dir;
    sys::path::append(path, filename);
    if (sys::fs::exists(path.str()))
      return saver.save(path.str());
    // Names without an extension get a second chance with ".obj" appended.
    if (!hasExt) {
      path.append(".obj");
      if (sys::fs::exists(path.str()))
        return saver.save(path.str());
    }
  }
  // Not found in any search path: hand the name back unchanged.
  return filename;
}

// Returns the filesystem unique ID of `path`, or None if
// sys::fs::getUniqueID reports an error for it.
static Optional<sys::fs::UniqueID> getUniqueID(StringRef path) {
  sys::fs::UniqueID ret;
  if (sys::fs::getUniqueID(path, ret))
    return None;
  return ret;
}

// Resolves a file path. This never returns the same path
// (in that case, it returns None).
Optional<StringRef> LinkerDriver::findFile(StringRef filename) {
  StringRef path = doFindFile(filename);

  // Deduplicate by unique ID. A path whose ID cannot be obtained is not
  // deduplicated and is returned as is.
  if (Optional<sys::fs::UniqueID> id = getUniqueID(path)) {
    bool seen = !visitedFiles.insert(*id).second;
    if (seen)
      return None;
  }

  // Record the basename of every ".lib" (case-insensitive) in visitedLibs.
  if (path.endswith_lower(".lib"))
    visitedLibs.insert(sys::path::filename(path));
  return path;
}

// MinGW specific. If an embedded directive specified to link to
// foo.lib, but it isn't found, try libfoo.a instead.
StringRef LinkerDriver::doFindLibMinGW(StringRef filename) {
  if (filename.contains('/') || filename.contains('\\'))
    return filename;

  SmallString<128> s = filename;
  sys::path::replace_extension(s, ".a");
  StringRef libName = saver.save("lib" + s.str());
  return doFindFile(libName);
}

// Find library file from search path.
StringRef LinkerDriver::doFindLib(StringRef filename) {
  // Add ".lib" to Filename if that has no file extension.
  bool hasExt = filename.contains('.');
  if (!hasExt)
    filename = saver.save(filename + ".lib");
  StringRef ret = doFindFile(filename);
  // For MinGW, if the find above didn't turn up anything, try
  // looking for a MinGW formatted library name.
  if (config->mingw && ret == filename)
    return doFindLibMinGW(filename);
  return ret;
}

// Resolves a library path. /nodefaultlib options are taken into
// consideration.
This never returns the same path (in that case, // it returns None). Optional LinkerDriver::findLib(StringRef filename) { if (config->noDefaultLibAll) return None; if (!visitedLibs.insert(filename.lower()).second) return None; StringRef path = doFindLib(filename); if (config->noDefaultLibs.count(path.lower())) return None; if (Optional id = getUniqueID(path)) if (!visitedFiles.insert(*id).second) return None; return path; } // Parses LIB environment which contains a list of search paths. void LinkerDriver::addLibSearchPaths() { Optional envOpt = Process::GetEnv("LIB"); if (!envOpt.hasValue()) return; StringRef env = saver.save(*envOpt); while (!env.empty()) { StringRef path; std::tie(path, env) = env.split(';'); searchPaths.push_back(path); } } Symbol *LinkerDriver::addUndefined(StringRef name) { Symbol *b = symtab->addUndefined(name); if (!b->isGCRoot) { b->isGCRoot = true; config->gcroot.push_back(b); } return b; } StringRef LinkerDriver::mangleMaybe(Symbol *s) { // If the plain symbol name has already been resolved, do nothing. Undefined *unmangled = dyn_cast(s); if (!unmangled) return ""; // Otherwise, see if a similar, mangled symbol exists in the symbol table. Symbol *mangled = symtab->findMangle(unmangled->getName()); if (!mangled) return ""; // If we find a similar mangled symbol, make this an alias to it and return // its name. log(unmangled->getName() + " aliased to " + mangled->getName()); unmangled->weakAlias = symtab->addUndefined(mangled->getName()); return mangled->getName(); } // Windows specific -- find default entry point name. // // There are four different entry point functions for Windows executables, // each of which corresponds to a user-defined "main" function. This function // infers an entry point from a user-defined "main" function. 
StringRef LinkerDriver::findDefaultEntry() { assert(config->subsystem != IMAGE_SUBSYSTEM_UNKNOWN && "must handle /subsystem before calling this"); if (config->mingw) return mangle(config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI ? "WinMainCRTStartup" : "mainCRTStartup"); if (config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) { if (findUnderscoreMangle("wWinMain")) { if (!findUnderscoreMangle("WinMain")) return mangle("wWinMainCRTStartup"); warn("found both wWinMain and WinMain; using latter"); } return mangle("WinMainCRTStartup"); } if (findUnderscoreMangle("wmain")) { if (!findUnderscoreMangle("main")) return mangle("wmainCRTStartup"); warn("found both wmain and main; using latter"); } return mangle("mainCRTStartup"); } WindowsSubsystem LinkerDriver::inferSubsystem() { if (config->dll) return IMAGE_SUBSYSTEM_WINDOWS_GUI; if (config->mingw) return IMAGE_SUBSYSTEM_WINDOWS_CUI; // Note that link.exe infers the subsystem from the presence of these // functions even if /entry: or /nodefaultlib are passed which causes them // to not be called. bool haveMain = findUnderscoreMangle("main"); bool haveWMain = findUnderscoreMangle("wmain"); bool haveWinMain = findUnderscoreMangle("WinMain"); bool haveWWinMain = findUnderscoreMangle("wWinMain"); if (haveMain || haveWMain) { if (haveWinMain || haveWWinMain) { warn(std::string("found ") + (haveMain ? "main" : "wmain") + " and " + (haveWinMain ? "WinMain" : "wWinMain") + "; defaulting to /subsystem:console"); } return IMAGE_SUBSYSTEM_WINDOWS_CUI; } if (haveWinMain || haveWWinMain) return IMAGE_SUBSYSTEM_WINDOWS_GUI; return IMAGE_SUBSYSTEM_UNKNOWN; } static uint64_t getDefaultImageBase() { if (config->is64()) return config->dll ? 0x180000000 : 0x140000000; return config->dll ? 
0x10000000 : 0x400000; } static std::string createResponseFile(const opt::InputArgList &args, ArrayRef filePaths, ArrayRef searchPaths) { SmallString<0> data; raw_svector_ostream os(data); for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_linkrepro: case OPT_INPUT: case OPT_defaultlib: case OPT_libpath: case OPT_manifest: case OPT_manifest_colon: case OPT_manifestdependency: case OPT_manifestfile: case OPT_manifestinput: case OPT_manifestuac: break; case OPT_implib: case OPT_pdb: case OPT_out: os << arg->getSpelling() << sys::path::filename(arg->getValue()) << "\n"; break; default: os << toString(*arg) << "\n"; } } for (StringRef path : searchPaths) { std::string relPath = relativeToRoot(path); os << "/libpath:" << quote(relPath) << "\n"; } for (StringRef path : filePaths) os << quote(relativeToRoot(path)) << "\n"; return data.str(); } enum class DebugKind { Unknown, None, Full, FastLink, GHash, Dwarf, Symtab }; static DebugKind parseDebugKind(const opt::InputArgList &args) { auto *a = args.getLastArg(OPT_debug, OPT_debug_opt); if (!a) return DebugKind::None; if (a->getNumValues() == 0) return DebugKind::Full; DebugKind debug = StringSwitch(a->getValue()) .CaseLower("none", DebugKind::None) .CaseLower("full", DebugKind::Full) .CaseLower("fastlink", DebugKind::FastLink) // LLD extensions .CaseLower("ghash", DebugKind::GHash) .CaseLower("dwarf", DebugKind::Dwarf) .CaseLower("symtab", DebugKind::Symtab) .Default(DebugKind::Unknown); if (debug == DebugKind::FastLink) { warn("/debug:fastlink unsupported; using /debug:full"); return DebugKind::Full; } if (debug == DebugKind::Unknown) { error("/debug: unknown option: " + Twine(a->getValue())); return DebugKind::None; } return debug; } static unsigned parseDebugTypes(const opt::InputArgList &args) { unsigned debugTypes = static_cast(DebugType::None); if (auto *a = args.getLastArg(OPT_debugtype)) { SmallVector types; StringRef(a->getValue()) .split(types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); for 
(StringRef type : types) { unsigned v = StringSwitch(type.lower()) .Case("cv", static_cast(DebugType::CV)) .Case("pdata", static_cast(DebugType::PData)) .Case("fixup", static_cast(DebugType::Fixup)) .Default(0); if (v == 0) { warn("/debugtype: unknown option '" + type + "'"); continue; } debugTypes |= v; } return debugTypes; } // Default debug types debugTypes = static_cast(DebugType::CV); if (args.hasArg(OPT_driver)) debugTypes |= static_cast(DebugType::PData); if (args.hasArg(OPT_profile)) debugTypes |= static_cast(DebugType::Fixup); return debugTypes; } static std::string getMapFile(const opt::InputArgList &args) { auto *arg = args.getLastArg(OPT_lldmap, OPT_lldmap_file); if (!arg) return ""; if (arg->getOption().getID() == OPT_lldmap_file) return arg->getValue(); assert(arg->getOption().getID() == OPT_lldmap); StringRef outFile = config->outputFile; return (outFile.substr(0, outFile.rfind('.')) + ".map").str(); } static std::string getImplibPath() { if (!config->implib.empty()) return config->implib; SmallString<128> out = StringRef(config->outputFile); sys::path::replace_extension(out, ".lib"); return out.str(); } // // The import name is caculated as the following: // // | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY // -----+----------------+---------------------+------------------ // LINK | {value} | {value}.{.dll/.exe} | {output name} // LIB | {value} | {value}.dll | {output name}.dll // static std::string getImportName(bool asLib) { SmallString<128> out; if (config->importName.empty()) { out.assign(sys::path::filename(config->outputFile)); if (asLib) sys::path::replace_extension(out, ".dll"); } else { out.assign(config->importName); if (!sys::path::has_extension(out)) sys::path::replace_extension(out, (config->dll || asLib) ? 
".dll" : ".exe"); } return out.str(); } static void createImportLibrary(bool asLib) { std::vector exports; for (Export &e1 : config->exports) { COFFShortExport e2; e2.Name = e1.name; e2.SymbolName = e1.symbolName; e2.ExtName = e1.extName; e2.Ordinal = e1.ordinal; e2.Noname = e1.noname; e2.Data = e1.data; e2.Private = e1.isPrivate; e2.Constant = e1.constant; exports.push_back(e2); } auto handleError = [](Error &&e) { handleAllErrors(std::move(e), [](ErrorInfoBase &eib) { error(eib.message()); }); }; std::string libName = getImportName(asLib); std::string path = getImplibPath(); if (!config->incremental) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } // If the import library already exists, replace it only if the contents // have changed. ErrorOr> oldBuf = MemoryBuffer::getFile( path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!oldBuf) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } SmallString<128> tmpName; if (std::error_code ec = sys::fs::createUniqueFile(path + ".tmp-%%%%%%%%.lib", tmpName)) fatal("cannot create temporary file for import library " + path + ": " + ec.message()); if (Error e = writeImportLibrary(libName, tmpName, exports, config->machine, config->mingw)) { handleError(std::move(e)); return; } std::unique_ptr newBuf = check(MemoryBuffer::getFile( tmpName, /*FileSize*/ -1, /*RequiresNullTerminator*/ false)); if ((*oldBuf)->getBuffer() != newBuf->getBuffer()) { oldBuf->reset(); handleError(errorCodeToError(sys::fs::rename(tmpName, path))); } else { sys::fs::remove(tmpName); } } static void parseModuleDefs(StringRef path) { std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); COFFModuleDefinition m = check(parseCOFFModuleDefinition( mb->getMemBufferRef(), config->machine, config->mingw)); if (config->outputFile.empty()) config->outputFile = saver.save(m.OutputFile); config->importName = 
saver.save(m.ImportName); if (m.ImageBase) config->imageBase = m.ImageBase; if (m.StackReserve) config->stackReserve = m.StackReserve; if (m.StackCommit) config->stackCommit = m.StackCommit; if (m.HeapReserve) config->heapReserve = m.HeapReserve; if (m.HeapCommit) config->heapCommit = m.HeapCommit; if (m.MajorImageVersion) config->majorImageVersion = m.MajorImageVersion; if (m.MinorImageVersion) config->minorImageVersion = m.MinorImageVersion; if (m.MajorOSVersion) config->majorOSVersion = m.MajorOSVersion; if (m.MinorOSVersion) config->minorOSVersion = m.MinorOSVersion; for (COFFShortExport e1 : m.Exports) { Export e2; // In simple cases, only Name is set. Renamed exports are parsed // and set as "ExtName = Name". If Name has the form "OtherDll.Func", // it shouldn't be a normal exported function but a forward to another // DLL instead. This is supported by both MS and GNU linkers. if (e1.ExtName != e1.Name && StringRef(e1.Name).contains('.')) { e2.name = saver.save(e1.ExtName); e2.forwardTo = saver.save(e1.Name); config->exports.push_back(e2); continue; } e2.name = saver.save(e1.Name); e2.extName = saver.save(e1.ExtName); e2.ordinal = e1.Ordinal; e2.noname = e1.Noname; e2.data = e1.Data; e2.isPrivate = e1.Private; e2.constant = e1.Constant; config->exports.push_back(e2); } } void LinkerDriver::enqueueTask(std::function task) { taskQueue.push_back(std::move(task)); } bool LinkerDriver::run() { ScopedTimer t(inputFileTimer); bool didWork = !taskQueue.empty(); while (!taskQueue.empty()) { taskQueue.front()(); taskQueue.pop_front(); } return didWork; } // Parse an /order file. If an option is given, the linker places // COMDAT sections in the same order as their names appear in the // given file. static void parseOrderFile(StringRef arg) { // For some reason, the MSVC linker requires a filename to be // preceded by "@". if (!arg.startswith("@")) { error("malformed /order option: '@' missing"); return; } // Get a list of all comdat sections for error checking. 
DenseSet set; for (Chunk *c : symtab->getChunks()) if (auto *sec = dyn_cast(c)) if (sec->sym) set.insert(sec->sym->getName()); // Open a file. StringRef path = arg.substr(1); std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); // Parse a file. An order file contains one symbol per line. // All symbols that were not present in a given order file are // considered to have the lowest priority 0 and are placed at // end of an output section. for (std::string s : args::getLines(mb->getMemBufferRef())) { if (config->machine == I386 && !isDecorated(s)) s = "_" + s; if (set.count(s) == 0) { if (config->warnMissingOrderSymbol) warn("/order:" + arg + ": missing symbol: " + s + " [LNK4037]"); } else config->order[s] = INT_MIN + config->order.size(); } } static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (SectionChunk *c = dyn_cast_or_null(d->getChunk())) c->keepUnique = true; } static void findKeepUniqueSections() { // Exported symbols could be address-significant in other executables or DSOs, // so we conservatively mark them as address-significant. for (Export &r : config->exports) markAddrsig(r.sym); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. 
for (ObjFile *obj : ObjFile::instances) { ArrayRef syms = obj->getSymbols(); if (obj->addrsigSec) { ArrayRef contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->addrsigSec, contents)); const uint8_t *cur = contents.begin(); while (cur != contents.end()) { unsigned size; const char *err; uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(obj) + ": could not decode addrsig section: " + err); if (symIndex >= syms.size()) fatal(toString(obj) + ": invalid symbol index in addrsig section"); markAddrsig(syms[symIndex]); cur += size; } } else { // If an object file does not have an address-significance table, // conservatively mark all of its symbols as address-significant. for (Symbol *s : syms) markAddrsig(s); } } } // link.exe replaces each %foo% in altPath with the contents of environment // variable foo, and adds the two magic env vars _PDB (expands to the basename // of pdb's output path) and _EXT (expands to the extension of the output // binary). // lld only supports %_PDB% and %_EXT% and warns on references to all other env // vars. static void parsePDBAltPath(StringRef altPath) { SmallString<128> buf; StringRef pdbBasename = sys::path::filename(config->pdbPath, sys::path::Style::windows); StringRef binaryExtension = sys::path::extension(config->outputFile, sys::path::Style::windows); if (!binaryExtension.empty()) binaryExtension = binaryExtension.substr(1); // %_EXT% does not include '.'. // Invariant: // +--------- cursor ('a...' might be the empty string). // | +----- firstMark // | | +- secondMark // v v v // a...%...%... size_t cursor = 0; while (cursor < altPath.size()) { size_t firstMark, secondMark; if ((firstMark = altPath.find('%', cursor)) == StringRef::npos || (secondMark = altPath.find('%', firstMark + 1)) == StringRef::npos) { // Didn't find another full fragment, treat rest of string as literal. buf.append(altPath.substr(cursor)); break; } // Found a full fragment. 
Append text in front of first %, and interpret // text between first and second % as variable name. buf.append(altPath.substr(cursor, firstMark - cursor)); StringRef var = altPath.substr(firstMark, secondMark - firstMark + 1); if (var.equals_lower("%_pdb%")) buf.append(pdbBasename); else if (var.equals_lower("%_ext%")) buf.append(binaryExtension); else { warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + var + " as literal"); buf.append(var); } cursor = secondMark + 1; } config->pdbAltPath = buf; } /// Convert resource files and potentially merge input resource object /// trees into one resource tree. /// Call after ObjFile::instances is complete. void LinkerDriver::convertResources() { std::vector resourceObjFiles; for (ObjFile *f : ObjFile::instances) { if (f->isResourceObjFile()) resourceObjFiles.push_back(f); } if (!config->mingw && (resourceObjFiles.size() > 1 || (resourceObjFiles.size() == 1 && !resources.empty()))) { error((!resources.empty() ? "internal .obj file created from .res files" : toString(resourceObjFiles[1])) + ": more than one resource obj file not allowed, already got " + toString(resourceObjFiles.front())); return; } if (resources.empty() && resourceObjFiles.size() <= 1) { // No resources to convert, and max one resource object file in // the input. Keep that preconverted resource section as is. for (ObjFile *f : resourceObjFiles) f->includeResourceChunks(); return; } ObjFile *f = make(convertResToCOFF(resources, resourceObjFiles)); symtab->addFile(f); f->includeResourceChunks(); } // In MinGW, if no symbols are chosen to be exported, then all symbols are // automatically exported by default. This behavior can be forced by the // -export-all-symbols option, so that it happens even when exports are // explicitly specified. The automatic behavior can be disabled using the // -exclude-all-symbols option, so that lld-link behaves like link.exe rather // than MinGW in the case that nothing is explicitly exported.
void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) { if (!config->dll) return; if (!args.hasArg(OPT_export_all_symbols)) { if (!config->exports.empty()) return; if (args.hasArg(OPT_exclude_all_symbols)) return; } AutoExporter exporter; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) exporter.addWholeArchive(*path); symtab->forEachSymbol([&](Symbol *s) { auto *def = dyn_cast(s); if (!exporter.shouldExport(def)) return; Export e; e.name = def->getName(); e.sym = def; if (Chunk *c = def->getChunk()) if (!(c->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)) e.data = true; config->exports.push_back(e); }); } static const char *libcallRoutineNames[] = { #define HANDLE_LIBCALL(code, name) name, #include "llvm/IR/RuntimeLibcalls.def" #undef HANDLE_LIBCALL }; void LinkerDriver::link(ArrayRef argsArr) { // Needed for LTO. InitializeAllTargetInfos(); InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. if (argsArr.size() > 1 && StringRef(argsArr[1]).equals_lower("/lib")) { if (llvm::libDriverMain(argsArr.slice(1)) != 0) fatal("lib failed"); return; } // Parse command line options. ArgParser parser; opt::InputArgList args = parser.parseLINK(argsArr); // Parse and evaluate -mllvm options. std::vector v; v.push_back("lld-link (LLVM option parsing)"); for (auto *arg : args.filtered(OPT_mllvm)) v.push_back(arg->getValue()); cl::ParseCommandLineOptions(v.size(), v.data()); // Handle /errorlimit early, because error() depends on it. 
if (auto *arg = args.getLastArg(OPT_errorlimit)) { int n = 20; StringRef s = arg->getValue(); if (s.getAsInteger(10, n)) error(arg->getSpelling() + " number expected, but got " + s); errorHandler().errorLimit = n; } // Handle /help if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); return; } lld::threadsEnabled = args.hasFlag(OPT_threads, OPT_threads_no, true); if (args.hasArg(OPT_show_timing)) config->showTiming = true; config->showSummary = args.hasArg(OPT_summary); ScopedTimer t(Timer::root()); // Handle --version, which is an lld extension. This option is a bit odd // because it doesn't start with "/", but we deliberately chose "--" to // avoid conflict with /version and for compatibility with clang-cl. if (args.hasArg(OPT_dash_dash_version)) { outs() << getLLDVersion() << "\n"; return; } // Handle /lldmingw early, since it can potentially affect how other // options are handled. config->mingw = args.hasArg(OPT_lldmingw); if (auto *arg = args.getLastArg(OPT_linkrepro)) { SmallString<64> path = StringRef(arg->getValue()); sys::path::append(path, "repro.tar"); Expected> errOrWriter = TarWriter::create(path, "repro"); if (errOrWriter) { tar = std::move(*errOrWriter); } else { error("/linkrepro: failed to open " + path + ": " + toString(errOrWriter.takeError())); } } if (!args.hasArg(OPT_INPUT)) { if (args.hasArg(OPT_deffile)) config->noEntry = true; else fatal("no input files"); } // Construct search path list. searchPaths.push_back(""); for (auto *arg : args.filtered(OPT_libpath)) searchPaths.push_back(arg->getValue()); addLibSearchPaths(); // Handle /ignore for (auto *arg : args.filtered(OPT_ignore)) { SmallVector vec; StringRef(arg->getValue()).split(vec, ','); for (StringRef s : vec) { if (s == "4037") config->warnMissingOrderSymbol = false; else if (s == "4099") config->warnDebugInfoUnusable = false; else if (s == "4217") config->warnLocallyDefinedImported = false; // Other warning numbers are ignored. 
} } // Handle /out if (auto *arg = args.getLastArg(OPT_out)) config->outputFile = arg->getValue(); // Handle /verbose if (args.hasArg(OPT_verbose)) config->verbose = true; errorHandler().verbose = config->verbose; // Handle /force or /force:unresolved if (args.hasArg(OPT_force, OPT_force_unresolved)) config->forceUnresolved = true; // Handle /force or /force:multiple if (args.hasArg(OPT_force, OPT_force_multiple)) config->forceMultiple = true; // Handle /force or /force:multipleres if (args.hasArg(OPT_force, OPT_force_multipleres)) config->forceMultipleRes = true; // Handle /debug DebugKind debug = parseDebugKind(args); if (debug == DebugKind::Full || debug == DebugKind::Dwarf || debug == DebugKind::GHash) { config->debug = true; config->incremental = true; } // Handle /demangle config->demangle = args.hasFlag(OPT_demangle, OPT_demangle_no); // Handle /debugtype config->debugTypes = parseDebugTypes(args); // Handle /pdb bool shouldCreatePDB = (debug == DebugKind::Full || debug == DebugKind::GHash); if (shouldCreatePDB) { if (auto *arg = args.getLastArg(OPT_pdb)) config->pdbPath = arg->getValue(); if (auto *arg = args.getLastArg(OPT_pdbaltpath)) config->pdbAltPath = arg->getValue(); if (args.hasArg(OPT_natvis)) config->natvisFiles = args.getAllArgValues(OPT_natvis); if (auto *arg = args.getLastArg(OPT_pdb_source_path)) config->pdbSourcePath = arg->getValue(); } // Handle /noentry if (args.hasArg(OPT_noentry)) { if (args.hasArg(OPT_dll)) config->noEntry = true; else error("/noentry must be specified with /dll"); } // Handle /dll if (args.hasArg(OPT_dll)) { config->dll = true; config->manifestID = 2; } // Handle /dynamicbase and /fixed. We can't use hasFlag for /dynamicbase // because we need to explicitly check whether that option or its inverse was // present in the argument list in order to handle /fixed. 
auto *dynamicBaseArg = args.getLastArg(OPT_dynamicbase, OPT_dynamicbase_no); if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase_no) config->dynamicBase = false; // MSDN claims "/FIXED:NO is the default setting for a DLL, and /FIXED is the // default setting for any other project type.", but link.exe defaults to // /FIXED:NO for exe outputs as well. Match behavior, not docs. bool fixed = args.hasFlag(OPT_fixed, OPT_fixed_no, false); if (fixed) { if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase) { error("/fixed must not be specified with /dynamicbase"); } else { config->relocatable = false; config->dynamicBase = false; } } // Handle /appcontainer config->appContainer = args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false); // Handle /machine if (auto *arg = args.getLastArg(OPT_machine)) { config->machine = getMachineType(arg->getValue()); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) fatal(Twine("unknown /machine argument: ") + arg->getValue()); } // Handle /nodefaultlib: for (auto *arg : args.filtered(OPT_nodefaultlib)) config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); // Handle /nodefaultlib if (args.hasArg(OPT_nodefaultlib_all)) config->noDefaultLibAll = true; // Handle /base if (auto *arg = args.getLastArg(OPT_base)) parseNumbers(arg->getValue(), &config->imageBase); // Handle /filealign if (auto *arg = args.getLastArg(OPT_filealign)) { parseNumbers(arg->getValue(), &config->fileAlign); if (!isPowerOf2_64(config->fileAlign)) error("/filealign: not a power of two: " + Twine(config->fileAlign)); } // Handle /stack if (auto *arg = args.getLastArg(OPT_stack)) parseNumbers(arg->getValue(), &config->stackReserve, &config->stackCommit); // Handle /guard:cf if (auto *arg = args.getLastArg(OPT_guard)) parseGuard(arg->getValue()); // Handle /heap if (auto *arg = args.getLastArg(OPT_heap)) parseNumbers(arg->getValue(), &config->heapReserve, &config->heapCommit); // Handle /version if (auto *arg 
= args.getLastArg(OPT_version)) parseVersion(arg->getValue(), &config->majorImageVersion, &config->minorImageVersion); // Handle /subsystem if (auto *arg = args.getLastArg(OPT_subsystem)) parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); // Handle /timestamp if (llvm::opt::Arg *arg = args.getLastArg(OPT_timestamp, OPT_repro)) { if (arg->getOption().getID() == OPT_repro) { config->timestamp = 0; config->repro = true; } else { config->repro = false; StringRef value(arg->getValue()); if (value.getAsInteger(0, config->timestamp)) fatal(Twine("invalid timestamp: ") + value + ". Expected 32-bit integer"); } } else { config->repro = false; config->timestamp = time(nullptr); } // Handle /alternatename for (auto *arg : args.filtered(OPT_alternatename)) parseAlternateName(arg->getValue()); // Handle /include for (auto *arg : args.filtered(OPT_incl)) addUndefined(arg->getValue()); // Handle /implib if (auto *arg = args.getLastArg(OPT_implib)) config->implib = arg->getValue(); // Handle /opt. bool doGC = debug == DebugKind::None || args.hasArg(OPT_profile); unsigned icfLevel = args.hasArg(OPT_profile) ? 
0 : 1; // 0: off, 1: limited, 2: on unsigned tailMerge = 1; for (auto *arg : args.filtered(OPT_opt)) { std::string str = StringRef(arg->getValue()).lower(); SmallVector vec; StringRef(str).split(vec, ','); for (StringRef s : vec) { if (s == "ref") { doGC = true; } else if (s == "noref") { doGC = false; } else if (s == "icf" || s.startswith("icf=")) { icfLevel = 2; } else if (s == "noicf") { icfLevel = 0; } else if (s == "lldtailmerge") { tailMerge = 2; } else if (s == "nolldtailmerge") { tailMerge = 0; } else if (s.startswith("lldlto=")) { StringRef optLevel = s.substr(7); if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) error("/opt:lldlto: invalid optimization level: " + optLevel); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (jobs.getAsInteger(10, config->thinLTOJobs) || config->thinLTOJobs == 0) error("/opt:lldltojobs: invalid job count: " + jobs); } else if (s.startswith("lldltopartitions=")) { StringRef n = s.substr(17); if (n.getAsInteger(10, config->ltoPartitions) || config->ltoPartitions == 0) error("/opt:lldltopartitions: invalid partition count: " + n); } else if (s != "lbr" && s != "nolbr") error("/opt: unknown option: " + s); } } // Limited ICF is enabled if GC is enabled and ICF was never mentioned // explicitly. // FIXME: LLD only implements "limited" ICF, i.e. it only merges identical // code. If the user passes /OPT:ICF explicitly, LLD should merge identical // comdat readonly data. 
if (icfLevel == 1 && !doGC) icfLevel = 0; config->doGC = doGC; config->doICF = icfLevel > 0; config->tailMerge = (tailMerge == 1 && config->doICF) || tailMerge == 2; // Handle /lldsavetemps if (args.hasArg(OPT_lldsavetemps)) config->saveTemps = true; // Handle /kill-at if (args.hasArg(OPT_kill_at)) config->killAt = true; // Handle /lldltocache if (auto *arg = args.getLastArg(OPT_lldltocache)) config->ltoCache = arg->getValue(); // Handle /lldltocachepolicy if (auto *arg = args.getLastArg(OPT_lldltocachepolicy)) config->ltoCachePolicy = CHECK( parseCachePruningPolicy(arg->getValue()), Twine("/lldltocachepolicy: invalid cache policy: ") + arg->getValue()); // Handle /failifmismatch for (auto *arg : args.filtered(OPT_failifmismatch)) checkFailIfMismatch(arg->getValue(), nullptr); // Handle /merge for (auto *arg : args.filtered(OPT_merge)) parseMerge(arg->getValue()); // Add default section merging rules after user rules. User rules take // precedence, but we will emit a warning if there is a conflict. parseMerge(".idata=.rdata"); parseMerge(".didat=.rdata"); parseMerge(".edata=.rdata"); parseMerge(".xdata=.rdata"); parseMerge(".bss=.data"); if (config->mingw) { parseMerge(".ctors=.rdata"); parseMerge(".dtors=.rdata"); parseMerge(".CRT=.rdata"); } // Handle /section for (auto *arg : args.filtered(OPT_section)) parseSection(arg->getValue()); // Handle /align if (auto *arg = args.getLastArg(OPT_align)) { parseNumbers(arg->getValue(), &config->align); if (!isPowerOf2_64(config->align)) error("/align: not a power of two: " + StringRef(arg->getValue())); } // Handle /aligncomm for (auto *arg : args.filtered(OPT_aligncomm)) parseAligncomm(arg->getValue()); // Handle /manifestdependency. This enables /manifest unless /manifest:no is // also passed.
if (auto *arg = args.getLastArg(OPT_manifestdependency)) { config->manifestDependency = arg->getValue(); config->manifest = Configuration::SideBySide; } // Handle /manifest and /manifest: if (auto *arg = args.getLastArg(OPT_manifest, OPT_manifest_colon)) { if (arg->getOption().getID() == OPT_manifest) config->manifest = Configuration::SideBySide; else parseManifest(arg->getValue()); } // Handle /manifestuac if (auto *arg = args.getLastArg(OPT_manifestuac)) parseManifestUAC(arg->getValue()); // Handle /manifestfile if (auto *arg = args.getLastArg(OPT_manifestfile)) config->manifestFile = arg->getValue(); // Handle /manifestinput for (auto *arg : args.filtered(OPT_manifestinput)) config->manifestInput.push_back(arg->getValue()); if (!config->manifestInput.empty() && config->manifest != Configuration::Embed) { fatal("/manifestinput: requires /manifest:embed"); } config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_arg); config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_arg); config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace); config->thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path); // Handle miscellaneous boolean flags. 
config->allowBind = args.hasFlag(OPT_allowbind, OPT_allowbind_no, true); config->allowIsolation = args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true); config->incremental = args.hasFlag(OPT_incremental, OPT_incremental_no, !config->doGC && !config->doICF && !args.hasArg(OPT_order) && !args.hasArg(OPT_profile)); config->integrityCheck = args.hasFlag(OPT_integritycheck, OPT_integritycheck_no, false); config->nxCompat = args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true); for (auto *arg : args.filtered(OPT_swaprun)) parseSwaprun(arg->getValue()); config->terminalServerAware = !config->dll && args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); config->debugDwarf = debug == DebugKind::Dwarf; config->debugGHashes = debug == DebugKind::GHash; config->debugSymtab = debug == DebugKind::Symtab; config->mapFile = getMapFile(args); if (config->incremental && args.hasArg(OPT_profile)) { warn("ignoring '/incremental' due to '/profile' specification"); config->incremental = false; } if (config->incremental && args.hasArg(OPT_order)) { warn("ignoring '/incremental' due to '/order' specification"); config->incremental = false; } if (config->incremental && config->doGC) { warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to " "disable"); config->incremental = false; } if (config->incremental && config->doICF) { warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to " "disable"); config->incremental = false; } if (errorCount()) return; std::set wholeArchives; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) if (Optional id = getUniqueID(*path)) wholeArchives.insert(*id); // A predicate returning true if a given path is an argument for // /wholearchive:, or /wholearchive is enabled globally. // This function is a bit tricky because "foo.obj /wholearchive:././foo.obj" // needs to be handled as "/wholearchive:foo.obj foo.obj". 
auto isWholeArchive = [&](StringRef path) -> bool { if (args.hasArg(OPT_wholearchive_flag)) return true; if (Optional id = getUniqueID(path)) return wholeArchives.count(*id); return false; }; - // Create a list of input files. Files can be given as arguments - // for /defaultlib option. - for (auto *arg : args.filtered(OPT_INPUT, OPT_wholearchive_file)) - if (Optional path = findFile(arg->getValue())) - enqueuePath(*path, isWholeArchive(*path)); + // Create a list of input files. These can be given as OPT_INPUT options + // and OPT_wholearchive_file options, and we also need to track OPT_start_lib + // and OPT_end_lib. + bool inLib = false; + for (auto *arg : args) { + switch (arg->getOption().getID()) { + case OPT_end_lib: + if (!inLib) + error("stray " + arg->getSpelling()); + inLib = false; + break; + case OPT_start_lib: + if (inLib) + error("nested " + arg->getSpelling()); + inLib = true; + break; + case OPT_wholearchive_file: + if (Optional path = findFile(arg->getValue())) + enqueuePath(*path, true, inLib); + break; + case OPT_INPUT: + if (Optional path = findFile(arg->getValue())) + enqueuePath(*path, isWholeArchive(*path), inLib); + break; + default: + // Ignore other options. + break; + } + } + // Process files specified as /defaultlib. These should be enqueued after + // other files, which is why they are in a separate loop. for (auto *arg : args.filtered(OPT_defaultlib)) if (Optional path = findLib(arg->getValue())) - enqueuePath(*path, false); + enqueuePath(*path, false, false); // Windows specific -- Create a resource file containing a manifest file. if (config->manifest == Configuration::Embed) - addBuffer(createManifestRes(), false); + addBuffer(createManifestRes(), false, false); // Read all input files given via the command line. run(); if (errorCount()) return; // We should have inferred a machine type by now from the input files, but if // not we assume x64. if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { warn("/machine is not specified.
x64 is assumed"); config->machine = AMD64; } config->wordsize = config->is64() ? 8 : 4; // Handle /safeseh, x86 only, on by default, except for mingw. if (config->machine == I386 && args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw)) config->safeSEH = true; // Handle /functionpadmin for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt)) parseFunctionPadMin(arg, config->machine); if (tar) tar->append("response.txt", createResponseFile(args, filePaths, ArrayRef(searchPaths).slice(1))); // Handle /largeaddressaware config->largeAddressAware = args.hasFlag( OPT_largeaddressaware, OPT_largeaddressaware_no, config->is64()); // Handle /highentropyva config->highEntropyVA = config->is64() && args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true); if (!config->dynamicBase && (config->machine == ARMNT || config->machine == ARM64)) error("/dynamicbase:no is not compatible with " + machineToStr(config->machine)); // Handle /export for (auto *arg : args.filtered(OPT_export)) { Export e = parseExport(arg->getValue()); if (config->machine == I386) { if (!isDecorated(e.name)) e.name = saver.save("_" + e.name); if (!e.extName.empty() && !isDecorated(e.extName)) e.extName = saver.save("_" + e.extName); } config->exports.push_back(e); } // Handle /def if (auto *arg = args.getLastArg(OPT_deffile)) { // parseModuleDefs mutates Config object. parseModuleDefs(arg->getValue()); } // Handle generation of import library from a def file. if (!args.hasArg(OPT_INPUT)) { fixupExports(); createImportLibrary(/*asLib=*/true); return; } // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. Must happen before /entry handling, // and after the early return when just writing an import library. 
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { config->subsystem = inferSubsystem(); if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) fatal("subsystem must be defined"); } // Handle /entry and /dll if (auto *arg = args.getLastArg(OPT_entry)) { config->entry = addUndefined(mangle(arg->getValue())); } else if (!config->entry && !config->noEntry) { if (args.hasArg(OPT_dll)) { StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12" : "_DllMainCRTStartup"; config->entry = addUndefined(s); } else { // Windows specific -- If entry point name is not given, we need to // infer that from user-defined entry name. StringRef s = findDefaultEntry(); if (s.empty()) fatal("entry point must be defined"); config->entry = addUndefined(s); log("Entry name inferred: " + s); } } // Handle /delayload for (auto *arg : args.filtered(OPT_delayload)) { config->delayLoads.insert(StringRef(arg->getValue()).lower()); if (config->machine == I386) { config->delayLoadHelper = addUndefined("___delayLoadHelper2@8"); } else { config->delayLoadHelper = addUndefined("__delayLoadHelper2"); } } // Set default image name if neither /out nor /def set it. if (config->outputFile.empty()) { config->outputFile = getOutputPath((*args.filtered(OPT_INPUT).begin())->getValue()); } // Fail early if an output file is not writable. if (auto e = tryCreateFile(config->outputFile)) { error("cannot open output file " + config->outputFile + ": " + e.message()); return; } if (shouldCreatePDB) { // Put the PDB next to the image if no /pdb flag was passed. if (config->pdbPath.empty()) { config->pdbPath = config->outputFile; sys::path::replace_extension(config->pdbPath, ".pdb"); } // The embedded PDB path should be the absolute path to the PDB if no // /pdbaltpath flag was passed. if (config->pdbAltPath.empty()) { config->pdbAltPath = config->pdbPath; // It's important to make the path absolute and remove dots.
This path // will eventually be written into the PE header, and certain Microsoft // tools won't work correctly if these assumptions are not held. sys::fs::make_absolute(config->pdbAltPath); sys::path::remove_dots(config->pdbAltPath); } else { // Don't do this earlier, so that config->outputFile is ready. parsePDBAltPath(config->pdbAltPath); } } // Set default image base if /base is not given. if (config->imageBase == uint64_t(-1)) config->imageBase = getDefaultImageBase(); symtab->addSynthetic(mangle("__ImageBase"), nullptr); if (config->machine == I386) { symtab->addAbsolute("___safe_se_handler_table", 0); symtab->addAbsolute("___safe_se_handler_count", 0); } symtab->addAbsolute(mangle("__guard_fids_count"), 0); symtab->addAbsolute(mangle("__guard_fids_table"), 0); symtab->addAbsolute(mangle("__guard_flags"), 0); symtab->addAbsolute(mangle("__guard_iat_count"), 0); symtab->addAbsolute(mangle("__guard_iat_table"), 0); symtab->addAbsolute(mangle("__guard_longjmp_count"), 0); symtab->addAbsolute(mangle("__guard_longjmp_table"), 0); // Needed for MSVC 2017 15.5 CRT. symtab->addAbsolute(mangle("__enclave_config"), 0); if (config->mingw) { symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); symtab->addAbsolute(mangle("__CTOR_LIST__"), 0); symtab->addAbsolute(mangle("__DTOR_LIST__"), 0); } // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we // converge. do { // Windows specific -- if entry point is not found, // search for its mangled names. if (config->entry) mangleMaybe(config->entry); // Windows specific -- Make sure we resolve all dllexported symbols. for (Export &e : config->exports) { if (!e.forwardTo.empty()) continue; e.sym = addUndefined(e.name); if (!e.directives) e.symbolName = mangleMaybe(e.sym); } // Add weak aliases.
Weak aliases is a mechanism to give remaining // undefined symbols final chance to be resolved successfully. for (auto pair : config->alternateNames) { StringRef from = pair.first; StringRef to = pair.second; Symbol *sym = symtab->find(from); if (!sym) continue; if (auto *u = dyn_cast(sym)) if (!u->weakAlias) u->weakAlias = symtab->addUndefined(to); } // If any inputs are bitcode files, the LTO code generator may create // references to library functions that are not explicit in the bitcode // file's symbol table. If any of those library functions are defined in a // bitcode file in an archive member, we need to arrange to use LTO to // compile those archive members by adding them to the link beforehand. if (!BitcodeFile::instances.empty()) for (const char *s : libcallRoutineNames) symtab->addLibcall(s); // Windows specific -- if __load_config_used can be resolved, resolve it. if (symtab->findUnderscore("_load_config_used")) addUndefined(mangle("_load_config_used")); } while (run()); if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) - if (dyn_cast_or_null(symtab->find(arg->getValue()))) + if (dyn_cast_or_null(symtab->find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } if (config->mingw) { // Load any further object files that might be needed for doing automatic // imports. // // For cases with no automatically imported symbols, this iterates once // over the symbol table and doesn't do anything. // // For the normal case with a few automatically imported symbols, this // should only need to be run once, since each new object file imported // is an import library and wouldn't add any new undefined references, // but there's nothing stopping the __imp_ symbols from coming from a // normal object file as well (although that won't be used for the // actual autoimport later on). If this pass adds new undefined references, // we won't iterate further to resolve them. 
symtab->loadMinGWAutomaticImports(); run(); } // At this point, we should not have any symbols that cannot be resolved. // If we are going to do codegen for link-time optimization, check for // unresolvable symbols first, so we don't spend time generating code that // will fail to link anyway. if (!BitcodeFile::instances.empty() && !config->forceUnresolved) symtab->reportUnresolvable(); if (errorCount()) return; // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files (unless -thinlto-index-only was given, in which case we // resolve symbols and write indices, but don't generate native code or link). symtab->addCombinedLTOObjects(); // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObjects, so we are done if that's the case. if (config->thinLTOIndexOnly) return; // If we generated native object files from bitcode files, this resolves // references to the symbols we use from them. run(); // Resolve remaining undefined symbols and warn about imported locals. symtab->resolveRemainingUndefines(); if (errorCount()) return; config->hadExplicitExports = !config->exports.empty(); if (config->mingw) { // In MinGW, all symbols are automatically exported if no symbols // are chosen to be exported. maybeExportMinGWSymbols(args); // Make sure the crtend.o object is the last object file. This object // file can contain terminating section chunks that need to be placed // last. GNU ld processes files and static libraries explicitly in the // order provided on the command line, while lld will pull in needed // files from static libraries only after the last object file on the // command line.
for (auto i = ObjFile::instances.begin(), e = ObjFile::instances.end(); i != e; i++) { ObjFile *file = *i; if (isCrtend(file->getName())) { ObjFile::instances.erase(i); ObjFile::instances.push_back(file); break; } } } // Windows specific -- when we are creating a .dll file, we also // need to create a .lib file. In MinGW mode, we only do that when the // -implib option is given explicitly, for compatibility with GNU ld. if (!config->exports.empty() || config->dll) { fixupExports(); if (!config->mingw || !config->implib.empty()) createImportLibrary(/*asLib=*/false); assignExportOrdinals(); } // Handle /output-def (MinGW specific). if (auto *arg = args.getLastArg(OPT_output_def)) writeDefFile(arg->getValue()); // Set extra alignment for .comm symbols for (auto pair : config->alignComm) { StringRef name = pair.first; uint32_t alignment = pair.second; Symbol *sym = symtab->find(name); if (!sym) { warn("/aligncomm symbol " + name + " not found"); continue; } // If the symbol isn't common, it must have been replaced with a regular // symbol, which will carry its own alignment. auto *dc = dyn_cast(sym); if (!dc) continue; CommonChunk *c = dc->getChunk(); c->setAlignment(std::max(c->getAlignment(), alignment)); } // Windows specific -- Create a side-by-side manifest file. if (config->manifest == Configuration::SideBySide) createSideBySideManifest(); // Handle /order. We want to do this at this moment because we // need a complete list of comdat sections to warn on nonexistent // functions. if (auto *arg = args.getLastArg(OPT_order)) parseOrderFile(arg->getValue()); // Identify unreferenced COMDAT sections. if (config->doGC) markLive(symtab->getChunks()); // Needs to happen after the last call to addFile(). convertResources(); // Identify identical COMDAT sections to merge them. if (config->doICF) { findKeepUniqueSections(); doICF(symtab->getChunks()); } // Write the result. writeResult(); // Stop early so we can print the results. 
Timer::root().stop(); if (config->showTiming) Timer::root().print(); } } // namespace coff } // namespace lld Index: lld/trunk/COFF/Symbols.h =================================================================== --- lld/trunk/COFF/Symbols.h (revision 370815) +++ lld/trunk/COFF/Symbols.h (revision 370816) @@ -1,444 +1,455 @@ //===- Symbols.h ------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_SYMBOLS_H #define LLD_COFF_SYMBOLS_H #include "Chunks.h" #include "Config.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include #include #include namespace lld { std::string toString(coff::Symbol &b); // There are two different ways to convert an Archive::Symbol to a string: // One for Microsoft name mangling and one for Itanium name mangling. // Call the functions toCOFFString and toELFString, not just toString. std::string toCOFFString(const coff::Archive::Symbol &b); namespace coff { using llvm::object::Archive; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_symbol_generic; class ArchiveFile; class InputFile; class ObjFile; class SymbolTable; // The base class for real symbol classes. class Symbol { public: enum Kind { // The order of these is significant. We start with the regular defined // symbols as those are the most prevalent and the zero tag is the cheapest // to set. Among the defined kinds, the lower kind is preferred over // the higher kind when testing whether one symbol should take precedence // over another. 
DefinedRegularKind = 0, DefinedCommonKind, DefinedLocalImportKind, DefinedImportThunkKind, DefinedImportDataKind, DefinedAbsoluteKind, DefinedSyntheticKind, UndefinedKind, - LazyKind, + LazyArchiveKind, + LazyObjectKind, LastDefinedCOFFKind = DefinedCommonKind, LastDefinedKind = DefinedSyntheticKind, }; Kind kind() const { return static_cast(symbolKind); } // Returns the symbol name. StringRef getName(); void replaceKeepingName(Symbol *other, size_t size); // Returns the file from which this symbol was created. InputFile *getFile(); // Indicates that this symbol will be included in the final image. Only valid // after calling markLive. bool isLive() const; + bool isLazy() const { + return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; + } + protected: friend SymbolTable; explicit Symbol(Kind k, StringRef n = "") : symbolKind(k), isExternal(true), isCOMDAT(false), writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false), nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {} const unsigned symbolKind : 8; unsigned isExternal : 1; public: // This bit is used by the \c DefinedRegular subclass. unsigned isCOMDAT : 1; // This bit is used by Writer::createSymbolAndStringTable() to prevent // symbols from being written to the symbol table more than once. unsigned writtenToSymtab : 1; // True if this symbol was referenced by a regular (non-bitcode) object. unsigned isUsedInRegularObj : 1; // True if we've seen both a lazy and an undefined symbol with this symbol // name, which means that we have enqueued an archive member load and should // not load any more archive members to resolve the same symbol. unsigned pendingArchiveLoad : 1; /// True if we've already added this symbol to the list of GC roots. unsigned isGCRoot : 1; unsigned isRuntimePseudoReloc : 1; protected: // Symbol name length. Assume symbol lengths fit in a 32-bit integer. 
uint32_t nameSize; const char *nameData; }; // The base class for any defined symbols, including absolute symbols, // etc. class Defined : public Symbol { public: Defined(Kind k, StringRef n) : Symbol(k, n) {} static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } // Returns the RVA (relative virtual address) of this symbol. The // writer sets and uses RVAs. uint64_t getRVA(); // Returns the chunk containing this symbol. Absolute symbols and __ImageBase // do not have chunks, so this may return null. Chunk *getChunk(); }; // Symbols defined via a COFF object file or bitcode file. For COFF files, this // stores a coff_symbol_generic*, and names of internal symbols are lazily // loaded through that. For bitcode files, Sym is nullptr and the name is stored // as a decomposed StringRef. class DefinedCOFF : public Defined { friend Symbol; public: DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) : Defined(k, n), file(f), sym(s) {} static bool classof(const Symbol *s) { return s->kind() <= LastDefinedCOFFKind; } InputFile *getFile() { return file; } COFFSymbolRef getCOFFSymbol(); InputFile *file; protected: const coff_symbol_generic *sym; }; // Regular defined symbols read from object file symbol tables. class DefinedRegular : public DefinedCOFF { public: DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, bool isExternal = false, const coff_symbol_generic *s = nullptr, SectionChunk *c = nullptr) : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? 
&c->repl : nullptr) { this->isExternal = isExternal; this->isCOMDAT = isCOMDAT; } static bool classof(const Symbol *s) { return s->kind() == DefinedRegularKind; } uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } SectionChunk *getChunk() const { return *data; } uint32_t getValue() const { return sym->Value; } SectionChunk **data; }; class DefinedCommon : public DefinedCOFF { public: DefinedCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *s = nullptr, CommonChunk *c = nullptr) : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { this->isExternal = true; } static bool classof(const Symbol *s) { return s->kind() == DefinedCommonKind; } uint64_t getRVA() { return data->getRVA(); } CommonChunk *getChunk() { return data; } private: friend SymbolTable; uint64_t getSize() const { return size; } CommonChunk *data; uint64_t size; }; // Absolute symbols. class DefinedAbsolute : public Defined { public: DefinedAbsolute(StringRef n, COFFSymbolRef s) : Defined(DefinedAbsoluteKind, n), va(s.getValue()) { isExternal = s.isExternal(); } DefinedAbsolute(StringRef n, uint64_t v) : Defined(DefinedAbsoluteKind, n), va(v) {} static bool classof(const Symbol *s) { return s->kind() == DefinedAbsoluteKind; } uint64_t getRVA() { return va - config->imageBase; } void setVA(uint64_t v) { va = v; } // Section index relocations against absolute symbols resolve to // this 16 bit number, and it is the largest valid section index // plus one. This variable keeps it. static uint16_t numOutputSections; private: uint64_t va; }; // This symbol is used for linker-synthesized symbols like __ImageBase and // __safe_se_handler_table. class DefinedSynthetic : public Defined { public: explicit DefinedSynthetic(StringRef name, Chunk *c) : Defined(DefinedSyntheticKind, name), c(c) {} static bool classof(const Symbol *s) { return s->kind() == DefinedSyntheticKind; } // A null chunk indicates that this is __ImageBase. 
Otherwise, this is some // other synthesized chunk, like SEHTableChunk. uint32_t getRVA() { return c ? c->getRVA() : 0; } Chunk *getChunk() { return c; } private: Chunk *c; }; // This class represents a symbol defined in an archive file. It is // created from an archive file header, and it knows how to load an // object file from an archive to replace itself with a defined -// symbol. If the resolver finds both Undefined and Lazy for -// the same name, it will ask the Lazy to load a file. -class Lazy : public Symbol { +// symbol. If the resolver finds both Undefined and LazyArchive for +// the same name, it will ask the LazyArchive to load a file. +class LazyArchive : public Symbol { public: - Lazy(ArchiveFile *f, const Archive::Symbol s) - : Symbol(LazyKind, s.getName()), file(f), sym(s) {} + LazyArchive(ArchiveFile *f, const Archive::Symbol s) + : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {} - static bool classof(const Symbol *s) { return s->kind() == LazyKind; } + static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } MemoryBufferRef getMemberBuffer(); ArchiveFile *file; - -private: - friend SymbolTable; - -private: const Archive::Symbol sym; }; +class LazyObject : public Symbol { +public: + LazyObject(LazyObjFile *f, StringRef n) + : Symbol(LazyObjectKind, n), file(f) {} + static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } + LazyObjFile *file; +}; + // Undefined symbols. class Undefined : public Symbol { public: explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } // An undefined symbol can have a fallback symbol which gives an // undefined symbol a second chance if it would remain undefined. // If it remains undefined, it'll be replaced with whatever the // Alias pointer points to. 
Symbol *weakAlias = nullptr; // If this symbol is external weak, try to resolve it to a defined // symbol by searching the chain of fallback symbols. Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); }; // Windows-specific classes. // This class represents a symbol imported from a DLL. This has two // names for internal use and external use. The former is used for // name resolution, and the latter is used for the import descriptor // table in an output. The former has "__imp_" prefix. class DefinedImportData : public Defined { public: DefinedImportData(StringRef n, ImportFile *f) : Defined(DefinedImportDataKind, n), file(f) { } static bool classof(const Symbol *s) { return s->kind() == DefinedImportDataKind; } uint64_t getRVA() { return file->location->getRVA(); } Chunk *getChunk() { return file->location; } void setLocation(Chunk *addressTable) { file->location = addressTable; } StringRef getDLLName() { return file->dllName; } StringRef getExternalName() { return file->externalName; } uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; }; // This class represents a symbol for a jump table entry which jumps // to a function in a DLL. Linkers are supposed to create such symbols // without "__imp_" prefix for all function symbols exported from // DLLs, so that you can call DLL functions as regular functions with // a regular name. A function pointer is given as a DefinedImportData. class DefinedImportThunk : public Defined { public: DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); static bool classof(const Symbol *s) { return s->kind() == DefinedImportThunkKind; } uint64_t getRVA() { return data->getRVA(); } Chunk *getChunk() { return data; } DefinedImportData *wrappedSym; private: Chunk *data; }; // If you have a symbol "foo" in your object file, a symbol name // "__imp_foo" becomes automatically available as a pointer to "foo". 
// This class is for such automatically-created symbols. // Yes, this is an odd feature. We didn't intend to implement that. // This is here just for compatibility with MSVC. class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef n, Defined *s) : Defined(DefinedLocalImportKind, n), data(make(s)) {} static bool classof(const Symbol *s) { return s->kind() == DefinedLocalImportKind; } uint64_t getRVA() { return data->getRVA(); } Chunk *getChunk() { return data; } private: LocalImportChunk *data; }; inline uint64_t Defined::getRVA() { switch (kind()) { case DefinedAbsoluteKind: return cast(this)->getRVA(); case DefinedSyntheticKind: return cast(this)->getRVA(); case DefinedImportDataKind: return cast(this)->getRVA(); case DefinedImportThunkKind: return cast(this)->getRVA(); case DefinedLocalImportKind: return cast(this)->getRVA(); case DefinedCommonKind: return cast(this)->getRVA(); case DefinedRegularKind: return cast(this)->getRVA(); - case LazyKind: + case LazyArchiveKind: + case LazyObjectKind: case UndefinedKind: llvm_unreachable("Cannot get the address for an undefined symbol."); } llvm_unreachable("unknown symbol kind"); } inline Chunk *Defined::getChunk() { switch (kind()) { case DefinedRegularKind: return cast(this)->getChunk(); case DefinedAbsoluteKind: return nullptr; case DefinedSyntheticKind: return cast(this)->getChunk(); case DefinedImportDataKind: return cast(this)->getChunk(); case DefinedImportThunkKind: return cast(this)->getChunk(); case DefinedLocalImportKind: return cast(this)->getChunk(); case DefinedCommonKind: return cast(this)->getChunk(); - case LazyKind: + case LazyArchiveKind: + case LazyObjectKind: case UndefinedKind: llvm_unreachable("Cannot get the chunk of an undefined symbol."); } llvm_unreachable("unknown symbol kind"); } // A buffer class that is large enough to hold any Symbol-derived // object. We allocate memory using this class and instantiate a symbol // using the placement new. 
union SymbolUnion { alignas(DefinedRegular) char a[sizeof(DefinedRegular)]; alignas(DefinedCommon) char b[sizeof(DefinedCommon)]; alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)]; alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)]; - alignas(Lazy) char e[sizeof(Lazy)]; + alignas(LazyArchive) char e[sizeof(LazyArchive)]; alignas(Undefined) char f[sizeof(Undefined)]; alignas(DefinedImportData) char g[sizeof(DefinedImportData)]; alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; + alignas(LazyObject) char j[sizeof(LazyObject)]; }; template void replaceSymbol(Symbol *s, ArgT &&... arg) { static_assert(std::is_trivially_destructible(), "Symbol types must be trivially destructible"); static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); assert(static_cast(static_cast(nullptr)) == nullptr && "Not a Symbol"); new (s) T(std::forward(arg)...); } } // namespace coff } // namespace lld #endif