Index: lld/trunk/COFF/Config.h =================================================================== --- lld/trunk/COFF/Config.h +++ lld/trunk/COFF/Config.h @@ -99,9 +99,10 @@ bool ForceUnresolved = false; bool Debug = false; bool DebugDwarf = false; - bool DebugGHashes = false; + bool DebugGHashes = true; bool DebugSymtab = false; bool ShowTiming = false; + bool ShowSummary = false; unsigned DebugTypes = static_cast(DebugType::None); std::vector NatvisFiles; llvm::SmallString<128> PDBAltPath; @@ -197,6 +198,7 @@ bool MinGW = false; bool WarnMissingOrderSymbol = true; bool WarnLocallyDefinedImported = true; + bool WarnDebugInfoUnusable = true; bool Incremental = true; bool IntegrityCheck = false; bool KillAt = false; Index: lld/trunk/COFF/Driver.cpp =================================================================== --- lld/trunk/COFF/Driver.cpp +++ lld/trunk/COFF/Driver.cpp @@ -20,7 +20,9 @@ #include "lld/Common/Driver.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" +#include "lld/Common/Summary.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" @@ -596,6 +598,22 @@ return DebugTypes; } +static void parseHasher(const opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_hasher); + if (!Arg) + return; + std::string ArgL = StringRef(Arg->getValue()).lower(); + + auto Hasher = codeview::GloballyHashedType::H; + if (ArgL == "md5") + Hasher = codeview::GloballyHashedType::HashType::MD5; + else if (ArgL == "sha1") + Hasher = codeview::GloballyHashedType::HashType::SHA1; + else if (ArgL == "cityhash") + Hasher = codeview::GloballyHashedType::HashType::CityHash; + codeview::GloballyHashedType::H = Hasher; +} + static std::string getMapFile(const opt::InputArgList &Args) { auto *Arg = Args.getLastArg(OPT_lldmap, OPT_lldmap_file); if (!Arg) @@ -943,6 +961,9 @@ if (Args.hasArg(OPT_show_timing)) Config->ShowTiming = true; + if 
(Args.hasArg(OPT_summary)) + Config->ShowSummary = true; + ScopedTimer T(Timer::root()); // Handle --version, which is an lld extension. This option is a bit odd // because it doesn't start with "/", but we deliberately chose "--" to @@ -986,11 +1007,17 @@ // Handle /ignore for (auto *Arg : Args.filtered(OPT_ignore)) { - if (StringRef(Arg->getValue()) == "4037") - Config->WarnMissingOrderSymbol = false; - else if (StringRef(Arg->getValue()) == "4217") - Config->WarnLocallyDefinedImported = false; - // Other warning numbers are ignored. + SmallVector Vec; + StringRef(Arg->getValue()).split(Vec, ','); + for (StringRef S : Vec) { + if (S == "4037") + Config->WarnMissingOrderSymbol = false; + else if (S == "4099") + Config->WarnDebugInfoUnusable = false; + else if (S == "4217") + Config->WarnLocallyDefinedImported = false; + // Other warning numbers are ignored. + } } // Handle /out @@ -1018,6 +1045,9 @@ Config->Incremental = true; } + // Handle /hasher + parseHasher(Args); + // Handle /debugtype Config->DebugTypes = parseDebugTypes(Args); @@ -1289,9 +1319,11 @@ Config->TerminalServerAware = !Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); Config->DebugDwarf = Debug == DebugKind::Dwarf; - Config->DebugGHashes = Debug == DebugKind::GHash; + //Config->DebugGHashes = Debug == DebugKind::GHash; Config->DebugSymtab = Debug == DebugKind::Symtab; + lld::ThreadsEnabled = Args.hasFlag(OPT_threads, OPT_threads_no, true); + Config->MapFile = getMapFile(Args); if (Config->Incremental && Args.hasArg(OPT_profile)) { @@ -1669,6 +1701,8 @@ Timer::root().stop(); if (Config->ShowTiming) Timer::root().print(); + if (Config->ShowSummary) + Summary::print(Config->ShowTiming); } } // namespace coff Index: lld/trunk/COFF/InputFiles.h =================================================================== --- lld/trunk/COFF/InputFiles.h +++ lld/trunk/COFF/InputFiles.h @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" 
+#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" @@ -28,7 +29,7 @@ namespace pdb { class DbiModuleDescriptorBuilder; } -} +} // namespace llvm namespace lld { namespace coff { @@ -38,10 +39,10 @@ using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::MachineTypes; using llvm::object::Archive; -using llvm::object::COFFObjectFile; -using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_section; +using llvm::object::COFFObjectFile; +using llvm::object::COFFSymbolRef; class Chunk; class Defined; @@ -55,7 +56,14 @@ // The root class of input files. class InputFile { public: - enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + enum Kind { + ArchiveKind, + ObjectKind, + ImportKind, + BitcodeKind, + PDBKind, + PCHKind + }; Kind kind() const { return FileKind; } virtual ~InputFile() {} @@ -106,6 +114,7 @@ // .obj or .o file. This may be a member of an archive file. class ObjFile : public InputFile { public: + explicit ObjFile(Kind K, MemoryBufferRef M) : InputFile(K, M) {} explicit ObjFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } void parse() override; @@ -119,9 +128,7 @@ // Returns a Symbol object for the SymbolIndex'th symbol in the // underlying object file. - Symbol *getSymbol(uint32_t SymbolIndex) { - return Symbols[SymbolIndex]; - } + Symbol *getSymbol(uint32_t SymbolIndex) { return Symbols[SymbolIndex]; } // Returns the underlying COFF file. COFFObjectFile *getCOFFObj() { return COFFObj.get(); } @@ -156,6 +163,8 @@ // precompiled object. Any difference indicates out-of-date objects. 
llvm::Optional PCHSignature; + std::vector OwnedHashes; + private: void initializeChunks(); void initializeSymbols(); Index: lld/trunk/COFF/Options.td =================================================================== --- lld/trunk/COFF/Options.td +++ lld/trunk/COFF/Options.td @@ -165,6 +165,13 @@ def lldmap : F<"lldmap">; def lldmap_file : Joined<["/", "-"], "lldmap:">; def show_timing : F<"time">; +def summary : F<"summary">; + +def hasher : P<"hasher", "Select the hashing algorithm for PDB types dedup; either 'MD5', 'SHA1', 'CityHash'">; + +defm threads: B<"threads", + "Run the linker multi-threaded (default)", + "Do not run the linker multi-threaded">; //============================================================================== // The flags below do nothing. They are defined only for link.exe compatibility. Index: lld/trunk/COFF/PDB.cpp =================================================================== --- lld/trunk/COFF/PDB.cpp +++ lld/trunk/COFF/PDB.cpp @@ -15,6 +15,8 @@ #include "Symbols.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Summary.h" +#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" @@ -53,7 +55,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JamCRC.h" -#include "llvm/Support/Parallel.h" +#include "llvm/Support/Mutex.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #include @@ -70,12 +72,17 @@ static Timer TotalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); static Timer AddObjectsTimer("Add Objects", TotalPdbLinkTimer); +static Timer GlobalHashTimer("Global hashing", AddObjectsTimer); static Timer TypeMergingTimer("Type Merging", AddObjectsTimer); +static Timer HashtableLookupTimer("Hashtable lookup", TypeMergingTimer); static Timer SymbolMergingTimer("Symbol Merging", AddObjectsTimer); 
static Timer GlobalsLayoutTimer("Globals Stream Layout", TotalPdbLinkTimer); static Timer TpiStreamLayoutTimer("TPI Stream Layout", TotalPdbLinkTimer); static Timer DiskCommitTimer("Commit to Disk", TotalPdbLinkTimer); +void startGlobalHashTimer() { HashtableLookupTimer.start(); } +void stopGlobalHashTimer() { HashtableLookupTimer.stop(); } + namespace { /// Map from type index and item index in a type server PDB to the /// corresponding index in the destination PDB. @@ -86,6 +93,50 @@ bool IsPrecompiledTypeMap = false; }; +// A PDB type server, which might be a dependency of another OBJ +class PDBDependency : public InputFile { +public: + explicit PDBDependency(MemoryBufferRef M) : InputFile(PDBKind, M) { + // Mark this map as a type server map. + IndexMap.IsTypeServerMap = true; + } + static bool classof(const InputFile *F) { return F->kind() == PDBKind; } + + void parse() override {} + + static std::map Instances; + + std::unique_ptr Session; + CVIndexMap IndexMap; + + std::vector TpiHashes; + std::vector IpiHashes; +}; + +std::map PDBDependency::Instances; + +// A PCH (precompiled header OBJ, which might be a dependency of another OBJ +class PCHDependency : public InputFile { +public: + explicit PCHDependency(ObjFile *O) : InputFile(PCHKind, O->MB) { + RefObj = O; + // Mark this map as a precompiled types map. + IndexMap.IsPrecompiledTypeMap = true; + } + static bool classof(const InputFile *F) { return F->kind() == PCHKind; } + + void parse() override {} + + ObjFile *refObj() const { return RefObj; } + + static std::map Instances; + + ObjFile *RefObj; + CVIndexMap IndexMap; +}; + +std::map PCHDependency::Instances; + class DebugSHandler; class PDBLinker { @@ -110,10 +161,12 @@ /// Link CodeView from each object file in the symbol table into the PDB. void addObjectsToPDB(); + Error mergeAllOBJ(); + /// Link CodeView from a single object file into the target (output) PDB. 
/// When a precompiled headers object is linked, its TPI map might be provided /// externally. - void addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap = nullptr); + void mergeSymbols(ObjFile *File, CVIndexMap *IndexMap); /// Produce a mapping from the type and item indices used in the object /// file to those in the destination PDB. @@ -126,19 +179,31 @@ /// If the object does not use a type server PDB (compiled with /Z7), we merge /// all the type and item records from the .debug$S stream and fill in the /// caller-provided ObjectIndexMap. - Expected mergeDebugT(ObjFile *File, - CVIndexMap *ObjectIndexMap); + + Expected computeHash(InputFile *File); + + Error openDependencies(InputFile *File); + + Expected mergeTypes(InputFile *File, CVIndexMap &TIStorage); + + void mergeTypeStream(ObjFile *File, CVTypeArray &Types, + CVIndexMap &ObjectIndexMap); + + template + unsigned computeTypeHashes( + ObjFile *File, Range &&Types, + ArrayRef PrecompHashes = {}); /// Reads and makes available a PDB. - Expected maybeMergeTypeServerPDB(ObjFile *File, - const CVType &FirstType); + Expected openPDB(ObjFile *File, const CVType &FirstType); + unsigned computePDBHashes(PDBDependency *D); + void mergePDB(PDBDependency *D); /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. - Expected - mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType, - CVIndexMap *ObjectIndexMap); + Error mergeInPCH(ObjFile *File, CVTypeArray &Stream, + CVIndexMap &ObjectIndexMap); /// Reads and makes available a precompiled headers object. /// @@ -149,14 +214,9 @@ /// /// If the precompiled headers object was already loaded, this function will /// simply return its (remapped) TPI map. - Expected aquirePrecompObj(ObjFile *File, - PrecompRecord Precomp); - - /// Adds a precompiled headers object signature -> TPI mapping. 
- std::pair - registerPrecompiledHeaders(uint32_t Signature); + Expected openPCH(ObjFile *File); - void mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap, + void mergeSymbolRecords(ObjFile *File, const CVIndexMap *IndexMap, std::vector &StringTableRefs, BinaryStreamRef SymData); @@ -206,23 +266,13 @@ llvm::SmallString<128> NativePath; - /// A list of other PDBs which are loaded during the linking process and which - /// we need to keep around since the linking operation may reference pointers - /// inside of these PDBs. - llvm::SmallVector, 2> LoadedPDBs; - std::vector SectionMap; - /// Type index mappings of type server PDBs that we've loaded so far. - std::map TypeServerIndexMappings; + std::atomic TotalTypesRecords{}; + std::atomic TotalTypesRecordsSize{}; - /// Type index mappings of precompiled objects type map that we've loaded so - /// far. - std::map PrecompTypeIndexMappings; - - /// List of TypeServer PDBs which cannot be loaded. - /// Cached to prevent repeated load attempts. - std::map MissingTypeServerPDBs; + std::atomic GlobalSymbolsCount{}; + std::atomic ModulesSymbolsCount{}; }; class DebugSHandler { @@ -232,7 +282,7 @@ ObjFile &File; /// The result of merging type indices. - const CVIndexMap &IndexMap; + const CVIndexMap *IndexMap; /// The DEBUG_S_STRINGTABLE subsection. These strings are referred to by /// index from other records in the .debug$S section. 
All of these strings @@ -262,13 +312,13 @@ std::vector StringTableReferences; public: - DebugSHandler(PDBLinker &Linker, ObjFile &File, const CVIndexMap &IndexMap) + DebugSHandler(PDBLinker &Linker, ObjFile &File, const CVIndexMap *IndexMap) : Linker(Linker), File(File), IndexMap(IndexMap) {} void handleDebugS(lld::coff::SectionChunk &DebugS); void finish(); }; -} +} // namespace // Visual Studio's debugger requires absolute paths in various places in the // PDB to work without additional configuration: @@ -381,12 +431,15 @@ // must be same for all objects which depend on the precompiled object. // Recompiling the precompiled headers will generate a new PCH key and thus // invalidate all the dependent objects. -static uint32_t extractPCHSignature(ObjFile *File) { +static void extractPCHSignature(ObjFile *File) { + if (File->PCHSignature) + return; + auto DbgIt = find_if(File->getDebugChunks(), [](SectionChunk *C) { return C->getSectionName() == ".debug$S"; }); - if (!DbgIt) - return 0; + if (DbgIt == File->getDebugChunks().end()) + return; ArrayRef Contents = consumeDebugMagic((*DbgIt)->getContents(), ".debug$S"); @@ -404,111 +457,159 @@ consumeError(Sym.takeError()); continue; } - if (auto ObjName = SymbolDeserializer::deserializeAs(Sym.get())) - return ObjName->Signature; + if (auto ObjName = + SymbolDeserializer::deserializeAs(Sym.get())) { + File->PCHSignature.emplace(ObjName->Signature); + return; + } } - return 0; + return; } -Expected -PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) { - ScopedTimer T(TypeMergingTimer); - - bool IsPrecompiledHeader = false; - - ArrayRef Data = getDebugSection(File, ".debug$T"); - if (Data.empty()) { - // Try again, Microsoft precompiled headers use .debug$P instead of - // .debug$T - Data = getDebugSection(File, ".debug$P"); - IsPrecompiledHeader = true; +template +unsigned PDBLinker::computeTypeHashes( + ObjFile *File, Range &&Types, + ArrayRef PrecompHashes) { + if (Config->DebugGHashes) { + Optional> 
DebugH = getDebugH(File); + if (!DebugH) { + assert(File->OwnedHashes.empty()); + uint64_t LocalRecordsBytes{}; + + File->OwnedHashes = GloballyHashedType::hashTypes( + Types, LocalRecordsBytes, PrecompHashes); + + TotalTypesRecordsSize += LocalRecordsBytes; + + return File->OwnedHashes.size() - PrecompHashes.size(); + } } - if (Data.empty()) - return *ObjectIndexMap; // no debug info + return 0; +} - // Precompiled headers objects need to save the index map for further - // reference by other objects which use the precompiled headers. - if (IsPrecompiledHeader) { - uint32_t PCHSignature = extractPCHSignature(File); - if (PCHSignature == 0) - fatal("No signature found for the precompiled headers OBJ (" + - File->getName() + ")"); +namespace { +enum TypeStreamKind { NoTypeStream, RegularOBJ, PCH, UsingPCH, PDB, UsingPDB }; - // When a precompiled headers object comes first on the command-line, we - // update the mapping here. Otherwise, if an object referencing the - // precompiled headers object comes first, the mapping is created in - // aquirePrecompObj(), thus we would skip this block. - if (!ObjectIndexMap->IsPrecompiledTypeMap) { - auto R = registerPrecompiledHeaders(PCHSignature); - if (R.second) - fatal( - "A precompiled headers OBJ with the same signature was already " - "provided! 
(" + - File->getName() + ")"); +struct Ident { + TypeStreamKind Kind; + CVTypeArray Types; +}; +} // namespace - ObjectIndexMap = &R.first; - } - } +static Ident identifyTypeStream(InputFile *File) { + if (isa(File)) + return {PDB, {}}; + if (isa(File)) + return {PCH, identifyTypeStream(cast(File)->refObj()).Types}; + + ObjFile *Obj = cast(File); + + extractPCHSignature(Obj); + + bool IsPCH = false; + + auto Data = getDebugSection(Obj, ".debug$P"); + if (!Data.empty()) + IsPCH = true; + else + Data = getDebugSection(Obj, ".debug$T"); + + if (Data.empty()) + return {NoTypeStream, {}}; - BinaryByteStream Stream(Data, support::little); CVTypeArray Types; - BinaryStreamReader Reader(Stream); + BinaryStreamReader Reader(Data, support::little); if (auto EC = Reader.readArray(Types, Reader.getLength())) fatal("Reader::readArray failed: " + toString(std::move(EC))); auto FirstType = Types.begin(); if (FirstType == Types.end()) - return *ObjectIndexMap; + return {NoTypeStream, {}}; - if (FirstType->kind() == LF_TYPESERVER2) { - // Look through type servers. If we've already seen this type server, - // don't merge any type information. - return maybeMergeTypeServerPDB(File, *FirstType); + if (IsPCH) { + return {PCH, Types}; + } else if (FirstType->kind() == LF_TYPESERVER2) { + return {UsingPDB, Types}; } else if (FirstType->kind() == LF_PRECOMP) { - // This object was compiled with /Yu, so process the corresponding - // precompiled headers object (/Yc) first. Some type indices in the current - // object are referencing data in the precompiled headers object, so we need - // both to be loaded. 
- auto E = mergeInPrecompHeaderObj(File, *FirstType, ObjectIndexMap); + return {UsingPCH, Types}; + } + return {RegularOBJ, Types}; +} + +Error PDBLinker::openDependencies(InputFile *File) { + auto Stream = identifyTypeStream(File); + if (Stream.Kind == UsingPDB) { + auto E = openPDB(cast(File), *Stream.Types.begin()); + if (!E) + return E.takeError(); + } else if (Stream.Kind == PCH) { + auto E = openPCH(cast(File)); if (!E) return E.takeError(); + } + return Error::success(); +} - // Drop LF_PRECOMP record from the input stream, as it needs to be replaced - // with the precompiled headers object type stream. - // Note that we can't just call Types.drop_front(), as we explicitly want to - // rebase the stream. - Types.setUnderlyingStream( - Types.getUnderlyingStream().drop_front(FirstType->RecordData.size())); +Expected PDBLinker::mergeTypes(InputFile *File, + CVIndexMap &TIStorage) { + ScopedTimer T1(TypeMergingTimer); + + auto Stream = identifyTypeStream(File); + if (Stream.Kind == NoTypeStream) + return nullptr; + + if (Stream.Kind == UsingPCH) { + if (auto E = mergeInPCH(cast(File), Stream.Types, TIStorage)) + return std::move(E); + mergeTypeStream(cast(File), Stream.Types, TIStorage); + } else if (Stream.Kind == RegularOBJ) { + mergeTypeStream(cast(File), Stream.Types, TIStorage); + } else if (Stream.Kind == PCH) { + PCHDependency *D = cast(File); + mergeTypeStream(D->refObj(), Stream.Types, D->IndexMap); + return &D->IndexMap; + } else if (Stream.Kind == PDB) { + PDBDependency *D = cast(File); + mergePDB(D); + return &D->IndexMap; + } else if (Stream.Kind == UsingPDB) { + auto E = openPDB(cast(File), *Stream.Types.begin()); + if (!E) + return E.takeError(); + PDBDependency *D = *E; + if (D) + return &D->IndexMap; } + return &TIStorage; +} - // Fill in the temporary, caller-provided ObjectIndexMap. 
+void PDBLinker::mergeTypeStream(ObjFile *File, CVTypeArray &Types, + CVIndexMap &ObjectIndexMap) { if (Config->DebugGHashes) { ArrayRef Hashes; - std::vector OwnedHashes; if (Optional> DebugH = getDebugH(File)) Hashes = getHashesFromDebugH(*DebugH); else { - OwnedHashes = GloballyHashedType::hashTypes(Types); - Hashes = OwnedHashes; + Hashes = File->OwnedHashes; } if (auto Err = mergeTypeAndIdRecords(GlobalIDTable, GlobalTypeTable, - ObjectIndexMap->TPIMap, Types, Hashes, + ObjectIndexMap.TPIMap, Types, Hashes, File->PCHSignature)) fatal("codeview::mergeTypeAndIdRecords failed: " + toString(std::move(Err))); + } else { if (auto Err = - mergeTypeAndIdRecords(IDTable, TypeTable, ObjectIndexMap->TPIMap, + mergeTypeAndIdRecords(IDTable, TypeTable, ObjectIndexMap.TPIMap, Types, File->PCHSignature)) fatal("codeview::mergeTypeAndIdRecords failed: " + toString(std::move(Err))); } - return *ObjectIndexMap; } -static Expected> -tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) { +static Expected +tryToLoadPDB(const llvm::codeview::GUID &GuidFromObj, StringRef TSPath) { // Ensure the file exists before anything else. We want to return ENOENT, // "file not found", even if the path points to a removable device (in which // case the return message would be EAGAIN, "resource unavailable try again") @@ -520,6 +621,7 @@ if (!MBOrErr) return errorCodeToError(MBOrErr.getError()); + MemoryBufferRef Buffer((*MBOrErr)->getMemBufferRef()); std::unique_ptr ThisSession; if (auto EC = pdb::NativeSession::createFromPdb( MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)), @@ -539,46 +641,40 @@ // PDB file doesn't mean it matches. For it to match the InfoStream's GUID // must match the GUID specified in the TypeServer2 record. 
if (ExpectedInfo->getGuid() != GuidFromObj) - return make_error(pdb::pdb_error_code::signature_out_of_date); + return make_error( + pdb::pdb_error_code::signature_out_of_date); - return std::move(NS); + PDBDependency *D = make(Buffer); + D->Session = std::move(NS); + return D; } -Expected -PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, const CVType &FirstType) { +// Look through type servers. If we've already seen this type server, +// don't merge any type information. +Expected PDBLinker::openPDB(ObjFile *DependentFile, + const CVType &FirstType) { TypeServer2Record TS; if (auto EC = TypeDeserializer::deserializeAs(const_cast(FirstType), TS)) fatal("error reading record: " + toString(std::move(EC))); - const codeview::GUID &TSId = TS.getGuid(); + const auto &TSId = TS.getGuid(); StringRef TSPath = TS.getName(); - // First, check if the PDB has previously failed to load. - auto PrevErr = MissingTypeServerPDBs.find(TSId); - if (PrevErr != MissingTypeServerPDBs.end()) - return createFileError( - TSPath, - make_error(PrevErr->second, inconvertibleErrorCode())); - - // Second, check if we already loaded a PDB with this GUID. Return the type - // index mapping if we have it. - auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()}); - CVIndexMap &IndexMap = Insertion.first->second; - if (!Insertion.second) - return IndexMap; - - // Mark this map as a type server map. - IndexMap.IsTypeServerMap = true; + // Check if we already loaded a PDB with this GUID. + auto I = PDBDependency::Instances.find(TSId); + if (I != PDBDependency::Instances.end()) + return I->second; // Check for a PDB at: // 1. The given file path // 2. Next to the object file or archive file - auto ExpectedSession = handleExpected( + auto ExpectedPDB = handleExpected( tryToLoadPDB(TSId, TSPath), [&]() { - StringRef LocalPath = - !File->ParentName.empty() ? File->ParentName : File->getName(); + StringRef LocalPath = !DependentFile->ParentName.empty() + ? 
DependentFile->ParentName + : DependentFile->getName(); SmallString<128> Path = sys::path::parent_path(LocalPath); // Currently, type server PDBs are only created by cl, which only runs // on Windows, so we can assume type server paths are Windows style. @@ -596,105 +692,171 @@ return Error(std::move(EC)); }); - if (auto E = ExpectedSession.takeError()) { - TypeServerIndexMappings.erase(TSId); - - // Flatten the error to a string, for later display, if the error occurs - // again on the same PDB. - std::string ErrMsg; - raw_string_ostream S(ErrMsg); - S << E; - MissingTypeServerPDBs.emplace(TSId, S.str()); - + if (auto E = ExpectedPDB.takeError()) { + PDBDependency::Instances.emplace(TSId, nullptr); return createFileError(TSPath, std::move(E)); } - pdb::NativeSession *Session = ExpectedSession->get(); - - // Keep a strong reference to this PDB, so that it's safe to hold pointers - // into the file. - LoadedPDBs.push_back(std::move(*ExpectedSession)); + PDBDependency *D = *ExpectedPDB; + PDBDependency::Instances.insert({TSId, D}); - auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream(); + auto ExpectedTpi = D->Session->getPDBFile().getPDBTpiStream(); if (auto E = ExpectedTpi.takeError()) fatal("Type server does not have TPI stream: " + toString(std::move(E))); - auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream(); + auto ExpectedIpi = D->Session->getPDBFile().getPDBIpiStream(); if (auto E = ExpectedIpi.takeError()) fatal("Type server does not have TPI stream: " + toString(std::move(E))); + return D; +} + +unsigned PDBLinker::computePDBHashes(PDBDependency *D) { if (Config->DebugGHashes) { + auto ExpectedTpi = D->Session->getPDBFile().getPDBTpiStream(); + auto ExpectedIpi = D->Session->getPDBFile().getPDBIpiStream(); // PDBs do not actually store global hashes, so when merging a type server // PDB we have to synthesize global hashes. 
To do this, we first synthesize // global hashes for the TPI stream, since it is independent, then we // synthesize hashes for the IPI stream, using the hashes for the TPI stream // as inputs. - auto TpiHashes = GloballyHashedType::hashTypes(ExpectedTpi->typeArray()); - auto IpiHashes = - GloballyHashedType::hashIds(ExpectedIpi->typeArray(), TpiHashes); + uint64_t LocalTypesRecordsBytes{}; + uint64_t LocalIDsRecordsBytes{}; + + D->TpiHashes = GloballyHashedType::hashTypes(ExpectedTpi->typeArray(), + LocalTypesRecordsBytes); + D->IpiHashes = GloballyHashedType::hashIds( + ExpectedIpi->typeArray(), D->TpiHashes, LocalIDsRecordsBytes); + + TotalTypesRecordsSize += LocalTypesRecordsBytes + LocalIDsRecordsBytes; + + return D->TpiHashes.size() + D->IpiHashes.size(); + } + return 0; +} - Optional EndPrecomp; +void PDBLinker::mergePDB(PDBDependency *D) { + auto ExpectedTpi = D->Session->getPDBFile().getPDBTpiStream(); + auto ExpectedIpi = D->Session->getPDBFile().getPDBIpiStream(); + if (Config->DebugGHashes) { + Optional PCHSignature; // Merge TPI first, because the IPI stream will reference type indices. - if (auto Err = mergeTypeRecords(GlobalTypeTable, IndexMap.TPIMap, - ExpectedTpi->typeArray(), TpiHashes, EndPrecomp)) + if (auto Err = mergeTypeRecords(GlobalTypeTable, D->IndexMap.TPIMap, + ExpectedTpi->typeArray(), D->TpiHashes, + PCHSignature)) fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err))); // Merge IPI. - if (auto Err = - mergeIdRecords(GlobalIDTable, IndexMap.TPIMap, IndexMap.IPIMap, - ExpectedIpi->typeArray(), IpiHashes)) + if (auto Err = mergeIdRecords(GlobalIDTable, D->IndexMap.TPIMap, + D->IndexMap.IPIMap, ExpectedIpi->typeArray(), + D->IpiHashes)) fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err))); } else { // Merge TPI first, because the IPI stream will reference type indices. 
- if (auto Err = mergeTypeRecords(TypeTable, IndexMap.TPIMap, + if (auto Err = mergeTypeRecords(TypeTable, D->IndexMap.TPIMap, ExpectedTpi->typeArray())) fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err))); // Merge IPI. - if (auto Err = mergeIdRecords(IDTable, IndexMap.TPIMap, IndexMap.IPIMap, - ExpectedIpi->typeArray())) + if (auto Err = mergeIdRecords(IDTable, D->IndexMap.TPIMap, + D->IndexMap.IPIMap, ExpectedIpi->typeArray())) fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err))); } +} - return IndexMap; +static bool equals_path(StringRef path1, StringRef path2) { +#if defined(_WIN32) + return path1.equals_lower(path2); +#else + return path1.equals(path2); +#endif } -Expected -PDBLinker::mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType, - CVIndexMap *ObjectIndexMap) { +Expected> +findPCH(ObjFile *File, CVTypeArray &Stream) { + uint32_t Signature = File->PCHSignature.getValueOr(0); + if (!Signature) + fatal("No signature found for the PCH-dependent OBJ (" + File->getName() + + ")"); + PrecompRecord Precomp; - if (auto EC = TypeDeserializer::deserializeAs(const_cast(FirstType), - Precomp)) + if (auto EC = TypeDeserializer::deserializeAs( + const_cast(*Stream.begin()), Precomp)) fatal("error reading record: " + toString(std::move(EC))); - auto E = aquirePrecompObj(File, Precomp); - if (!E) - return E.takeError(); + if (File->PCHSignature.getValue() != Precomp.getSignature()) + fatal("Corrupted file, the signature does not match the precomp record (" + + File->getName() + ")"); + + // link.exe requires that the PCH OBJ must always be provided + // on the command-line, even if its path could be inferred from the precomp + // record. + auto PCH = PCHDependency::Instances.find(Precomp.getSignature()); + if (PCH == PCHDependency::Instances.end()) { + + // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP + // records, we assume the OBJ comes from MSVC. 
Thusly, the paths embedded in + // the OBJs are in the Windows format. + StringRef PrecompFileName = sys::path::filename( + Precomp.getPrecompFilePath(), sys::path::Style::windows); + + auto MismatchedPCH = find_if(PCHDependency::Instances, [&](auto KV) + { + StringRef PCHFilename = sys::path::filename( + KV.second->getName(), sys::path::Style::windows); + + // Compare based solely on the file name (link.exe behavior) + return equals_path(PCHFilename, PrecompFileName); + }); + + pdb::pdb_error_code EC = MismatchedPCH != PCHDependency::Instances.end() + ? pdb::pdb_error_code::signature_out_of_date + : pdb::pdb_error_code::external_cmdline_ref; + return createFileError(Precomp.getPrecompFilePath().str(), + make_error(EC)); + } + return std::pair{PCH->second, Precomp}; +} + +// This object was compiled with /Yu, the corresponding precompiled headers +// object (/Yc) has to be already loaded. Some type indices in the current +// object are referencing data in the precompiled headers object, so we need +// both to be in memory. +Error PDBLinker::mergeInPCH(ObjFile *File, CVTypeArray &Stream, + CVIndexMap &ObjectIndexMap) { + auto PCH = findPCH(File, Stream); + if (!PCH) + return PCH.takeError(); + + // Drop LF_PRECOMP record from the input stream, as it needs to be + // replaced with the PCH OBJ type stream below. + Stream.drop_front(); - const CVIndexMap &PrecompIndexMap = *E; - assert(PrecompIndexMap.IsPrecompiledTypeMap); + PCHDependency *D = PCH->first; + PrecompRecord &Precomp = PCH->second; - if (PrecompIndexMap.TPIMap.empty()) - return PrecompIndexMap; + const CVIndexMap &PrecompIndexMap = D->IndexMap; + assert(PrecompIndexMap.IsPrecompiledTypeMap); assert(Precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); assert(Precomp.getTypesCount() <= PrecompIndexMap.TPIMap.size()); + // Use the previously remapped index map from the precompiled headers. 
- ObjectIndexMap->TPIMap.append(PrecompIndexMap.TPIMap.begin(), - PrecompIndexMap.TPIMap.begin() + - Precomp.getTypesCount()); - return *ObjectIndexMap; + ObjectIndexMap.TPIMap.append(PrecompIndexMap.TPIMap.begin(), + PrecompIndexMap.TPIMap.begin() + + Precomp.getTypesCount()); + return Error::success(); } -static bool equals_path(StringRef path1, StringRef path2) { +/*static bool equals_path(StringRef path1, StringRef path2) { #if defined(_WIN32) return path1.equals_lower(path2); #else return path1.equals(path2); #endif -} +}*/ // Find by name an OBJ provided on the command line -static ObjFile *findObjByName(StringRef FileNameOnly) { +/*static ObjFile *findObjByName(StringRef FileNameOnly) { SmallString<128> CurrentPath; for (ObjFile *F : ObjFile::Instances) { @@ -705,28 +867,36 @@ return F; } return nullptr; -} +}*/ + +Expected PDBLinker::openPCH(ObjFile *File) { + uint32_t Signature = File->PCHSignature.getValueOr(0); + if (!Signature) + fatal("No signature found for the precompiled headers OBJ (" + + File->getName() + ")"); + + /*if (File->PCHSignature.getValue() != Precomp.getSignature()) + fatal("Corrupted file, the signature does not match the precomp record (" + + File->getName() + ")");*/ + + auto I = PCHDependency::Instances.find(Signature); + if (I != PCHDependency::Instances.end()) { + if (I->second->refObj()->getName() != File->getName()) + fatal("A precompiled headers OBJ with the same signature was already " + "provided! (" + + File->getName() + ")"); + return I->second; + } + + PCHDependency *D = make(File); + PCHDependency::Instances[Signature] = D; -std::pair -PDBLinker::registerPrecompiledHeaders(uint32_t Signature) { - auto Insertion = PrecompTypeIndexMappings.insert({Signature, CVIndexMap()}); - CVIndexMap &IndexMap = Insertion.first->second; - if (!Insertion.second) - return {IndexMap, true}; - // Mark this map as a precompiled types map. 
- IndexMap.IsPrecompiledTypeMap = true; - return {IndexMap, false}; -} - -Expected -PDBLinker::aquirePrecompObj(ObjFile *File, PrecompRecord Precomp) { - // First, check if we already loaded the precompiled headers object with this - // signature. Return the type index mapping if we've already seen it. - auto R = registerPrecompiledHeaders(Precomp.getSignature()); - if (R.second) - return R.first; + auto P = find(ObjFile::Instances, File); + ObjFile::Instances.erase(P); - CVIndexMap &IndexMap = R.first; + return D; + + /*CVIndexMap &IndexMap = R.first; // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, @@ -744,15 +914,52 @@ addObjFile(PrecompFile, &IndexMap); - if (!PrecompFile->PCHSignature) + if (!PrecompFile->EndPrecomp) fatal(PrecompFile->getName() + " is not a precompiled headers object"); - if (Precomp.getSignature() != PrecompFile->PCHSignature.getValueOr(0)) + if (Precomp.getSignature() != PrecompFile->EndPrecomp->getSignature()) return createFileError( Precomp.getPrecompFilePath().str(), make_error(pdb::pdb_error_code::signature_out_of_date)); - return IndexMap; + return IndexMap;*/ +} + +Expected PDBLinker::computeHash(InputFile *File) { + auto Stream = identifyTypeStream(File); + switch (Stream.Kind) { + case PDB: + return computePDBHashes(cast(File)); + break; + case PCH: { + return computeTypeHashes(cast(File)->refObj(), Stream.Types); + } break; + case UsingPCH: { + auto PCH = findPCH(cast(File), Stream.Types); + if (!PCH) + return PCH.takeError(); + auto PrecompStream = identifyTypeStream(PCH->first); + + // We need to merge the previously computed PCH's hashes, however only the + // ones in the range [0, Precomp.TypesCount], where Precomp is the first + // LF_PRECOMP record read from the current OBJ. 
+ auto PrecompHashes = makeArrayRef(PCH->first->refObj()->OwnedHashes.data(), + PCH->second.getTypesCount()); + + // Don't take into account LF_PRECOMP, because it is replaced (virtually in + // this case) by the PCH's stream + Stream.Types.drop_front(); + + return computeTypeHashes(cast(File), Stream.Types, PrecompHashes); + } break; + case RegularOBJ: + return computeTypeHashes(cast(File), Stream.Types); + break; + case UsingPDB: + case NoTypeStream: + break; + } + return 0; } static bool remapTypeIndex(TypeIndex &TI, ArrayRef TypeIndexMap) { @@ -766,23 +973,30 @@ static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind, MutableArrayRef RecordBytes, - const CVIndexMap &IndexMap, + const CVIndexMap *IndexMap, ArrayRef TypeRefs) { MutableArrayRef Contents = RecordBytes.drop_front(sizeof(RecordPrefix)); for (const TiReference &Ref : TypeRefs) { - unsigned ByteSize = Ref.Count * sizeof(TypeIndex); - if (Contents.size() < Ref.Offset + ByteSize) + unsigned ByteSize = Ref.count() * sizeof(TypeIndex); + if (Contents.size() < Ref.offset() + ByteSize) fatal("symbol record too short"); + if (!IndexMap) { + log("Cannot merge symbol record of kind 0x" + utohexstr(SymKind) + " in " + + File->getName() + "; No type stream found in the file."); + continue; + } + // This can be an item index or a type index. Choose the appropriate map. 
- ArrayRef TypeOrItemMap = IndexMap.TPIMap; - bool IsItemIndex = Ref.Kind == TiRefKind::IndexRef; - if (IsItemIndex && IndexMap.IsTypeServerMap) - TypeOrItemMap = IndexMap.IPIMap; + ArrayRef TypeOrItemMap = IndexMap->TPIMap; + bool IsItemIndex = Ref.kind() == TiRefKind::IndexRef; + if (IsItemIndex && IndexMap->IsTypeServerMap) + TypeOrItemMap = IndexMap->IPIMap; MutableArrayRef TIs( - reinterpret_cast(Contents.data() + Ref.Offset), Ref.Count); + reinterpret_cast(Contents.data() + Ref.offset()), + Ref.count()); for (TypeIndex &TI : TIs) { if (!remapTypeIndex(TI, TypeOrItemMap)) { log("ignoring symbol record of kind 0x" + utohexstr(SymKind) + " in " + @@ -856,10 +1070,10 @@ CVSymbol Sym(Kind, RecordData); discoverTypeIndicesInSymbol(Sym, Refs); assert(Refs.size() == 1); - assert(Refs.front().Count == 1); + assert(Refs.front().count() == 1); TypeIndex *TI = - reinterpret_cast(Content.data() + Refs[0].Offset); + reinterpret_cast(Content.data() + Refs[0].offset()); // `TI` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and @@ -882,7 +1096,7 @@ /// The object file may not be aligned. 
static MutableArrayRef copyAndAlignSymbol(const CVSymbol &Sym, MutableArrayRef &AlignedMem) { - size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); + size_t Size = alignTo(Sym.size(), alignOf(CodeViewContainer::Pdb)); assert(Size >= 4 && "record too short"); assert(Size <= MaxRecordLength && "record too long"); assert(AlignedMem.size() >= Size && "didn't preallocate enough"); @@ -1005,7 +1219,7 @@ } } -void PDBLinker::mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap, +void PDBLinker::mergeSymbolRecords(ObjFile *File, const CVIndexMap *IndexMap, std::vector &StringTableRefs, BinaryStreamRef SymData) { ArrayRef SymsBuffer; @@ -1093,9 +1307,11 @@ // adding the symbol to the module since we may need to get the next // symbol offset, and writing to the module's symbol stream will update // that offset. - if (symbolGoesInGlobalsStream(Sym, Scopes.empty())) + if (symbolGoesInGlobalsStream(Sym, Scopes.empty())) { addGlobalSymbol(Builder.getGsiBuilder(), File->ModuleDBI->getModuleIndex(), CurSymOffset, Sym); + GlobalSymbolsCount++; + } if (symbolGoesInModuleStream(Sym, Scopes.empty())) { // Add symbols to the module in bulk. If this symbol is contiguous @@ -1109,6 +1325,7 @@ BulkSymbols = RecordBytes; } CurSymOffset += Sym.length(); + ModulesSymbolsCount++; } return Error::success(); })); @@ -1264,18 +1481,52 @@ File.ModuleDBI->addDebugSubsection(std::move(NewChecksums)); } -void PDBLinker::addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap) { - if (File->wasProcessedForPDB()) +struct ExitStep : ErrorInfo { + static char ID; +}; + +StringRef getFileNamePath(InputFile *File, SmallString<128> &Path) { + bool InArchive = !File->ParentName.empty(); + Path = InArchive ? File->ParentName : File->getName(); + pdbMakeAbsolute(Path); + StringRef Name = InArchive ? 
File->getName() : StringRef(Path); + return Name; +} + +/*void PDBLinker::mergeTypes(InputFile *File) { + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. + CVIndexMap ObjectIndexMap; + auto IndexMapResult = mergeDebugT(File, &ObjectIndexMap); + + if (IndexMapResult && !*IndexMapResult) return; + + // if (!IndexMapResult && IndexMapResult.errorIsA()) + // return; // probably a step that requests exiting + + // If the .debug$T sections fail to merge, assume there is no debug info. + if (!IndexMapResult) { + if (!Config->WarnDebugInfoUnusable) + return; + StringRef FileName = sys::path::filename(Path); + warn("Cannot use debug info for '" + FileName + "' [LNK4099]\n" + + ">>> failed to load reference " + + StringRef(toString(IndexMapResult.takeError()))); + return; + } +}*/ + +static void createModuleDescriptor(pdb::PDBFileBuilder &Builder, ObjFile *File) { + SmallString<128> Path; + StringRef Name = getFileNamePath(File, Path); + // Add a module descriptor for every object file. We need to put an absolute // path to the object into the PDB. If this is a plain object, we make its // path absolute. If it's an object in an archive, we make the archive path // absolute. - bool InArchive = !File->ParentName.empty(); - SmallString<128> Path = InArchive ? File->ParentName : File->getName(); - pdbMakeAbsolute(Path); - StringRef Name = InArchive ? 
File->getName() : StringRef(Path); - pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); File->ModuleDBI = &ExitOnErr(DbiBuilder.addModuleInfo(Name)); File->ModuleDBI->setObjFileName(Path); @@ -1290,27 +1541,14 @@ File->ModuleDBI->setFirstSectionContrib(SC); break; } +} - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. - CVIndexMap ObjectIndexMap; - auto IndexMapResult = - mergeDebugT(File, ExternIndexMap ? ExternIndexMap : &ObjectIndexMap); - - // If the .debug$T sections fail to merge, assume there is no debug info. - if (!IndexMapResult) { - auto FileName = sys::path::filename(Path); - warn("Cannot use debug info for '" + FileName + "'\n" + - ">>> failed to load reference " + - StringRef(toString(IndexMapResult.takeError()))); - return; - } +void PDBLinker::mergeSymbols(ObjFile *File, CVIndexMap *IndexMap) { ScopedTimer T(SymbolMergingTimer); - DebugSHandler DSH(*this, *File, *IndexMapResult); + pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); + DebugSHandler DSH(*this, *File, IndexMap); // Now do all live .debug$S and .debug$F sections. 
for (SectionChunk *DebugChunk : File->getDebugChunks()) { if (!DebugChunk->Live || DebugChunk->getSize() == 0) @@ -1359,12 +1597,119 @@ return Pub; } +static void warnUnusable(InputFile *F, Error E) { + if (!Config->WarnDebugInfoUnusable) + return; + StringRef FileName = sys::path::filename(F->getName()); + warn("Cannot use debug info for '" + FileName + "' [LNK4099]\n" + + ">>> failed to load reference " + StringRef(toString(std::move(E)))); +} + +template +void eraseElem(Container &C, ElemT E) { + erase_if(C, [&](ElemT E_) { return E_ == E; }); +} + +Error PDBLinker::mergeAllOBJ() { + + Summary::addLine(ObjFile::Instances.size(), "Input OBJ files (expanded from all cmd-line inputs)"); + + // Create module descriptors + for_each(ObjFile::Instances, + [&](ObjFile *Obj) { createModuleDescriptor(Builder, Obj); }); + + // Gather dependencies + for (ObjFile *Obj : std::vector(ObjFile::Instances)) { + if (auto E = openDependencies(Obj)) { + warnUnusable(Obj, std::move(E)); + eraseElem(ObjFile::Instances, Obj); + } + } + + Summary::addLine(PDBDependency::Instances.size(), "Dependent PDB files"); + Summary::addLine(PCHDependency::Instances.size(), "Dependent PCH OBJ files"); + + std::vector Depends; + for_each(PDBDependency::Instances, + [&](auto KV) { Depends.push_back(KV.second); }); + for_each(PCHDependency::Instances, + [&](auto KV) { Depends.push_back(KV.second); }); + + // Compute hashes + sys::SmartMutex InvalidFilesMutex; + std::vector InvalidFiles; + + auto ComputeH = [&](InputFile *F) { + if (!F) + return; + auto E = computeHash(F); + if (!E) { + warnUnusable(F, E.takeError()); + sys::SmartScopedLock Lock(InvalidFilesMutex); + InvalidFiles.push_back(F); + } else { + TotalTypesRecords += *E; + } + }; + + GlobalHashTimer.start(); + // Compute Type GHASH-es for all indirect PDB/PCH files + parallelForEach(Depends, ComputeH); + + // Compute Type GHASH-es for all remaining OBJ files + parallelForEach(ObjFile::Instances, ComputeH); + GlobalHashTimer.stop(); + + for 
(InputFile *F : InvalidFiles) { + eraseElem(ObjFile::Instances, F); + eraseElem(Depends, F); + } + + // Merge Types in PDB/PCH files + for (InputFile *File : Depends) { + if (!File) + continue; + CVIndexMap TIStorage; + auto R = mergeTypes(File, TIStorage); + if (!R) + return R.takeError(); + + if (isa(File)) + mergeSymbols(cast(File)->refObj(), *R); + } + + // Merge Types and Symbols in all remaining OBJ files + for (ObjFile *File : ObjFile::Instances) { + CVIndexMap TIStorage; + auto R = mergeTypes(File, TIStorage); + if (!R) + return R.takeError(); + + mergeSymbols(File, *R); + } + + // Statistics + Summary::addLine(TotalTypesRecords.load(), + "Input type records (across all OBJ and dependencies)"); + Summary::addLine(TotalTypesRecordsSize.load(), + "Input type records bytes (across all OBJ and dependencies)"); + Summary::addLine(GlobalTypeTable.size() + GlobalIDTable.size(), + "Output merged type records"); + Summary::addLine(GlobalSymbolsCount.load() + ModulesSymbolsCount.load(), + "Output merged symbol records"); + Summary::addLine(PDBStrTab.size(), "Output PDB strings"); + + return Error::success(); +} + // Add all object files to the PDB. Merge .debug$T sections into IpiData and // TpiData. void PDBLinker::addObjectsToPDB() { + ScopedTimer T1(AddObjectsTimer); - for (ObjFile *File : ObjFile::Instances) - addObjFile(File); + auto E = mergeAllOBJ(); + if (E) + fatal("Error merging OBJ: " + toString(std::move(E))); Builder.getStringTableBuilder().setStrings(PDBStrTab); T1.stop(); @@ -1388,7 +1733,7 @@ if (!Publics.empty()) { // Sort the public symbols and add them to the stream. 
- sort(parallel::par, Publics.begin(), Publics.end(), + parallelSort(Publics, [](const PublicSym32 &L, const PublicSym32 &R) { return L.Name < R.Name; }); Index: lld/trunk/Common/CMakeLists.txt =================================================================== --- lld/trunk/Common/CMakeLists.txt +++ lld/trunk/Common/CMakeLists.txt @@ -8,6 +8,7 @@ Memory.cpp Reproduce.cpp Strings.cpp + Summary.cpp TargetOptionsCommandFlags.cpp Threads.cpp Timer.cpp Index: lld/trunk/Common/Summary.cpp =================================================================== --- lld/trunk/Common/Summary.cpp +++ lld/trunk/Common/Summary.cpp @@ -0,0 +1,47 @@ +//===- Summary.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Summary.h" +#include "llvm/ADT/Any.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace lld; +using namespace llvm; + +static void printLine(Any Val, StringRef S) { + if (Val.isEqual(0)) + return; + SmallString<80> Str; + llvm::raw_svector_ostream Stream(Str); + Stream << formatv("{0} {1}", fmt_align(Val, AlignStyle::Right, 15), S.data()); + + message(Str); +} + +namespace lld { +namespace Summary { +static std::vector> Lines; +void addLine(Any Val, StringRef S) { Lines.push_back({Val, S.str()}); }; +void print(bool Banner) { + if (Banner) { + message(""); + message(formatv("{0}", fmt_align("Summary", AlignStyle::Center, 80))); + message(std::string(80, '-')); + } + for (auto& L : Lines) + { + printLine(L.first, L.second); + } +} +} // namespace Summary +} // namespace lld Index: lld/trunk/include/lld/Common/Summary.h 
=================================================================== --- lld/trunk/include/lld/Common/Summary.h +++ lld/trunk/include/lld/Common/Summary.h @@ -0,0 +1,22 @@ +//===- Summary.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COMMON_SUMMARY_H +#define LLD_COMMON_SUMMARY_H + +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace Summary { +void addLine(llvm::Any Value, llvm::StringRef S); +void print(bool Banner); +} +} // namespace lld + +#endif Index: lld/trunk/include/lld/Common/Threads.h =================================================================== --- lld/trunk/include/lld/Common/Threads.h +++ lld/trunk/include/lld/Common/Threads.h @@ -81,6 +81,13 @@ for_each_n(llvm::parallel::seq, Begin, End, Fn); } +template void parallelSort(R &&Range, FuncTy Fn) { + if (ThreadsEnabled) + sort(llvm::parallel::par, std::begin(Range), std::end(Range), Fn); + else + sort(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn); +} + } // namespace lld #endif Index: llvm/trunk/include/llvm/ADT/Any.h =================================================================== --- llvm/trunk/include/llvm/ADT/Any.h +++ llvm/trunk/include/llvm/ADT/Any.h @@ -31,6 +31,8 @@ virtual ~StorageBase() = default; virtual std::unique_ptr clone() const = 0; virtual const void *id() const = 0; + virtual size_t size() const = 0; + virtual const void *data() const = 0; }; template struct StorageImpl : public StorageBase { @@ -44,6 +46,9 @@ const void *id() const override { return &TypeId::Id; } + size_t size() const override { return sizeof(T); } + const void* data() const override { return &Value; } + T Value; private: @@ -74,7 +79,8 @@ // committee as a potential DR in `std::any` as well, but we're // going ahead and 
adopting it to work-around usage of `Any` with // types that need to be implicitly convertible from an `Any`. - llvm::negation::type>>, + llvm::negation< + std::is_convertible::type>>, std::is_copy_constructible::type>>::value, int>::type = 0> Any(T &&Value) { @@ -98,6 +104,14 @@ void reset() { Storage.reset(); } + template + bool isEqual(const T &Value) { + assert(hasValue()); + if (sizeof(T) != Storage->size()) + return false; + return memcmp(&Value, Storage->data(), sizeof(T)) == 0; + } + private: template friend T any_cast(const Any &Value); template friend T any_cast(Any &Value); @@ -111,7 +125,6 @@ template const char Any::TypeId::Id = 0; - template bool any_isa(const Any &Value) { if (!Value.Storage) return false; Index: llvm/trunk/include/llvm/ADT/Hashing.h =================================================================== --- llvm/trunk/include/llvm/ADT/Hashing.h +++ llvm/trunk/include/llvm/ADT/Hashing.h @@ -264,11 +264,20 @@ /// seed and the first 64-byte chunk. /// This effectively performs the initial mix. 
static hash_state create(const char *s, uint64_t seed) { - hash_state state = { - 0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49), - seed * k1, shift_mix(seed), 0 }; + hash_state S = create(seed); + S.mix(s); + return S; + } + + static hash_state create(uint64_t seed) { + hash_state state = {0, + seed, + hash_16_bytes(seed, k1), + rotate(seed ^ k1, 49), + seed * k1, + shift_mix(seed), + 0}; state.h6 = hash_16_bytes(state.h4, state.h5); - state.mix(s); return state; } Index: llvm/trunk/include/llvm/DebugInfo/CodeView/CVRecord.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/CVRecord.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -34,6 +34,7 @@ bool valid() const { return Type != static_cast(0); } uint32_t length() const { return RecordData.size(); } + uint32_t size() const { return RecordData.size(); } Kind kind() const { return Type; } ArrayRef data() const { return RecordData; } StringRef str_data() const { Index: llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeDenseMap.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeDenseMap.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeDenseMap.h @@ -0,0 +1,296 @@ +//===- llvm/DebugInfo/CodeView/GlobalTypeDenseMap.h - Dense probed hash table ------------*- +// C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the GlobalTypeDenseMap class.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPEDENSEMAP_H +#define LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPEDENSEMAP_H + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/ReverseIteration.h" +#include "llvm/Support/type_traits.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +struct GloballyHashedInfo { + static unsigned getHash(uint64_t Key, unsigned BucketMask) { + return (Key >> 32) & BucketMask; + } + void packKeyValue(uint64_t Key, uint32_t Value, + unsigned BucketNo, unsigned BucketMask) { + KV = (Key & ~((uint64_t)BucketMask << 32)) | ((uint64_t)Value << 32); + //auto T = extractKeyValue(BucketNo, BucketMask); + //assert((T == std::pair(Key, Value))); + } + std::pair extractKeyValue(unsigned BucketNo, + unsigned BucketMask) const { + // SHA1 generates keys padded to 0 if the record is too short. The topmost + // 32-bits are reliable. 
+ uint64_t K = + (KV & ~((uint64_t)BucketMask << 32)) | ((uint64_t)BucketNo << 32); + uint32_t V = (KV >> 32) & BucketMask; + return {K, V}; + } + bool isEmpty() const { return !KV; } + unsigned isEqualOrEmpty(const uint64_t Other, unsigned BucketNo, + unsigned BucketMask) const { + if (!KV) + return 2; // empty + auto Extracted = extractKeyValue(BucketNo, BucketMask); + if (Extracted.first == Other) + return 1; // equal + return 0; + } + uint64_t KV; +}; + +template +class GlobalTypeDenseMap { +public: + using size_type = unsigned; + using key_type = KeyT; + using mapped_type = ValueT; + using value_type = BucketT; + + LLVM_NODISCARD bool empty() const { return getNumEntries() == 0; } + unsigned size() const { return getNumEntries(); } + + uint64_t getBucketsMask() const { return getNumBuckets() - 1; } + + // Inserts key,value pair into the map if the key isn't already in the map. + // The value is constructed in-place if the key is not in the map, otherwise + // it is not moved. + template + std::pair try_emplace(const GloballyHashedType &Key, + const uint32_t &Value, + RehashFunc Rehash) { + uint64_t InsertKey = *(uint64_t*)const_cast(Key.Hash.data()); + + auto R = LookupBucketFor(InsertKey); + if (std::get<2>(R)) { + auto KV = + std::get<1>(R)->extractKeyValue(std::get<0>(R), getBucketsMask()); + return {KV.second, false /*already there*/}; + } + + // Otherwise, insert the new element. + InsertIntoBucket({std::get<0>(R), std::get<1>(R)}, InsertKey, Value, Rehash); + return {Value, true /*inserted*/}; + } + +protected: + + /// Returns the number of buckets to allocate to ensure that the + /// GlobalTypeDenseMap can accommodate \p NumEntries without need to grow(). + unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { + // Ensure that "NumEntries * 4 < NumBuckets * 3" + if (NumEntries == 0) + return 0; + // +1 is required because of the strict inequality. + // For example if NumEntries is 48, we need to return 128.
+ return NextPowerOf2(NumEntries * 4 / 3 + 1); + } + +private: + + void incrementNumEntries() { setNumEntries(getNumEntries() + 1); } + + void decrementNumEntries() { setNumEntries(getNumEntries() - 1); } + + BucketT *getBucketsEnd() { return getBuckets() + getNumBuckets(); } + + const BucketT *getBucketsEnd() const { + return getBuckets() + getNumBuckets(); + } + + template + void InsertIntoBucket(std::pair TheBucket, + const uint64_t &Key, const uint32_t &Value, + RehashFunc Rehash) { + // If the load of the hash table is more than 3/4, grow the table. + // + // The later case is tricky. For example, if we had one empty bucket with + // tons of tombstones, failing lookups (e.g. for insertion) would have to + // probe almost the entire table until it found the empty bucket. If the + // table completely filled with tombstones, no lookup would ever succeed, + // causing infinite loops in lookup. + unsigned NewNumEntries = getNumEntries() + 1; + unsigned NumBuckets = getNumBuckets(); + if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) { + NumEntries = 0; + grow(NumBuckets * 2, Rehash); + auto R = LookupBucketFor(Key); + assert(!std::get<2>(R)); + TheBucket = {std::get<0>(R), std::get<1>(R)}; + } + assert(std::get<1>(TheBucket)); + + // Only update the state after we've grown our bucket space appropriately + // so that when growing buckets we have self-consistent entry count. + incrementNumEntries(); + + std::get<1>(TheBucket)->packKeyValue(Key, Value, std::get<0>(TheBucket), + getBucketsMask()); + } + + /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in + /// FoundBucket. If the bucket contains the key and a value, this returns + /// true, otherwise it returns a bucket with an empty marker or tombstone and + /// returns false. 
+ std::tuple + LookupBucketFor(const uint64_t &Key) { + if (getNumBuckets() == 0) + return {0, nullptr, false}; + + assert(Key && "Empty value shouldn't be inserted into map!"); + + BucketT *BucketsPtr = getBuckets(); + const unsigned BucketsMask = getBucketsMask(); + unsigned BucketNo = BucketT::getHash(Key, BucketsMask); + unsigned InitialBucketNo = BucketNo; + unsigned ProbeAmt = 1; + while (true) { + BucketT *ThisBucket = BucketsPtr + BucketNo; + + if (ProbeAmt > MaxProbes) + MaxProbes = ProbeAmt; + if (ProbeAmt <= MAX_PROBES) + Probes[ProbeAmt - 1]++; + + auto R = ThisBucket->isEqualOrEmpty(Key, InitialBucketNo, BucketsMask); + if (LLVM_LIKELY(R)) { + return {InitialBucketNo, ThisBucket, + R == 1 ? true /*bucket equals provided Key*/ + : false /*bucket empty*/}; + } + + // Otherwise, it's a hash collision, continue quadratic probing. + BucketNo += ProbeAmt++; + BucketNo &= BucketsMask; + } + } + +public: + /// Return the approximate size (in bytes) of the actual map. + /// This is just the raw memory used by GlobalTypeDenseMap. + /// If entries are pointers to objects, the size of the referenced objects + /// are not included. 
+ size_t getMemorySize() const { return getNumBuckets() * sizeof(BucketT); } + +private: + + BucketT *Buckets = nullptr; + unsigned NumEntries = 0; + unsigned NumBuckets = 0; + llvm::sys::MemoryBlock MB; + unsigned MaxProbes = 0; + enum { MAX_PROBES = 256 }; + unsigned Probes[MAX_PROBES]{}; + +public: + GlobalTypeDenseMap() { init(0); } + + ~GlobalTypeDenseMap() { + deleteBuckets(MB); + Buckets = nullptr; + NumBuckets = 0; + NumEntries = 0; + } + + void init(unsigned InitNumEntries) { + auto InitBuckets = getMinBucketToReserveForEntries(InitNumEntries); + allocateBuckets(InitBuckets); + } + + template + void grow(unsigned AtLeast, RehashFunc Rehash) { + BucketT *OldBuckets = Buckets; + unsigned NewNumBuckets = std::max( + 64, static_cast(NextPowerOf2(AtLeast - 1))); + allocateBuckets(NewNumBuckets); + assert(Buckets); + if (OldBuckets) + Rehash(); + } + +private: + unsigned getNumEntries() const { return NumEntries; } + + void setNumEntries(unsigned Num) { NumEntries = Num; } + + BucketT *getBuckets() const { return Buckets; } + + unsigned getNumBuckets() const { return NumBuckets; } + + bool allocateBuckets(unsigned Num) { + if (Num == 0) + return false; + + size_t BlockSize = sizeof(BucketT) * Num; + + if (MB.size()) { + BucketT *MBEnd = (BucketT *)((uint8_t *)MB.base() + MB.size()); + if ((Buckets + NumBuckets + Num) <= MBEnd) { + Buckets += NumBuckets; + NumBuckets = Num; + return true; + } + if (BlockSize <= MB.size()) { + ::memset(MB.base(), 0, BlockSize); + Buckets = (BucketT *)MB.base(); + NumBuckets = Num; + return true; + } + } + + deleteBuckets(MB); + + const unsigned TwoMegs = 1 << 21; + const unsigned Nb2MegPages = (BlockSize + TwoMegs - 1) / TwoMegs; + + const unsigned Flags = + sys::Memory::MF_READ | sys::Memory::MF_WRITE | sys::Memory::MF_HUGE; + + std::error_code EC; + MB = llvm::sys::Memory::allocateMappedMemory(Nb2MegPages * TwoMegs, nullptr, + Flags, EC); + Buckets = static_cast(MB.base()); + NumBuckets = Num; + return true; + } + static void 
deleteBuckets(llvm::sys::MemoryBlock &MB) { + if (MB.size()) { + llvm::sys::Memory::releaseMappedMemory(MB); + } + } +}; +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPEDENSEMAP_H Index: llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -14,6 +14,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeDenseMap.h" #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeHashing.h" @@ -24,11 +25,16 @@ #include #include +extern void startGlobalHashTimer(); +extern void stopGlobalHashTimer(); + namespace llvm { namespace codeview { class ContinuationRecordBuilder; +#define FAST_HASH + class GlobalTypeTableBuilder : public TypeCollection { /// Storage for records. These need to outlive the TypeTableBuilder. BumpPtrAllocator &RecordStorage; @@ -39,7 +45,11 @@ SimpleTypeSerializer SimpleSerializer; /// Hash table. - DenseMap HashedRecords; +#ifdef FAST_HASH + GlobalTypeDenseMap<> HashedRecords; +#else + DenseMap HashedRecords2; +#endif /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). 
SmallVector, 2> SeenRecords; @@ -61,7 +71,6 @@ uint32_t capacity() override; // public interface - void reset(); TypeIndex nextTypeIndex() const; BumpPtrAllocator &getAllocator() { return RecordStorage; } @@ -72,17 +81,28 @@ template TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create) { - auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); + // startGlobalHashTimer(); +#ifdef FAST_HASH + auto R = HashedRecords.try_emplace(Hash, SeenRecords.size(), + [&]() { rehashMap(); }); +#else + auto R = HashedRecords2.try_emplace(Hash, nextTypeIndex()); + // assert(R.first == R2.first->second.getIndex()); + // assert(R.second == R2.second); +#endif + // stopGlobalHashTimer(); - if (LLVM_UNLIKELY(Result.second)) { + if (LLVM_UNLIKELY(R.second)) { uint8_t *Stable = RecordStorage.Allocate(RecordSize); MutableArrayRef Data(Stable, RecordSize); SeenRecords.push_back(Create(Data)); SeenHashes.push_back(Hash); } - - // Update the caller's copy of Record to point a stable copy. - return Result.first->second; +#ifdef FAST_HASH + return TypeIndex::fromArrayIndex(R.first); +#else + return R.first->second; +#endif } TypeIndex insertRecordBytes(ArrayRef Data); @@ -92,6 +112,9 @@ ArrayRef Data = SimpleSerializer.serialize(Record); return insertRecordBytes(Data); } + +private: + void rehashMap(); }; } // end namespace codeview Index: llvm/trunk/include/llvm/DebugInfo/CodeView/RecordSerialization.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/RecordSerialization.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/RecordSerialization.h @@ -34,6 +34,8 @@ struct RecordPrefix { ulittle16_t RecordLen; // Record length, starting from &RecordKind. ulittle16_t RecordKind; // Record kind enum (SymRecordKind or TypeRecordKind) + + unsigned size() const { return RecordLen + sizeof(RecordKind); } }; /// Reinterpret a byte array as an array of characters. 
Does not interpret as Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -106,26 +106,46 @@ /// Given a sequence of combined type and ID records, compute global hashes /// for each of them, returning the results in a vector of hashed types. template - static std::vector hashTypes(Range &&Records) { + static std::vector + hashTypes(Range &&Records, uint64_t &TotalRecordsBytes, + ArrayRef PrevTypeHashes = {}) { std::vector Hashes; - for (const auto &R : Records) + Hashes.insert(Hashes.end(), PrevTypeHashes.begin(), PrevTypeHashes.end()); + for (const auto &R : Records) { Hashes.push_back(hashType(R, Hashes, Hashes)); - + TotalRecordsBytes += R.size(); + } return Hashes; } + template + static std::vector + hashTypes(Range &&Records, ArrayRef PrevTypeHashes = {}) { + uint64_t TotalRecordsBytes{}; + return hashTypes(Records, TotalRecordsBytes, PrevTypeHashes); + } + /// Given a sequence of combined type and ID records, compute global hashes /// for each of them, returning the results in a vector of hashed types. 
template static std::vector - hashIds(Range &&Records, ArrayRef TypeHashes) { + hashIds(Range &&Records, ArrayRef TypeHashes, + uint64_t &TotalRecordsBytes) { std::vector IdHashes; - for (const auto &R : Records) + for (const auto &R : Records) { IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); - + TotalRecordsBytes += R.size(); + } return IdHashes; } + template + static std::vector + hashIds(Range &&Records, ArrayRef TypeHashes) { + uint64_t TotalRecordsBytes{}; + return hashIds(Records, TypeHashes, TotalRecordsBytes); + } + static std::vector hashTypeCollection(TypeCollection &Types) { std::vector Hashes; @@ -134,6 +154,14 @@ }); return Hashes; } + + enum HashType + { + CityHash, + SHA1, + MD5 + }; + static HashType H; }; #if defined(_MSC_VER) // is_trivially_copyable is not available in older versions of libc++, but it is Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -19,9 +19,16 @@ namespace codeview { enum class TiRefKind { TypeRef, IndexRef }; struct TiReference { - TiRefKind Kind; - uint32_t Offset; - uint32_t Count; + TiReference(TiRefKind K, unsigned O, unsigned C) { + // Offset cannot be more than llvm::codeview::MaxRecordLength + Data = ((unsigned)K & 1) | ((C & 0x7FFF) << 1) | ((O & 0xFFFF) << 16); + } + unsigned offset() const { return Data >> 16; } + unsigned count() const { return (Data >> 1) & 0x7FFF; } + TiRefKind kind() const { return (TiRefKind)(Data & 1); } + +private: + uint32_t Data; }; void discoverTypeIndices(ArrayRef RecordData, @@ -41,7 +48,7 @@ SmallVectorImpl &Refs); bool discoverTypeIndicesInSymbol(ArrayRef RecordData, SmallVectorImpl &Indices); -} -} +} // namespace codeview +} // namespace llvm #endif Index: llvm/trunk/include/llvm/Support/BinaryStreamArray.h 
=================================================================== --- llvm/trunk/include/llvm/Support/BinaryStreamArray.h +++ llvm/trunk/include/llvm/Support/BinaryStreamArray.h @@ -141,6 +141,12 @@ void drop_front() { Skew += begin()->length(); } + ArrayRef getBytes() const { + ArrayRef Buffer; + cantFail(Stream.readBytes(Skew, Stream.getLength() - Skew, Buffer)); + return Buffer; + } + private: BinaryStreamRef Stream; Extractor E; Index: llvm/trunk/include/llvm/Support/CityHash.h =================================================================== --- llvm/trunk/include/llvm/Support/CityHash.h +++ llvm/trunk/include/llvm/Support/CityHash.h @@ -0,0 +1,103 @@ +//==- SHA1.h - SHA1 implementation for LLVM --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CITYHASH_H +#define LLVM_SUPPORT_CITYHASH_H + +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { + +/// A class that wraps LLVM's version of the CityHash algorithm. +class CityHash { +public: + CityHash() { init(); } + + /// Reinitialize the internal state + void init(); + + /// Digest more data. + void update(ArrayRef Data); + + /// Digest more data. + void update(StringRef Str) { + update(ArrayRef((uint8_t *)const_cast(Str.data()), + Str.size())); + } + + StringRef final(); + + /// Computes the hash for a given bytes. 
+ static std::array hash(ArrayRef Data); + +private: + void internalUpdateState(); + +private: + hashing::detail::hash_state S; + uint8_t Buffer[64]{}; + unsigned BufferFree = 64; + size_t TotalLength{}; + uint64_t Result{}; + bool Created = false; +}; + +std::array CityHash::hash(ArrayRef Data) { + hash_code H = hash_value(Data); + return *(std::array *)&H; +} + +void CityHash::init() {} + +void CityHash::update(ArrayRef Data) { + const uint8_t *Src = Data.data(); + size_t SrcCount = Data.size(); + TotalLength += SrcCount; + do { + if (BufferFree > 0) { + unsigned Copied = std::min(BufferFree, (unsigned)SrcCount); + memcpy(Buffer, Src, Copied); + BufferFree -= Copied; + SrcCount -= Copied; + Src += Copied; + } + if (BufferFree == 0) { + internalUpdateState(); + } + } while (SrcCount > 0); +} + +StringRef CityHash::final() { + if (TotalLength <= 64) { + Result = hashing::detail::hash_short((const char*)Buffer, TotalLength, + hashing::detail::get_execution_seed()); + } else { + internalUpdateState(); + Result = S.finalize(TotalLength); + } + return StringRef((const char *)&Result, 8); +} + +void CityHash::internalUpdateState() { + if (BufferFree == 64) + return; + if (!Created) { + Created = true; + S = hashing::detail::hash_state::create( + hashing::detail::get_execution_seed()); + } + if (BufferFree > 0) + memset(Buffer + 64 - BufferFree, 0, BufferFree); + + S.mix((const char *)Buffer); + BufferFree = 64; +} +} // namespace llvm + +#endif Index: llvm/trunk/include/llvm/Support/FormatProviders.h =================================================================== --- llvm/trunk/include/llvm/Support/FormatProviders.h +++ llvm/trunk/include/llvm/Support/FormatProviders.h @@ -15,6 +15,7 @@ #ifndef LLVM_SUPPORT_FORMATPROVIDERS_H #define LLVM_SUPPORT_FORMATPROVIDERS_H +#include "llvm/ADT/Any.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -418,6 +419,29 @@ } } }; -} + +/// Implementation of format_provider for 
llvm::Any. + +#define IMPLEMENT_ANY(T) \ + if (any_isa(V)) { \ + format_provider::format(llvm::any_cast(V), Stream, Style); \ + return; \ + } + +template <> struct format_provider { + static void format(const Any &V, llvm::raw_ostream &Stream, + StringRef Style) { + IMPLEMENT_ANY(uint32_t); + IMPLEMENT_ANY(int32_t); + IMPLEMENT_ANY(uint64_t); + IMPLEMENT_ANY(int64_t); + IMPLEMENT_ANY(float); + IMPLEMENT_ANY(double); + } +}; + +#undef IMPLEMENT_ANY + +} // namespace llvm #endif Index: llvm/trunk/include/llvm/Support/MD5.h =================================================================== --- llvm/trunk/include/llvm/Support/MD5.h +++ llvm/trunk/include/llvm/Support/MD5.h @@ -90,6 +90,11 @@ /// Finishes off the hash and puts the result in result. void final(MD5Result &Result); + StringRef final() { + final(Result); + return {StringRef((char *)Result.Bytes.data(), Result.Bytes.size())}; + } + /// Translates the bytes in \p Res to a hex string that is /// deposited into \p Str. The result will be of length 32. 
static void stringifyResult(MD5Result &Result, SmallString<32> &Str); @@ -99,6 +104,8 @@ private: const uint8_t *body(ArrayRef Data); + + MD5Result Result; }; inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) { Index: llvm/trunk/include/llvm/Support/Memory.h =================================================================== --- llvm/trunk/include/llvm/Support/Memory.h +++ llvm/trunk/include/llvm/Support/Memory.h @@ -46,9 +46,12 @@ class Memory { public: enum ProtectionFlags { - MF_READ = 0x1000000, - MF_WRITE = 0x2000000, - MF_EXEC = 0x4000000 + MF_READ = 0x1000000, + MF_WRITE = 0x2000000, + MF_EXEC = 0x4000000, + MF_RWE_MASK = 0x7000000, + + MF_HUGE = 0x0000001 }; /// This method allocates a block of memory that is suitable for loading Index: llvm/trunk/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -87,11 +87,6 @@ return SeenHashes; } -void GlobalTypeTableBuilder::reset() { - HashedRecords.clear(); - SeenRecords.clear(); -} - TypeIndex GlobalTypeTableBuilder::insertRecordBytes(ArrayRef Record) { GloballyHashedType GHT = GloballyHashedType::hashType(Record, SeenHashes, SeenHashes); @@ -112,3 +107,17 @@ TI = insertRecordBytes(C.RecordData); return TI; } + +void GlobalTypeTableBuilder::rehashMap() { +#ifdef FAST_HASH + static bool Reentrance = false; + assert(!Reentrance); + Reentrance = true; + for (size_t I = 0; I < SeenHashes.size(); ++I) { + auto R = HashedRecords.try_emplace(SeenHashes[I], I, []() {}); + (void)R; + assert(R.second /*inserted*/); + } + Reentrance = false; +#endif +} \ No newline at end of file Index: llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp +++ 
llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -10,6 +10,8 @@ #include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/Support/CityHash.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/SHA1.h" using namespace llvm; @@ -30,29 +32,70 @@ return {llvm::hash_value(RecordData), RecordData}; } +GloballyHashedType::HashType GloballyHashedType::H = GloballyHashedType::HashType::SHA1; + +/*namespace { +struct Hasher { + void update(ArrayRef Data) { + switch (GloballyHashedType::H) { + case GloballyHashedType::HashType::CityHash: + H1.update(Data); + break; + case GloballyHashedType::HashType::SHA1: + H2.update(Data); + break; + case GloballyHashedType::HashType::MD5: + H3.update(Data); + break; + } + } + StringRef final() { + switch (GloballyHashedType::H) { + case GloballyHashedType::HashType::CityHash: + return H1.final(); + case GloballyHashedType::HashType::SHA1: + return H2.final(); + case GloballyHashedType::HashType::MD5: + return H3.final(); + } + return{}; + } + CityHash H1; + SHA1 H2; + MD5 H3; +}; +} // namespace*/ + +static uint32_t MaxDiscover = 0; + +template void append(ArrayT &A, R &&Range) { + A.append(adl_begin(Range), adl_end(Range)); +} + GloballyHashedType GloballyHashedType::hashType(ArrayRef RecordData, ArrayRef PreviousTypes, ArrayRef PreviousIds) { - SmallVector Refs; + SmallVector Refs; discoverTypeIndices(RecordData, Refs); - SHA1 S; - S.init(); + if (Refs.size() > MaxDiscover) + MaxDiscover = Refs.size(); + SmallVector Data; uint32_t Off = 0; - S.update(RecordData.take_front(sizeof(RecordPrefix))); + append(Data, RecordData.take_front(sizeof(RecordPrefix))); RecordData = RecordData.drop_front(sizeof(RecordPrefix)); for (const auto &Ref : Refs) { // Hash any data that comes before this TiRef. 
- uint32_t PreLen = Ref.Offset - Off; + uint32_t PreLen = Ref.offset() - Off; ArrayRef PreData = RecordData.slice(Off, PreLen); - S.update(PreData); - auto Prev = (Ref.Kind == TiRefKind::IndexRef) ? PreviousIds : PreviousTypes; + append(Data, PreData); + auto Prev = (Ref.kind() == TiRefKind::IndexRef) ? PreviousIds : PreviousTypes; - auto RefData = RecordData.slice(Ref.Offset, Ref.Count * sizeof(TypeIndex)); + auto RefData = RecordData.slice(Ref.offset(), Ref.count() * sizeof(TypeIndex)); // For each type index referenced, add in the previously computed hash // value of that type. ArrayRef Indices( - reinterpret_cast(RefData.data()), Ref.Count); + reinterpret_cast(RefData.data()), Ref.count()); for (TypeIndex TI : Indices) { ArrayRef BytesToHash; if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) { @@ -61,15 +104,23 @@ } else { BytesToHash = Prev[TI.toArrayIndex()].Hash; } - S.update(BytesToHash); + append(Data, BytesToHash); } - Off = Ref.Offset + Ref.Count * sizeof(TypeIndex); + Off = Ref.offset() + Ref.count() * sizeof(TypeIndex); } // Don't forget to add in any trailing bytes. 
auto TrailingBytes = RecordData.drop_front(Off); - S.update(TrailingBytes); + append(Data, TrailingBytes); - return {S.final().take_back(8)}; + switch (GloballyHashedType::H) { + case GloballyHashedType::HashType::CityHash: + return makeArrayRef(CityHash::hash(Data).data(), 8); + case GloballyHashedType::HashType::SHA1: + return makeArrayRef(SHA1::hash(Data).data(), 8); + case GloballyHashedType::HashType::MD5: + return makeArrayRef(MD5::hash(Data).data(), 8); + } + return {}; } Index: llvm/trunk/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -469,9 +469,9 @@ BinaryStreamReader Reader(RecordData, support::little); for (const auto &Ref : Refs) { - Reader.setOffset(Ref.Offset); + Reader.setOffset(Ref.offset()); FixedStreamArray Run; - cantFail(Reader.readArray(Run, Ref.Count)); + cantFail(Reader.readArray(Run, Ref.count())); Indices.append(Run.begin(), Run.end()); } } Index: llvm/trunk/lib/DebugInfo/CodeView/TypeStreamMerger.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -356,12 +356,8 @@ } Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { - BinaryStreamRef Stream = Types.getUnderlyingStream(); - ArrayRef Buffer; - cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); - return forEachCodeViewRecord( - Buffer, [this](const CVType &T) { return remapType(T); }); + Types.getBytes(), [this](const CVType &T) { return remapType(T); }); } Error TypeStreamMerger::remapType(const CVType &Type) { @@ -401,7 +397,7 @@ ArrayRef TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef Storage) { - SmallVector Refs; + SmallVector Refs; discoverTypeIndices(OriginalType.RecordData, Refs); if (Refs.empty()) return 
OriginalType.RecordData; @@ -413,11 +409,11 @@ for (auto &Ref : Refs) { TypeIndex *DestTIs = - reinterpret_cast(DestContent + Ref.Offset); + reinterpret_cast(DestContent + Ref.offset()); - for (size_t I = 0; I < Ref.Count; ++I) { + for (size_t I = 0; I < Ref.count(); ++I) { TypeIndex &TI = DestTIs[I]; - bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI) + bool Success = (Ref.kind() == TiRefKind::IndexRef) ? remapItemIndex(TI) : remapTypeIndex(TI); if (LLVM_UNLIKELY(!Success)) return {}; Index: llvm/trunk/lib/Support/Windows/Memory.inc =================================================================== --- llvm/trunk/lib/Support/Windows/Memory.inc +++ llvm/trunk/lib/Support/Windows/Memory.inc @@ -23,7 +23,7 @@ namespace { DWORD getWindowsProtectionFlags(unsigned Flags) { - switch (Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { // Contrary to what you might expect, the Windows page protection flags // are not a bitwise combination of RWX values case llvm::sys::Memory::MF_READ: @@ -57,6 +57,31 @@ return Info.dwAllocationGranularity; } +size_t getLargePageSize() { + HANDLE Token = 0; + size_t LargePageMin = GetLargePageMinimum(); + if (LargePageMin) + OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, + &Token); + if (Token) { + LUID Luid; + if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &Luid)) { + TOKEN_PRIVILEGES TP{}; + TP.PrivilegeCount = 1; + TP.Privileges[0].Luid = Luid; + TP.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (AdjustTokenPrivileges(Token, FALSE, &TP, 0, 0, 0)) { + DWORD E = GetLastError(); + if (E == ERROR_SUCCESS) { + return LargePageMin; + } + } + } + CloseHandle(Token); + } + return 0; +} + } // namespace namespace llvm { @@ -87,7 +112,13 @@ GranularityCached = Granularity; } - const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity; + static size_t LargePageSize = getLargePageSize(); + unsigned HugePages = ((Flags & MF_HUGE) && LargePageSize) ? 
MEM_LARGE_PAGES : 0; + if (HugePages) { + Granularity = LargePageSize; + } + + size_t NumBlocks = (NumBytes+Granularity-1)/Granularity; uintptr_t Start = NearBlock ? reinterpret_cast(NearBlock->base()) + NearBlock->size() @@ -99,10 +130,10 @@ Start += Granularity - Start % Granularity; DWORD Protect = getWindowsProtectionFlags(Flags); - + void *PA = ::VirtualAlloc(reinterpret_cast(Start), NumBlocks*Granularity, - MEM_RESERVE | MEM_COMMIT, Protect); + MEM_RESERVE | MEM_COMMIT | HugePages, Protect); if (PA == NULL) { if (NearBlock) { // Try again without the NearBlock hint Index: llvm/trunk/tools/llvm-pdbutil/DumpOutputStyle.cpp =================================================================== --- llvm/trunk/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ llvm/trunk/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1551,7 +1551,6 @@ Pipeline.addCallbackToPipeline(Dumper); CVSymbolVisitor Visitor(Pipeline); - BinaryStreamRef SymStream = Records.getSymbolArray().getUnderlyingStream(); if (auto E = Visitor.visitSymbolStream(Records.getSymbolArray(), 0)) return E; return Error::success(); @@ -1696,7 +1695,6 @@ Pipeline.addCallbackToPipeline(Dumper); CVSymbolVisitor Visitor(Pipeline); - BinaryStreamRef SymStream = ExpectedSyms->getSymbolArray().getUnderlyingStream(); for (uint32_t PubSymOff : Table) { Index: llvm/trunk/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp =================================================================== --- llvm/trunk/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp +++ llvm/trunk/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp @@ -83,7 +83,7 @@ auto &R = Refs[RecordIndex]; uint32_t Count = 0; for (auto &Ref : R) { - Count += Ref.Count; + Count += Ref.count(); } return Count; } @@ -93,10 +93,10 @@ RecordData = RecordData.drop_front(sizeof(RecordPrefix)); auto &RefList = Refs[RecordIndex]; for (auto &Ref : RefList) { - uint32_t Offset = Ref.Offset; + uint32_t Offset = Ref.offset(); ArrayRef Loc = RecordData.drop_front(Offset); 
ArrayRef Indices( - reinterpret_cast(Loc.data()), Ref.Count); + reinterpret_cast(Loc.data()), Ref.count()); if (llvm::any_of(Indices, [TI](const TypeIndex &Other) { return Other == TI; })) return true;