diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 46959334e667..557bdd9c04b3 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -1,484 +1,1137 @@ //===- DebugTypes.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "DebugTypes.h" #include "Chunks.h" #include "Driver.h" #include "InputFiles.h" +#include "PDB.h" #include "TypeMerger.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::codeview; using namespace lld; using namespace lld::coff; namespace { class TypeServerIpiSource; // The TypeServerSource class represents a PDB type server, a file referenced by // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ // files, therefore there must be only once instance per OBJ lot. The file path // is discovered from the dependent OBJ's debug type stream. The // TypeServerSource object is then queued and loaded by the COFF Driver. 
The // debug type stream for such PDB files will be merged first in the final PDB, // before any dependent OBJ. class TypeServerSource : public TpiSource { public: explicit TypeServerSource(PDBInputFile *f) : TpiSource(PDB, nullptr), pdbInputFile(f) { if (f->loadErr && *f->loadErr) return; pdb::PDBFile &file = f->session->getPDBFile(); auto expectedInfo = file.getPDBInfoStream(); if (!expectedInfo) return; auto it = mappings.emplace(expectedInfo->getGuid(), this); assert(it.second); (void)it; } Error mergeDebugT(TypeMerger *m) override; + + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + bool isDependency() const override { return true; } PDBInputFile *pdbInputFile = nullptr; // TpiSource for IPI stream. TypeServerIpiSource *ipiSrc = nullptr; static std::map mappings; }; // Companion to TypeServerSource. Stores the index map for the IPI stream in the // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the // invariant of one type index space per source. class TypeServerIpiSource : public TpiSource { public: explicit TypeServerIpiSource() : TpiSource(PDBIpi, nullptr) {} friend class TypeServerSource; - // IPI merging is handled in TypeServerSource::mergeDebugT, since it depends - // directly on type merging. + // All of the TpiSource methods are no-ops. The parent TypeServerSource + // handles both TPI and IPI. Error mergeDebugT(TypeMerger *m) override { return Error::success(); } - + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override {} bool isDependency() const override { return true; } }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). 
class UseTypeServerSource : public TpiSource { + Expected getTypeServerSource(); + public: UseTypeServerSource(ObjFile *f, TypeServer2Record ts) : TpiSource(UsingPDB, f), typeServerDependency(ts) {} Error mergeDebugT(TypeMerger *m) override; + // No need to load ghashes from /Zi objects. + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override; + // Information about the PDB type server dependency, that needs to be loaded // in before merging this OBJ. TypeServer2Record typeServerDependency; }; // This class represents the debug type stream of a Microsoft precompiled // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output // PDB, before any other OBJs that depend on this. Note that only MSVC generate // such files, clang does not. class PrecompSource : public TpiSource { public: PrecompSource(ObjFile *f) : TpiSource(PCH, f) { if (!f->pchSignature || !*f->pchSignature) fatal(toString(f) + " claims to be a PCH object, but does not have a valid signature"); auto it = mappings.emplace(*f->pchSignature, this); if (!it.second) fatal("a PCH object with the same signature has already been provided (" + toString(it.first->second->file) + " and " + toString(file) + ")"); } + void loadGHashes() override; + bool isDependency() const override { return true; } static std::map mappings; }; // This class represents the debug type stream of an OBJ file that depends on a // Microsoft precompiled headers OBJ (see PrecompSource). class UsePrecompSource : public TpiSource { public: UsePrecompSource(ObjFile *f, PrecompRecord precomp) : TpiSource(UsingPCH, f), precompDependency(precomp) {} Error mergeDebugT(TypeMerger *m) override; + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + +private: + Error mergeInPrecompHeaderObj(); + +public: // Information about the Precomp OBJ dependency, that needs to be loaded in // before merging this OBJ. 
PrecompRecord precompDependency; }; } // namespace -static std::vector gc; +std::vector TpiSource::instances; +ArrayRef TpiSource::dependencySources; +ArrayRef TpiSource::objectSources; -TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) { - gc.push_back(this); +TpiSource::TpiSource(TpiKind k, ObjFile *f) + : kind(k), tpiSrcIdx(instances.size()), file(f) { + instances.push_back(this); } // Vtable key method. -TpiSource::~TpiSource() = default; +TpiSource::~TpiSource() { + // Silence any assertions about unchecked errors. + consumeError(std::move(typeMergingError)); +} + +void TpiSource::sortDependencies() { + // Order dependencies first, but preserve the existing order. + std::vector deps; + std::vector objs; + for (TpiSource *s : instances) + (s->isDependency() ? deps : objs).push_back(s); + uint32_t numDeps = deps.size(); + uint32_t numObjs = objs.size(); + instances = std::move(deps); + instances.insert(instances.end(), objs.begin(), objs.end()); + for (uint32_t i = 0, e = instances.size(); i < e; ++i) + instances[i]->tpiSrcIdx = i; + dependencySources = makeArrayRef(instances.data(), numDeps); + objectSources = makeArrayRef(instances.data() + numDeps, numObjs); +} TpiSource *lld::coff::makeTpiSource(ObjFile *file) { return make(TpiSource::Regular, file); } TpiSource *lld::coff::makeTypeServerSource(PDBInputFile *pdbInputFile) { // Type server sources come in pairs: the TPI stream, and the IPI stream. 
auto *tpiSource = make(pdbInputFile); if (pdbInputFile->session->getPDBFile().hasPDBIpiStream()) tpiSource->ipiSrc = make(); return tpiSource; } TpiSource *lld::coff::makeUseTypeServerSource(ObjFile *file, TypeServer2Record ts) { return make(file, ts); } TpiSource *lld::coff::makePrecompSource(ObjFile *file) { return make(file); } TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file, PrecompRecord precomp) { return make(file, precomp); } -void TpiSource::forEachSource(llvm::function_ref fn) { - for_each(gc, fn); -} - std::map TypeServerSource::mappings; std::map PrecompSource::mappings; +bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { + if (ti.isSimple()) + return true; + + // This can be an item index or a type index. Choose the appropriate map. + ArrayRef tpiOrIpiMap = + (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap; + if (ti.toArrayIndex() >= tpiOrIpiMap.size()) + return false; + ti = tpiOrIpiMap[ti.toArrayIndex()]; + return true; +} + +void TpiSource::remapRecord(MutableArrayRef rec, + ArrayRef typeRefs) { + MutableArrayRef contents = rec.drop_front(sizeof(RecordPrefix)); + for (const TiReference &ref : typeRefs) { + unsigned byteSize = ref.Count * sizeof(TypeIndex); + if (contents.size() < ref.Offset + byteSize) + fatal("symbol record too short"); + + MutableArrayRef indices( + reinterpret_cast(contents.data() + ref.Offset), ref.Count); + for (TypeIndex &ti : indices) { + if (!remapTypeIndex(ti, ref.Kind)) { + if (config->verbose) { + uint16_t kind = + reinterpret_cast(rec.data())->RecordKind; + StringRef fname = file ? file->getName() : ""; + log("failed to remap type index in record of kind 0x" + + utohexstr(kind) + " in " + fname + " with bad " + + (ref.Kind == TiRefKind::IndexRef ? 
"item" : "type") + + " index 0x" + utohexstr(ti.getIndex())); + } + ti = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + } + } +} + +void TpiSource::remapTypesInTypeRecord(MutableArrayRef rec) { + // TODO: Handle errors similar to symbols. + SmallVector typeRefs; + discoverTypeIndices(CVType(rec), typeRefs); + remapRecord(rec, typeRefs); +} + +bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef rec) { + // Discover type index references in the record. Skip it if we don't + // know where they are. + SmallVector typeRefs; + if (!discoverTypeIndicesInSymbol(rec, typeRefs)) + return false; + remapRecord(rec, typeRefs); + return true; +} + // A COFF .debug$H section is currently a clang extension. This function checks // if a .debug$H section is in a format that we expect / understand, so that we // can ignore any sections which are coincidentally also named .debug$H but do // not contain a format we recognize. static bool canUseDebugH(ArrayRef debugH) { if (debugH.size() < sizeof(object::debug_h_header)) return false; auto *header = reinterpret_cast(debugH.data()); debugH = debugH.drop_front(sizeof(object::debug_h_header)); return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC && header->Version == 0 && header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) && (debugH.size() % 8 == 0); } static Optional> getDebugH(ObjFile *file) { SectionChunk *sec = SectionChunk::findByName(file->getDebugChunks(), ".debug$H"); if (!sec) return llvm::None; ArrayRef contents = sec->getContents(); if (!canUseDebugH(contents)) return None; return contents; } static ArrayRef getHashesFromDebugH(ArrayRef debugH) { assert(canUseDebugH(debugH)); - debugH = debugH.drop_front(sizeof(object::debug_h_header)); uint32_t count = debugH.size() / sizeof(GloballyHashedType); return {reinterpret_cast(debugH.data()), count}; } // Merge .debug$T for a generic object file. 
Error TpiSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + CVTypeArray types; BinaryStreamReader reader(file->debugTypes, support::little); cantFail(reader.readArray(types, reader.getLength())); - if (config->debugGHashes) { - ArrayRef hashes; - std::vector ownedHashes; - if (Optional> debugH = getDebugH(file)) - hashes = getHashesFromDebugH(*debugH); - else { - ownedHashes = GloballyHashedType::hashTypes(types); - hashes = ownedHashes; - } - - if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable, - indexMapStorage, types, hashes, - file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } else { - if (auto err = - mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMapStorage, - types, file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } + if (auto err = mergeTypeAndIdRecords( + m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature)) + fatal("codeview::mergeTypeAndIdRecords failed: " + + toString(std::move(err))); // In an object, there is only one mapping for both types and items. tpiMap = indexMapStorage; ipiMap = indexMapStorage; if (config->showSummary) { // Count how many times we saw each type record in our input. This // calculation requires a second pass over the type records to classify each // record as a type or index. This is slow, but this code executes when // collecting statistics. m->tpiCounts.resize(m->getTypeTable().size()); m->ipiCounts.resize(m->getIDTable().size()); uint32_t srcIdx = 0; for (CVType &ty : types) { TypeIndex dstIdx = tpiMap[srcIdx++]; // Type merging may fail, so a complex source type may become the simple // NotTranslated type, which cannot be used as an array index. if (dstIdx.isSimple()) continue; SmallVectorImpl &counts = isIdRecord(ty.kind()) ? 
m->ipiCounts : m->tpiCounts; ++counts[dstIdx.toArrayIndex()]; } } return Error::success(); } // Merge types from a type server PDB. Error TypeServerSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); Expected expectedTpi = pdbFile.getPDBTpiStream(); if (auto e = expectedTpi.takeError()) fatal("Type server does not have TPI stream: " + toString(std::move(e))); pdb::TpiStream *maybeIpi = nullptr; if (pdbFile.hasPDBIpiStream()) { Expected expectedIpi = pdbFile.getPDBIpiStream(); if (auto e = expectedIpi.takeError()) fatal("Error getting type server IPI stream: " + toString(std::move(e))); maybeIpi = &*expectedIpi; } - if (config->debugGHashes) { - // PDBs do not actually store global hashes, so when merging a type server - // PDB we have to synthesize global hashes. To do this, we first synthesize - // global hashes for the TPI stream, since it is independent, then we - // synthesize hashes for the IPI stream, using the hashes for the TPI stream - // as inputs. - auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray()); - Optional endPrecomp; - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = - mergeTypeRecords(m->globalTypeTable, indexMapStorage, - expectedTpi->typeArray(), tpiHashes, endPrecomp)) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - auto ipiHashes = - GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes); - if (auto err = - mergeIdRecords(m->globalIDTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray(), ipiHashes)) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } - } else { - // Merge TPI first, because the IPI stream will reference type indices. 
- if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, - expectedTpi->typeArray())) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray())) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } + // Merge TPI first, because the IPI stream will reference type indices. + if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, + expectedTpi->typeArray())) + fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); + tpiMap = indexMapStorage; + + // Merge IPI. + if (maybeIpi) { + if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, + maybeIpi->typeArray())) + fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); + ipiMap = ipiSrc->indexMapStorage; } if (config->showSummary) { // Count how many times we saw each type record in our input. If a // destination type index is present in the source to destination type index // map, that means we saw it once in the input. Add it to our histogram. 
m->tpiCounts.resize(m->getTypeTable().size()); m->ipiCounts.resize(m->getIDTable().size()); for (TypeIndex ti : tpiMap) if (!ti.isSimple()) ++m->tpiCounts[ti.toArrayIndex()]; for (TypeIndex ti : ipiMap) if (!ti.isSimple()) ++m->ipiCounts[ti.toArrayIndex()]; } return Error::success(); } -Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { +Expected UseTypeServerSource::getTypeServerSource() { const codeview::GUID &tsId = typeServerDependency.getGuid(); StringRef tsPath = typeServerDependency.getName(); TypeServerSource *tsSrc; auto it = TypeServerSource::mappings.find(tsId); if (it != TypeServerSource::mappings.end()) { tsSrc = it->second; } else { // The file failed to load, lookup by name PDBInputFile *pdb = PDBInputFile::findFromRecordPath(tsPath, file); if (!pdb) return createFileError(tsPath, errorCodeToError(std::error_code( ENOENT, std::generic_category()))); // If an error occurred during loading, throw it now if (pdb->loadErr && *pdb->loadErr) return createFileError(tsPath, std::move(*pdb->loadErr)); tsSrc = (TypeServerSource *)pdb->debugTypesObj; } + return tsSrc; +} - pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile(); +Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { + Expected tsSrc = getTypeServerSource(); + if (!tsSrc) + return tsSrc.takeError(); + + pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); auto expectedInfo = pdbSession.getPDBInfoStream(); if (!expectedInfo) return expectedInfo.takeError(); // Just because a file with a matching name was found and it was an actual // PDB file doesn't mean it matches. For it to match the InfoStream's GUID // must match the GUID specified in the TypeServer2 record. if (expectedInfo->getGuid() != typeServerDependency.getGuid()) return createFileError( - tsPath, + typeServerDependency.getName(), make_error(pdb::pdb_error_code::signature_out_of_date)); // Reuse the type index map of the type server. 
- tpiMap = tsSrc->tpiMap; - ipiMap = tsSrc->ipiMap; + tpiMap = (*tsSrc)->tpiMap; + ipiMap = (*tsSrc)->ipiMap; return Error::success(); } static bool equalsPath(StringRef path1, StringRef path2) { #if defined(_WIN32) return path1.equals_lower(path2); #else return path1.equals(path2); #endif } // Find by name an OBJ provided on the command line static PrecompSource *findObjByName(StringRef fileNameOnly) { SmallString<128> currentPath; for (auto kv : PrecompSource::mappings) { StringRef currentFileName = sys::path::filename(kv.second->file->getName(), sys::path::Style::windows); // Compare based solely on the file name (link.exe behavior) if (equalsPath(currentFileName, fileNameOnly)) return kv.second; } return nullptr; } -static Expected findPrecompMap(ObjFile *file, - PrecompRecord &pr) { +static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) { // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, // the paths embedded in the OBJs are in the Windows format. 
SmallString<128> prFileName = sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows); - PrecompSource *precomp; auto it = PrecompSource::mappings.find(pr.getSignature()); if (it != PrecompSource::mappings.end()) { - precomp = it->second; - } else { - // Lookup by name - precomp = findObjByName(prFileName); + return it->second; } + // Lookup by name + return findObjByName(prFileName); +} + +static Expected findPrecompMap(ObjFile *file, + PrecompRecord &pr) { + PrecompSource *precomp = findPrecompSource(file, pr); if (!precomp) return createFileError( - prFileName, + pr.getPrecompFilePath(), make_error(pdb::pdb_error_code::no_matching_pch)); if (pr.getSignature() != file->pchSignature) return createFileError( toString(file), make_error(pdb::pdb_error_code::no_matching_pch)); if (pr.getSignature() != *precomp->file->pchSignature) return createFileError( toString(precomp->file), make_error(pdb::pdb_error_code::no_matching_pch)); return precomp; } /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. -static Error -mergeInPrecompHeaderObj(ObjFile *file, - SmallVectorImpl &indexMapStorage, - PrecompRecord &precomp) { - auto e = findPrecompMap(file, precomp); +Error UsePrecompSource::mergeInPrecompHeaderObj() { + auto e = findPrecompMap(file, precompDependency); if (!e) return e.takeError(); PrecompSource *precompSrc = *e; if (precompSrc->tpiMap.empty()) return Error::success(); - assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); - assert(precomp.getTypesCount() <= precompSrc->tpiMap.size()); + assert(precompDependency.getStartTypeIndex() == + TypeIndex::FirstNonSimpleIndex); + assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); // Use the previously remapped index map from the precompiled headers. 
indexMapStorage.append(precompSrc->tpiMap.begin(), - precompSrc->tpiMap.begin() + precomp.getTypesCount()); + precompSrc->tpiMap.begin() + + precompDependency.getTypesCount()); + + if (config->debugGHashes) + funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy + return Error::success(); } Error UsePrecompSource::mergeDebugT(TypeMerger *m) { // This object was compiled with /Yu, so process the corresponding // precompiled headers object (/Yc) first. Some type indices in the current // object are referencing data in the precompiled headers object, so we need // both to be loaded. - if (Error e = - mergeInPrecompHeaderObj(file, indexMapStorage, precompDependency)) + if (Error e = mergeInPrecompHeaderObj()) return e; return TpiSource::mergeDebugT(m); } uint32_t TpiSource::countTypeServerPDBs() { return TypeServerSource::mappings.size(); } uint32_t TpiSource::countPrecompObjs() { return PrecompSource::mappings.size(); } void TpiSource::clear() { - gc.clear(); + // Clean up any owned ghash allocations. + clearGHashes(); + TpiSource::instances.clear(); TypeServerSource::mappings.clear(); PrecompSource::mappings.clear(); } + +//===----------------------------------------------------------------------===// +// Parallel GHash type merging implementation. +//===----------------------------------------------------------------------===// + +void TpiSource::loadGHashes() { + if (Optional> debugH = getDebugH(file)) { + ghashes = getHashesFromDebugH(*debugH); + ownedGHashes = false; + } else { + CVTypeArray types; + BinaryStreamReader reader(file->debugTypes, support::little); + cantFail(reader.readArray(types, reader.getLength())); + assignGHashesFromVector(GloballyHashedType::hashTypes(types)); + } + + fillIsItemIndexFromDebugT(); +} + +// Copies ghashes from a vector into an array. These are long lived, so it's +// worth the time to copy these into an appropriately sized vector to reduce +// memory usage. 
+void TpiSource::assignGHashesFromVector( + std::vector &&hashVec) { + GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; + memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType)); + ghashes = makeArrayRef(hashes, hashVec.size()); + ownedGHashes = true; +} + +// Faster way to iterate type records. forEachTypeChecked is faster than +// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. +static void forEachTypeChecked(ArrayRef types, + function_ref fn) { + checkError( + forEachCodeViewRecord(types, [fn](const CVType &ty) -> Error { + fn(ty); + return Error::success(); + })); +} + +// Walk over file->debugTypes and fill in the isItemIndex bit vector. +// TODO: Store this information in .debug$H so that we don't have to recompute +// it. This is the main bottleneck slowing down parallel ghashing with one +// thread over single-threaded ghashing. +void TpiSource::fillIsItemIndexFromDebugT() { + uint32_t index = 0; + isItemIndex.resize(ghashes.size()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + if (isIdRecord(ty.kind())) + isItemIndex.set(index); + ++index; + }); +} + +void TpiSource::mergeTypeRecord(CVType ty) { + // Decide if the merged type goes into TPI or IPI. + bool isItem = isIdRecord(ty.kind()); + MergedInfo &merged = isItem ? mergedIpi : mergedTpi; + + // Copy the type into our mutable buffer. + assert(ty.length() <= codeview::MaxRecordLength); + size_t offset = merged.recs.size(); + size_t newSize = alignTo(ty.length(), 4); + merged.recs.resize(offset + newSize); + auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize); + memcpy(newRec.data(), ty.data().data(), newSize); + + // Fix up the record prefix and padding bytes if it required resizing. 
+ if (newSize != ty.length()) { + reinterpret_cast(newRec.data())->RecordLen = newSize - 2; + for (size_t i = ty.length(); i < newSize; ++i) + newRec[i] = LF_PAD0 + (newSize - i); + } + + // Remap the type indices in the new record. + remapTypesInTypeRecord(newRec); + uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); + merged.recSizes.push_back(static_cast(newSize)); + merged.recHashes.push_back(pdbHash); +} + +void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, + TypeIndex beginIndex) { + // Re-sort the list of unique types by index. + if (kind == PDB) + assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end())); + else + llvm::sort(uniqueTypes); + + // Accumulate all the unique types into one buffer in mergedTypes. + uint32_t ghashIndex = 0; + auto nextUniqueIndex = uniqueTypes.begin(); + assert(mergedTpi.recs.empty()); + assert(mergedIpi.recs.empty()); + forEachTypeChecked(typeRecords, [&](const CVType &ty) { + if (nextUniqueIndex != uniqueTypes.end() && + *nextUniqueIndex == ghashIndex) { + mergeTypeRecord(ty); + ++nextUniqueIndex; + } + if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { + bool success = ty.length() >= 12; + TypeIndex srcFuncIdIndex = beginIndex + ghashIndex; + TypeIndex funcId = srcFuncIdIndex; + TypeIndex funcType; + if (success) { + funcType = *reinterpret_cast(&ty.data()[8]); + success &= remapTypeIndex(funcId, TiRefKind::IndexRef); + success &= remapTypeIndex(funcType, TiRefKind::TypeRef); + } + if (success) { + funcIdToType.insert({funcId, funcType}); + } else { + StringRef fname = file ? 
file->getName() : ""; + warn("corrupt LF_[M]FUNC_ID record 0x" + + utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname); + } + } + ++ghashIndex; + }); + assert(nextUniqueIndex == uniqueTypes.end() && + "failed to merge all desired records"); + assert(uniqueTypes.size() == + mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && + "missing desired record"); +} + +void TpiSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes); + // TODO: Free all unneeded ghash resources now that we have a full index map. +} + +// PDBs do not actually store global hashes, so when merging a type server +// PDB we have to synthesize global hashes. To do this, we first synthesize +// global hashes for the TPI stream, since it is independent, then we +// synthesize hashes for the IPI stream, using the hashes for the TPI stream +// as inputs. +void TypeServerSource::loadGHashes() { + // Don't hash twice. + if (!ghashes.empty()) + return; + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + + // Hash TPI stream. + Expected expectedTpi = pdbFile.getPDBTpiStream(); + if (auto e = expectedTpi.takeError()) + fatal("Type server does not have TPI stream: " + toString(std::move(e))); + assignGHashesFromVector( + GloballyHashedType::hashTypes(expectedTpi->typeArray())); + isItemIndex.resize(ghashes.size()); + + // Hash IPI stream, which depends on TPI ghashes. + if (!pdbFile.hasPDBIpiStream()) + return; + Expected expectedIpi = pdbFile.getPDBIpiStream(); + if (auto e = expectedIpi.takeError()) + fatal("error retreiving IPI stream: " + toString(std::move(e))); + ipiSrc->assignGHashesFromVector( + GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); + + // The IPI stream isItemIndex bitvector should be all ones. 
+ ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); + ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); +} + +// Flatten discontiguous PDB type arrays to bytes so that we can use +// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from +// type servers is faster than iterating all object files compiled with /Z7 with +// CVTypeArray, which has high overheads due to the virtual interface of +// BinaryStream::readBytes. +static ArrayRef typeArrayToBytes(const CVTypeArray &types) { + BinaryStreamRef stream = types.getUnderlyingStream(); + ArrayRef debugTypes; + checkError(stream.readBytes(0, stream.getLength(), debugTypes)); + return debugTypes; +} + +// Merge types from a type server PDB. +void TypeServerSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + + // IPI merging depends on TPI, so do TPI first, then do IPI. No need to + // propagate errors, those should've been handled during ghash loading. + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream()); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray())); + if (pdbFile.hasPDBIpiStream()) { + pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream()); + ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size()); + ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage); + ipiMap = ipiSrc->indexMapStorage; + ipiSrc->tpiMap = tpiMap; + ipiSrc->ipiMap = ipiMap; + ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); + funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy + } +} + +void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { + // No remapping to do with /Zi objects. Simply use the index map from the type + // server. Errors should have been reported earlier. Symbols from this object + // will be ignored. 
+ Expected maybeTsSrc = getTypeServerSource(); + if (!maybeTsSrc) { + typeMergingError = + joinErrors(std::move(typeMergingError), maybeTsSrc.takeError()); + return; + } + TypeServerSource *tsSrc = *maybeTsSrc; + tpiMap = tsSrc->tpiMap; + ipiMap = tsSrc->ipiMap; + funcIdToType = tsSrc->funcIdToType; // FIXME: Save copy +} + +void PrecompSource::loadGHashes() { + if (getDebugH(file)) { + warn("ignoring .debug$H section; pch with ghash is not implemented"); + } + + uint32_t ghashIdx = 0; + std::vector hashVec; + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + // Remember the index of the LF_ENDPRECOMP record so it can be excluded from + // the PDB. There must be an entry in the list of ghashes so that the type + // indexes of the following records in the /Yc PCH object line up. + if (ty.kind() == LF_ENDPRECOMP) + endPrecompGHashIdx = ghashIdx; + + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + ++ghashIdx; + }); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::loadGHashes() { + PrecompSource *pchSrc = findPrecompSource(file, precompDependency); + if (!pchSrc) + return; + + // To compute ghashes of a /Yu object file, we need to build on the + // ghashes of the /Yc PCH object. After we are done hashing, discard the + // ghashes from the PCH source so we don't unnecessarily try to deduplicate + // them. 
+ std::vector hashVec = + pchSrc->ghashes.take_front(precompDependency.getTypesCount()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + }); + hashVec.erase(hashVec.begin(), + hashVec.begin() + precompDependency.getTypesCount()); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { + // This object was compiled with /Yu, so process the corresponding + // precompiled headers object (/Yc) first. Some type indices in the current + // object are referencing data in the precompiled headers object, so we need + // both to be loaded. + if (Error e = mergeInPrecompHeaderObj()) { + typeMergingError = joinErrors(std::move(typeMergingError), std::move(e)); + return; + } + + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes, + TypeIndex(precompDependency.getStartTypeIndex() + + precompDependency.getTypesCount())); +} + +namespace { +/// A concurrent hash table for global type hashing. It is based on this paper: +/// Concurrent Hash Tables: Fast and General(?)! +/// https://dl.acm.org/doi/10.1145/3309206 +/// +/// This hash table is meant to be used in two phases: +/// 1. concurrent insertions +/// 2. concurrent reads +/// It does not support lookup, deletion, or rehashing. It uses linear probing. +/// +/// The paper describes storing a key-value pair in two machine words. +/// Generally, the values stored in this map are type indices, and we can use +/// those values to recover the ghash key from a side table. This allows us to +/// shrink the table entries further at the cost of some loads, and sidesteps +/// the need for a 128 bit atomic compare-and-swap operation. +/// +/// During insertion, a priority function is used to decide which insertion +/// should be preferred. 
This ensures that the output is deterministic. For +/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. +/// +class GHashCell; +struct GHashTable { + GHashCell *table = nullptr; + uint32_t tableSize = 0; + + GHashTable() = default; + ~GHashTable(); + + /// Initialize the table with the given size. Because the table cannot be + /// resized, the initial size of the table must be large enough to contain all + /// inputs, or insertion may not be able to find an empty cell. + void init(uint32_t newTableSize); + + /// Insert the cell with the given ghash into the table. Return the insertion + /// position in the table. It is safe for the caller to store the insertion + /// position because the table cannot be resized. + uint32_t insert(GloballyHashedType ghash, GHashCell newCell); +}; + +/// A ghash table cell for deduplicating types from TpiSources. +class GHashCell { + uint64_t data = 0; + +public: + GHashCell() = default; + + // Construct data most to least significant so that sorting works well: + // - isItem + // - tpiSrcIdx + // - ghashIdx + // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a + // non-zero representation. + GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx) + : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) | + ghashIdx) { + assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure"); + assert(ghashIdx == getGHashIdx() && "round trip failure"); + } + + explicit GHashCell(uint64_t data) : data(data) {} + + // The empty cell is all zeros. + bool isEmpty() const { return data == 0ULL; } + + /// Extract the tpiSrcIdx. + uint32_t getTpiSrcIdx() const { + return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1; + } + + /// Extract the index into the ghash array of the TpiSource. + uint32_t getGHashIdx() const { return (uint32_t)data; } + + bool isItem() const { return data & (1ULL << 63U); } + + /// Get the ghash key for this cell. 
+ GloballyHashedType getGHash() const { + return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()]; + } + + /// The priority function for the cell. The data is stored such that lower + /// tpiSrcIdx and ghashIdx values are preferred, which means that type record + /// from earlier sources are more likely to prevail. + friend inline bool operator<(const GHashCell &l, const GHashCell &r) { + return l.data < r.data; + } +}; +} // namespace + +namespace lld { +namespace coff { +/// This type is just a wrapper around GHashTable with external linkage so it +/// can be used from a header. +struct GHashState { + GHashTable table; +}; +} // namespace coff +} // namespace lld + +GHashTable::~GHashTable() { delete[] table; } + +void GHashTable::init(uint32_t newTableSize) { + table = new GHashCell[newTableSize]; + memset(table, 0, newTableSize * sizeof(GHashCell)); + tableSize = newTableSize; +} + +uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) { + assert(!newCell.isEmpty() && "cannot insert empty cell value"); + + // FIXME: The low bytes of SHA1 have low entropy for short records, which + // type records are. Swap the byte order for better entropy. A better ghash + // won't need this. + uint32_t startIdx = + ByteSwap_64(*reinterpret_cast(&ghash)) % tableSize; + + // Do a linear probe starting at startIdx. + uint32_t idx = startIdx; + while (true) { + // Run a compare and swap loop. There are four cases: + // - cell is empty: CAS into place and return + // - cell has matching key, earlier priority: do nothing, return + // - cell has matching key, later priority: CAS into place and return + // - cell has non-matching key: hash collision, probe next cell + auto *cellPtr = reinterpret_cast *>(&table[idx]); + GHashCell oldCell(cellPtr->load()); + while (oldCell.isEmpty() || oldCell.getGHash() == ghash) { + // Check if there is an existing ghash entry with a higher priority + // (earlier ordering). If so, this is a duplicate, we are done. 
+ if (!oldCell.isEmpty() && oldCell < newCell) + return idx; + // Either the cell is empty, or our value is higher priority. Try to + // compare and swap. If it succeeds, we are done. + if (cellPtr->compare_exchange_weak(oldCell, newCell)) + return idx; + // If the CAS failed, check this cell again. + } + + // Advance the probe. Wrap around to the beginning if we run off the end. + ++idx; + idx = idx == tableSize ? 0 : idx; + if (idx == startIdx) { + // If this becomes an issue, we could mark failure and rehash from the + // beginning with a bigger table. There is no difference between rehashing + // internally and starting over. + report_fatal_error("ghash table is full"); + } + } + llvm_unreachable("left infloop"); +} + +TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc) + : typeTable(alloc), idTable(alloc) {} + +TypeMerger::~TypeMerger() = default; + +void TypeMerger::mergeTypesWithGHash() { + // Load ghashes. Do type servers and PCH objects first. + { + ScopedTimer t1(loadGHashTimer); + parallelForEach(TpiSource::dependencySources, + [&](TpiSource *source) { source->loadGHashes(); }); + parallelForEach(TpiSource::objectSources, + [&](TpiSource *source) { source->loadGHashes(); }); + } + + ScopedTimer t2(mergeGHashTimer); + GHashState ghashState; + + // Estimate the size of hash table needed to deduplicate ghashes. This *must* + // be larger than the number of unique types, or hash table insertion may not + // be able to find a vacant slot. Summing the input types guarantees this, but + // it is a gross overestimate. The table size could be reduced to save memory, + // but it would require implementing rehashing, and this table is generally + // small compared to total memory usage, at eight bytes per input type record, + // and most input type records are larger than eight bytes. + size_t tableSize = 0; + for (TpiSource *source : TpiSource::instances) + tableSize += source->ghashes.size(); + + // Cap the table size so that we can use 32-bit cell indices. 
Type indices are + // also 32-bit, so this is an inherent PDB file format limit anyway. + tableSize = std::min(size_t(INT32_MAX), tableSize); + ghashState.table.init(static_cast(tableSize)); + + // Insert ghashes in parallel. During concurrent insertion, we cannot observe + // the contents of the hash table cell, but we can remember the insertion + // position. Because the table does not rehash, the position will not change + // under insertion. After insertion is done, the value of the cell can be read + // to retreive the final PDB type index. + parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) { + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->indexMapStorage.resize(source->ghashes.size()); + for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) { + if (source->shouldOmitFromPdb(i)) { + source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + GloballyHashedType ghash = source->ghashes[i]; + bool isItem = source->isItemIndex.test(i); + uint32_t cellIdx = + ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i)); + + // Store the ghash cell index as a type index in indexMapStorage. Later + // we will replace it with the PDB type index. + source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx); + } + }); + + // Collect all non-empty cells and sort them. This will implicitly assign + // destination type indices, and partition the entries into type records and + // item records. It arranges types in this order: + // - type records + // - source 0, type 0... + // - source 1, type 1... + // - item records + // - source 0, type 1... + // - source 1, type 0... 
+ std::vector entries; + for (const GHashCell &cell : + makeArrayRef(ghashState.table.table, tableSize)) { + if (!cell.isEmpty()) + entries.push_back(cell); + } + parallelSort(entries, std::less()); + log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n", + double(entries.size()) / tableSize, entries.size(), tableSize)); + + // Find out how many type and item indices there are. + auto mid = + std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0)); + assert((mid == entries.end() || mid->isItem()) && + (mid == entries.begin() || !std::prev(mid)->isItem()) && + "midpoint is not midpoint"); + uint32_t numTypes = std::distance(entries.begin(), mid); + uint32_t numItems = std::distance(mid, entries.end()); + log("Tpi record count: " + Twine(numTypes)); + log("Ipi record count: " + Twine(numItems)); + + // Make a list of the "unique" type records to merge for each tpi source. Type + // merging will skip indices not on this list. Store the destination PDB type + // index for these unique types in the tpiMap for each source. The entries for + // non-unique types will be filled in prior to type merging. + for (uint32_t i = 0, e = entries.size(); i < e; ++i) { + auto &cell = entries[i]; + uint32_t tpiSrcIdx = cell.getTpiSrcIdx(); + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->uniqueTypes.push_back(cell.getGHashIdx()); + + // Update the ghash table to store the destination PDB type index in the + // table. + uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes; + uint32_t ghashCellIndex = + source->indexMapStorage[cell.getGHashIdx()].toArrayIndex(); + ghashState.table.table[ghashCellIndex] = + GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex); + } + + // In parallel, remap all types. 
+ for_each(TpiSource::dependencySources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + parallelForEach(TpiSource::objectSources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + + TpiSource::clearGHashes(); +} + +/// Given the index into the ghash table for a particular type, return the type +/// index for that type in the output PDB. +static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, + uint32_t ghashCellIdx) { + GHashCell cell = g->table.table[ghashCellIdx]; + return TypeIndex::fromArrayIndex(cell.getGHashIdx()); +} + +// Fill in a TPI or IPI index map using ghashes. For each source type, use its +// ghash to lookup its final type index in the PDB, and store that in the map. +void TpiSource::fillMapFromGHashes(GHashState *g, + SmallVectorImpl &mapToFill) { + for (size_t i = 0, e = ghashes.size(); i < e; ++i) { + TypeIndex fakeCellIndex = indexMapStorage[i]; + if (fakeCellIndex.isSimple()) + mapToFill[i] = fakeCellIndex; + else + mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); + } +} + +void TpiSource::clearGHashes() { + for (TpiSource *src : TpiSource::instances) { + if (src->ownedGHashes) + delete[] src->ghashes.data(); + src->ghashes = {}; + src->isItemIndex.clear(); + src->uniqueTypes.clear(); + } +} diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index f97c0f761744..17368244e589 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -1,92 +1,198 @@ //===- DebugTypes.h ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_DEBUGTYPES_H #define LLD_COFF_DEBUGTYPES_H #include "lld/Common/LLVM.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { namespace codeview { -class PrecompRecord; -class TypeServer2Record; +struct GloballyHashedType; } // namespace codeview namespace pdb { class NativeSession; +class TpiStream; } } // namespace llvm namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; using llvm::codeview::TypeIndex; class ObjFile; class PDBInputFile; class TypeMerger; +struct GHashState; class TpiSource { public: - enum TpiKind { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; + enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; TpiSource(TpiKind k, ObjFile *f); virtual ~TpiSource(); /// Produce a mapping from the type and item indices used in the object /// file to those in the destination PDB. /// /// If the object file uses a type server PDB (compiled with /Zi), merge TPI /// and IPI from the type server PDB and return a map for it. Each unique type /// server PDB is merged at most once, so this may return an existing index /// mapping. /// /// If the object does not use a type server PDB (compiled with /Z7), we merge /// all the type and item records from the .debug$S stream and fill in the /// caller-provided ObjectIndexMap. virtual Error mergeDebugT(TypeMerger *m); + /// Load global hashes, either by hashing types directly, or by loading them + /// from LLVM's .debug$H section. + virtual void loadGHashes(); + + /// Use global hashes to merge type information. 
+ virtual void remapTpiWithGHashes(GHashState *g); + + // Remap a type index in place. + bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const; + +protected: + void remapRecord(MutableArrayRef rec, + ArrayRef typeRefs); + + void mergeTypeRecord(llvm::codeview::CVType ty); + + // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex + // of the first record in this source, typically 0x1000. When PCHs are + // involved, it may start higher. + void mergeUniqueTypeRecords( + ArrayRef debugTypes, + TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex)); + + // Use the ghash table to construct a map from source type index to + // destination PDB type index. Usable for either TPI or IPI. + void fillMapFromGHashes(GHashState *m, + llvm::SmallVectorImpl &indexMap); + + // Copies ghashes from a vector into an array. These are long lived, so it's + // worth the time to copy these into an appropriately sized vector to reduce + // memory usage. + void assignGHashesFromVector(std::vector &&hashVec); + + // Walk over file->debugTypes and fill in the isItemIndex bit vector. + void fillIsItemIndexFromDebugT(); + +public: + bool remapTypesInSymbolRecord(MutableArrayRef rec); + + void remapTypesInTypeRecord(MutableArrayRef rec); + /// Is this a dependent file that needs to be processed first, before other /// OBJs? virtual bool isDependency() const { return false; } - static void forEachSource(llvm::function_ref fn); + /// Returns true if this type record should be omitted from the PDB, even if + /// it is unique. This prevents a record from being added to the input ghash + /// table. + bool shouldOmitFromPdb(uint32_t ghashIdx) { + return ghashIdx == endPrecompGHashIdx; + } + + /// All sources of type information in the program. + static std::vector instances; + + /// Dependency type sources, such as type servers or PCH object files. These + /// must be processed before objects that rely on them. 
Set by + /// TpiSources::sortDependencies. + static ArrayRef dependencySources; + + /// Object file sources. These must be processed after dependencySources. + static ArrayRef objectSources; + + /// Sorts the dependencies and reassigns TpiSource indices. + static void sortDependencies(); static uint32_t countTypeServerPDBs(); static uint32_t countPrecompObjs(); + /// Free heap allocated ghashes. + static void clearGHashes(); + /// Clear global data structures for TpiSources. static void clear(); const TpiKind kind; + bool ownedGHashes = true; + uint32_t tpiSrcIdx = 0; + +protected: + /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record + /// in this object, if one exists. This is the all ones value otherwise. It is + /// recorded here so that it can be omitted from the final ghash table. + uint32_t endPrecompGHashIdx = ~0U; + +public: ObjFile *file; + /// An error encountered during type merging, if any. + Error typeMergingError = Error::success(); + // Storage for tpiMap or ipiMap, depending on the kind of source. llvm::SmallVector indexMapStorage; // Source type index to PDB type index mapping for type and item records. // These mappings will be the same for /Z7 objects, and distinct for /Zi // objects. llvm::ArrayRef tpiMap; llvm::ArrayRef ipiMap; + + /// Array of global type hashes, indexed by TypeIndex. May be calculated on + /// demand, or present in input object files. + llvm::ArrayRef ghashes; + + /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function + /// type index here. Both indices are PDB indices, not object type indexes. + llvm::DenseMap funcIdToType; + + /// Indicates if a type record is an item index or a type index. + llvm::BitVector isItemIndex; + + /// A list of all "unique" type indices which must be merged into the final + /// PDB. GHash type deduplication produces this list, and it should be + /// considerably smaller than the input. 
+ std::vector uniqueTypes; + + struct MergedInfo { + std::vector recs; + std::vector recSizes; + std::vector recHashes; + }; + + MergedInfo mergedTpi; + MergedInfo mergedIpi; }; TpiSource *makeTpiSource(ObjFile *file); TpiSource *makeTypeServerSource(PDBInputFile *pdbInputFile); TpiSource *makeUseTypeServerSource(ObjFile *file, llvm::codeview::TypeServer2Record ts); TpiSource *makePrecompSource(ObjFile *file); TpiSource *makeUsePrecompSource(ObjFile *file, llvm::codeview::PrecompRecord ts); } // namespace coff } // namespace lld #endif diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index fb496a1c106f..56717de226c2 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1,2149 +1,2149 @@ //===- Driver.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Driver.h" #include "Config.h" #include "DebugTypes.h" #include "ICF.h" #include "InputFiles.h" #include "MarkLive.h" #include "MinGW.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Common/Args.h" #include "lld/Common/Driver.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/CommandLine.h" 
#include "llvm/Support/Debug.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include #include #include using namespace llvm; using namespace llvm::object; using namespace llvm::COFF; using llvm::sys::Process; namespace lld { namespace coff { static Timer inputFileTimer("Input File Reading", Timer::root()); Configuration *config; LinkerDriver *driver; bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS, raw_ostream &stderrOS) { lld::stdoutOS = &stdoutOS; lld::stderrOS = &stderrOS; errorHandler().cleanupCallback = []() { + TpiSource::clear(); freeArena(); ObjFile::instances.clear(); PDBInputFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); - TpiSource::clear(); OutputSection::clear(); }; errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorLimitExceededMsg = "too many errors emitted, stopping now" " (use /errorlimit:0 to see all errors)"; errorHandler().exitEarly = canExitEarly; stderrOS.enable_colors(stderrOS.has_colors()); config = make(); symtab = make(); driver = make(); driver->link(args); // Call exit() if we can to avoid calling destructors. if (canExitEarly) exitLld(errorCount() ? 1 : 0); bool ret = errorCount() == 0; if (!canExitEarly) errorHandler().reset(); return ret; } // Parse options of the form "old;new". 
static std::pair getOldNewOptions(opt::InputArgList &args, unsigned id) { auto *arg = args.getLastArg(id); if (!arg) return {"", ""}; StringRef s = arg->getValue(); std::pair ret = s.split(';'); if (ret.second.empty()) error(arg->getSpelling() + " expects 'old;new' format, but got " + s); return ret; } // Drop directory components and replace extension with // ".exe", ".dll" or ".sys". static std::string getOutputPath(StringRef path) { StringRef ext = ".exe"; if (config->dll) ext = ".dll"; else if (config->driver) ext = ".sys"; return (sys::path::stem(path) + ext).str(); } // Returns true if S matches /crtend.?\.o$/. static bool isCrtend(StringRef s) { if (!s.endswith(".o")) return false; s = s.drop_back(2); if (s.endswith("crtend")) return true; return !s.empty() && s.drop_back().endswith("crtend"); } // ErrorOr is not default constructible, so it cannot be used as the type // parameter of a future. // FIXME: We could open the file in createFutureForFile and avoid needing to // return an error here, but for the moment that would cost us a file descriptor // (a limited resource on Windows) for the duration that the future is pending. using MBErrPair = std::pair, std::error_code>; // Create a std::future that opens and maps a file using the best strategy for // the host platform. static std::future createFutureForFile(std::string path) { #if _WIN32 // On Windows, file I/O is relatively slow so it is best to do this // asynchronously. auto strategy = std::launch::async; #else auto strategy = std::launch::deferred; #endif return std::async(strategy, [=]() { auto mbOrErr = MemoryBuffer::getFile(path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!mbOrErr) return MBErrPair{nullptr, mbOrErr.getError()}; return MBErrPair{std::move(*mbOrErr), std::error_code()}; }); } // Symbol names are mangled by prepending "_" on x86. 
// Applies the target's symbol decoration: on x86 a leading "_" is prepended,
// on every other machine the name is returned unchanged. The machine type
// must already be known.
static StringRef mangle(StringRef sym) {
  assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
  const bool needsUnderscore = config->machine == I386;
  if (!needsUnderscore)
    return sym;
  return saver.save("_" + sym);
}
Did you specify a DLL instead of an " "import library?"); break; } LLVM_FALLTHROUGH; default: error(mbref.getBufferIdentifier() + ": unknown file type"); break; } } void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) { auto future = std::make_shared>( createFutureForFile(std::string(path))); std::string pathStr = std::string(path); enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) { std::string msg = "could not open '" + pathStr + "': " + mbOrErr.second.message(); // Check if the filename is a typo for an option flag. OptTable thinks // that all args that are not known options and that start with / are // filenames, but e.g. `/nodefaultlibs` is more likely a typo for // the option `/nodefaultlib` than a reference to a file in the root // directory. std::string nearest; if (optTable.findNearest(pathStr, nearest) > 1) error(msg); else error(msg + "; did you mean '" + nearest + "'"); } else driver->addBuffer(std::move(mbOrErr.first), wholeArchive, lazy); }); } void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName, StringRef parentName, uint64_t offsetInArchive) { file_magic magic = identify_magic(mb.getBuffer()); if (magic == file_magic::coff_import_library) { InputFile *imp = make(mb); imp->parentName = parentName; symtab->addFile(imp); return; } InputFile *obj; if (magic == file_magic::coff_object) { obj = make(mb); } else if (magic == file_magic::bitcode) { obj = make(mb, parentName, offsetInArchive); } else { error("unknown file type: " + mb.getBufferIdentifier()); return; } obj->parentName = parentName; symtab->addFile(obj); log("Loaded " + toString(obj) + " for " + symName); } void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym, StringRef parentName) { auto reportBufferError = [=](Error &&e, StringRef childName) { fatal("could not get the buffer for the member defining symbol " + toCOFFString(sym) + ": " + parentName + "(" + childName + "): " + 
toString(std::move(e))); }; if (!c.getParent()->isThin()) { uint64_t offsetInArchive = c.getChildOffset(); Expected mbOrErr = c.getMemoryBufferRef(); if (!mbOrErr) reportBufferError(mbOrErr.takeError(), check(c.getFullName())); MemoryBufferRef mb = mbOrErr.get(); enqueueTask([=]() { driver->addArchiveBuffer(mb, toCOFFString(sym), parentName, offsetInArchive); }); return; } std::string childName = CHECK( c.getFullName(), "could not get the filename for the member defining symbol " + toCOFFString(sym)); auto future = std::make_shared>( createFutureForFile(childName)); enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) reportBufferError(errorCodeToError(mbOrErr.second), childName); // Pass empty string as archive name so that the original filename is // used as the buffer identifier. driver->addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)), toCOFFString(sym), "", /*OffsetInArchive=*/0); }); } static bool isDecorated(StringRef sym) { return sym.startswith("@") || sym.contains("@@") || sym.startswith("?") || (!config->mingw && sym.contains('@')); } // Parses .drectve section contents and returns a list of files // specified by /defaultlib. void LinkerDriver::parseDirectives(InputFile *file) { StringRef s = file->getDirectives(); if (s.empty()) return; log("Directives: " + toString(file) + ": " + s); ArgParser parser; // .drectve is always tokenized using Windows shell rules. // /EXPORT: option can appear too many times, processing in fastpath. ParsedDirectives directives = parser.parseDirectives(s); for (StringRef e : directives.exports) { // If a common header file contains dllexported function // declarations, many object files may end up with having the // same /EXPORT options. In order to save cost of parsing them, // we dedup them first. 
if (!directivesExports.insert(e).second) continue; Export exp = parseExport(e); if (config->machine == I386 && config->mingw) { if (!isDecorated(exp.name)) exp.name = saver.save("_" + exp.name); if (!exp.extName.empty() && !isDecorated(exp.extName)) exp.extName = saver.save("_" + exp.extName); } exp.directives = true; config->exports.push_back(exp); } // Handle /include: in bulk. for (StringRef inc : directives.includes) addUndefined(inc); for (auto *arg : directives.args) { switch (arg->getOption().getID()) { case OPT_aligncomm: parseAligncomm(arg->getValue()); break; case OPT_alternatename: parseAlternateName(arg->getValue()); break; case OPT_defaultlib: if (Optional path = findLib(arg->getValue())) enqueuePath(*path, false, false); break; case OPT_entry: config->entry = addUndefined(mangle(arg->getValue())); break; case OPT_failifmismatch: checkFailIfMismatch(arg->getValue(), file); break; case OPT_incl: addUndefined(arg->getValue()); break; case OPT_merge: parseMerge(arg->getValue()); break; case OPT_nodefaultlib: config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); break; case OPT_section: parseSection(arg->getValue()); break; case OPT_subsystem: parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); break; // Only add flags here that link.exe accepts in // `#pragma comment(linker, "/flag")`-generated sections. case OPT_editandcontinue: case OPT_guardsym: case OPT_throwingnew: break; default: error(arg->getSpelling() + " is not allowed in .drectve"); } } } // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. 
// Looks a file name up in the search paths. A name containing a directory
// separator is returned untouched. Otherwise each search directory is tried
// in order, first with the name as given and then, if the name contains no
// '.', with ".obj" appended. If nothing exists, the name is returned as is,
// so the result is not guaranteed to exist.
StringRef LinkerDriver::doFindFile(StringRef filename) {
  if (filename.find_first_of("/\\") != StringRef::npos)
    return filename;

  const bool lacksExt = !filename.contains('.');
  for (StringRef searchDir : searchPaths) {
    SmallString<128> candidate = searchDir;
    sys::path::append(candidate, filename);
    if (sys::fs::exists(candidate.str()))
      return saver.save(candidate.str());

    if (lacksExt) {
      // Retry with the implicit object file extension.
      candidate.append(".obj");
      if (sys::fs::exists(candidate.str()))
        return saver.save(candidate.str());
    }
  }
  return filename;
}
This never returns the same path (in that case, // it returns None). Optional LinkerDriver::findLib(StringRef filename) { if (config->noDefaultLibAll) return None; if (!visitedLibs.insert(filename.lower()).second) return None; StringRef path = doFindLib(filename); if (config->noDefaultLibs.count(path.lower())) return None; if (Optional id = getUniqueID(path)) if (!visitedFiles.insert(*id).second) return None; return path; } // Parses LIB environment which contains a list of search paths. void LinkerDriver::addLibSearchPaths() { Optional envOpt = Process::GetEnv("LIB"); if (!envOpt.hasValue()) return; StringRef env = saver.save(*envOpt); while (!env.empty()) { StringRef path; std::tie(path, env) = env.split(';'); searchPaths.push_back(path); } } Symbol *LinkerDriver::addUndefined(StringRef name) { Symbol *b = symtab->addUndefined(name); if (!b->isGCRoot) { b->isGCRoot = true; config->gcroot.push_back(b); } return b; } StringRef LinkerDriver::mangleMaybe(Symbol *s) { // If the plain symbol name has already been resolved, do nothing. Undefined *unmangled = dyn_cast(s); if (!unmangled) return ""; // Otherwise, see if a similar, mangled symbol exists in the symbol table. Symbol *mangled = symtab->findMangle(unmangled->getName()); if (!mangled) return ""; // If we find a similar mangled symbol, make this an alias to it and return // its name. log(unmangled->getName() + " aliased to " + mangled->getName()); unmangled->weakAlias = symtab->addUndefined(mangled->getName()); return mangled->getName(); } // Windows specific -- find default entry point name. // // There are four different entry point functions for Windows executables, // each of which corresponds to a user-defined "main" function. This function // infers an entry point from a user-defined "main" function. 
StringRef LinkerDriver::findDefaultEntry() { assert(config->subsystem != IMAGE_SUBSYSTEM_UNKNOWN && "must handle /subsystem before calling this"); if (config->mingw) return mangle(config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI ? "WinMainCRTStartup" : "mainCRTStartup"); if (config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) { if (findUnderscoreMangle("wWinMain")) { if (!findUnderscoreMangle("WinMain")) return mangle("wWinMainCRTStartup"); warn("found both wWinMain and WinMain; using latter"); } return mangle("WinMainCRTStartup"); } if (findUnderscoreMangle("wmain")) { if (!findUnderscoreMangle("main")) return mangle("wmainCRTStartup"); warn("found both wmain and main; using latter"); } return mangle("mainCRTStartup"); } WindowsSubsystem LinkerDriver::inferSubsystem() { if (config->dll) return IMAGE_SUBSYSTEM_WINDOWS_GUI; if (config->mingw) return IMAGE_SUBSYSTEM_WINDOWS_CUI; // Note that link.exe infers the subsystem from the presence of these // functions even if /entry: or /nodefaultlib are passed which causes them // to not be called. bool haveMain = findUnderscoreMangle("main"); bool haveWMain = findUnderscoreMangle("wmain"); bool haveWinMain = findUnderscoreMangle("WinMain"); bool haveWWinMain = findUnderscoreMangle("wWinMain"); if (haveMain || haveWMain) { if (haveWinMain || haveWWinMain) { warn(std::string("found ") + (haveMain ? "main" : "wmain") + " and " + (haveWinMain ? "WinMain" : "wWinMain") + "; defaulting to /subsystem:console"); } return IMAGE_SUBSYSTEM_WINDOWS_CUI; } if (haveWinMain || haveWWinMain) return IMAGE_SUBSYSTEM_WINDOWS_GUI; return IMAGE_SUBSYSTEM_UNKNOWN; } static uint64_t getDefaultImageBase() { if (config->is64()) return config->dll ? 0x180000000 : 0x140000000; return config->dll ? 
0x10000000 : 0x400000; } static std::string createResponseFile(const opt::InputArgList &args, ArrayRef filePaths, ArrayRef searchPaths) { SmallString<0> data; raw_svector_ostream os(data); for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_linkrepro: case OPT_reproduce: case OPT_INPUT: case OPT_defaultlib: case OPT_libpath: case OPT_manifest: case OPT_manifest_colon: case OPT_manifestdependency: case OPT_manifestfile: case OPT_manifestinput: case OPT_manifestuac: break; case OPT_implib: case OPT_pdb: case OPT_pdbstripped: case OPT_out: os << arg->getSpelling() << sys::path::filename(arg->getValue()) << "\n"; break; default: os << toString(*arg) << "\n"; } } for (StringRef path : searchPaths) { std::string relPath = relativeToRoot(path); os << "/libpath:" << quote(relPath) << "\n"; } for (StringRef path : filePaths) os << quote(relativeToRoot(path)) << "\n"; return std::string(data.str()); } enum class DebugKind { Unknown, None, Full, FastLink, GHash, Dwarf, Symtab }; static DebugKind parseDebugKind(const opt::InputArgList &args) { auto *a = args.getLastArg(OPT_debug, OPT_debug_opt); if (!a) return DebugKind::None; if (a->getNumValues() == 0) return DebugKind::Full; DebugKind debug = StringSwitch(a->getValue()) .CaseLower("none", DebugKind::None) .CaseLower("full", DebugKind::Full) .CaseLower("fastlink", DebugKind::FastLink) // LLD extensions .CaseLower("ghash", DebugKind::GHash) .CaseLower("dwarf", DebugKind::Dwarf) .CaseLower("symtab", DebugKind::Symtab) .Default(DebugKind::Unknown); if (debug == DebugKind::FastLink) { warn("/debug:fastlink unsupported; using /debug:full"); return DebugKind::Full; } if (debug == DebugKind::Unknown) { error("/debug: unknown option: " + Twine(a->getValue())); return DebugKind::None; } return debug; } static unsigned parseDebugTypes(const opt::InputArgList &args) { unsigned debugTypes = static_cast(DebugType::None); if (auto *a = args.getLastArg(OPT_debugtype)) { SmallVector types; StringRef(a->getValue()) 
.split(types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); for (StringRef type : types) { unsigned v = StringSwitch(type.lower()) .Case("cv", static_cast(DebugType::CV)) .Case("pdata", static_cast(DebugType::PData)) .Case("fixup", static_cast(DebugType::Fixup)) .Default(0); if (v == 0) { warn("/debugtype: unknown option '" + type + "'"); continue; } debugTypes |= v; } return debugTypes; } // Default debug types debugTypes = static_cast(DebugType::CV); if (args.hasArg(OPT_driver)) debugTypes |= static_cast(DebugType::PData); if (args.hasArg(OPT_profile)) debugTypes |= static_cast(DebugType::Fixup); return debugTypes; } static std::string getMapFile(const opt::InputArgList &args, opt::OptSpecifier os, opt::OptSpecifier osFile) { auto *arg = args.getLastArg(os, osFile); if (!arg) return ""; if (arg->getOption().getID() == osFile.getID()) return arg->getValue(); assert(arg->getOption().getID() == os.getID()); StringRef outFile = config->outputFile; return (outFile.substr(0, outFile.rfind('.')) + ".map").str(); } static std::string getImplibPath() { if (!config->implib.empty()) return std::string(config->implib); SmallString<128> out = StringRef(config->outputFile); sys::path::replace_extension(out, ".lib"); return std::string(out.str()); } // The import name is calculated as follows: // // | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY // -----+----------------+---------------------+------------------ // LINK | {value} | {value}.{.dll/.exe} | {output name} // LIB | {value} | {value}.dll | {output name}.dll // static std::string getImportName(bool asLib) { SmallString<128> out; if (config->importName.empty()) { out.assign(sys::path::filename(config->outputFile)); if (asLib) sys::path::replace_extension(out, ".dll"); } else { out.assign(config->importName); if (!sys::path::has_extension(out)) sys::path::replace_extension(out, (config->dll || asLib) ? 
".dll" : ".exe"); } return std::string(out.str()); } static void createImportLibrary(bool asLib) { std::vector exports; for (Export &e1 : config->exports) { COFFShortExport e2; e2.Name = std::string(e1.name); e2.SymbolName = std::string(e1.symbolName); e2.ExtName = std::string(e1.extName); e2.Ordinal = e1.ordinal; e2.Noname = e1.noname; e2.Data = e1.data; e2.Private = e1.isPrivate; e2.Constant = e1.constant; exports.push_back(e2); } auto handleError = [](Error &&e) { handleAllErrors(std::move(e), [](ErrorInfoBase &eib) { error(eib.message()); }); }; std::string libName = getImportName(asLib); std::string path = getImplibPath(); if (!config->incremental) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } // If the import library already exists, replace it only if the contents // have changed. ErrorOr> oldBuf = MemoryBuffer::getFile( path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!oldBuf) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } SmallString<128> tmpName; if (std::error_code ec = sys::fs::createUniqueFile(path + ".tmp-%%%%%%%%.lib", tmpName)) fatal("cannot create temporary file for import library " + path + ": " + ec.message()); if (Error e = writeImportLibrary(libName, tmpName, exports, config->machine, config->mingw)) { handleError(std::move(e)); return; } std::unique_ptr newBuf = check(MemoryBuffer::getFile( tmpName, /*FileSize*/ -1, /*RequiresNullTerminator*/ false)); if ((*oldBuf)->getBuffer() != newBuf->getBuffer()) { oldBuf->reset(); handleError(errorCodeToError(sys::fs::rename(tmpName, path))); } else { sys::fs::remove(tmpName); } } static void parseModuleDefs(StringRef path) { std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); COFFModuleDefinition m = check(parseCOFFModuleDefinition( mb->getMemBufferRef(), config->machine, config->mingw)); if (config->outputFile.empty()) 
config->outputFile = std::string(saver.save(m.OutputFile)); config->importName = std::string(saver.save(m.ImportName)); if (m.ImageBase) config->imageBase = m.ImageBase; if (m.StackReserve) config->stackReserve = m.StackReserve; if (m.StackCommit) config->stackCommit = m.StackCommit; if (m.HeapReserve) config->heapReserve = m.HeapReserve; if (m.HeapCommit) config->heapCommit = m.HeapCommit; if (m.MajorImageVersion) config->majorImageVersion = m.MajorImageVersion; if (m.MinorImageVersion) config->minorImageVersion = m.MinorImageVersion; if (m.MajorOSVersion) config->majorOSVersion = m.MajorOSVersion; if (m.MinorOSVersion) config->minorOSVersion = m.MinorOSVersion; for (COFFShortExport e1 : m.Exports) { Export e2; // In simple cases, only Name is set. Renamed exports are parsed // and set as "ExtName = Name". If Name has the form "OtherDll.Func", // it shouldn't be a normal exported function but a forward to another // DLL instead. This is supported by both MS and GNU linkers. if (!e1.ExtName.empty() && e1.ExtName != e1.Name && StringRef(e1.Name).contains('.')) { e2.name = saver.save(e1.ExtName); e2.forwardTo = saver.save(e1.Name); config->exports.push_back(e2); continue; } e2.name = saver.save(e1.Name); e2.extName = saver.save(e1.ExtName); e2.ordinal = e1.Ordinal; e2.noname = e1.Noname; e2.data = e1.Data; e2.isPrivate = e1.Private; e2.constant = e1.Constant; config->exports.push_back(e2); } } void LinkerDriver::enqueueTask(std::function task) { taskQueue.push_back(std::move(task)); } bool LinkerDriver::run() { ScopedTimer t(inputFileTimer); bool didWork = !taskQueue.empty(); while (!taskQueue.empty()) { taskQueue.front()(); taskQueue.pop_front(); } return didWork; } // Parse an /order file. If an option is given, the linker places // COMDAT sections in the same order as their names appear in the // given file. static void parseOrderFile(StringRef arg) { // For some reason, the MSVC linker requires a filename to be // preceded by "@". 
if (!arg.startswith("@")) { error("malformed /order option: '@' missing"); return; } // Get a list of all comdat sections for error checking. DenseSet set; for (Chunk *c : symtab->getChunks()) if (auto *sec = dyn_cast(c)) if (sec->sym) set.insert(sec->sym->getName()); // Open a file. StringRef path = arg.substr(1); std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); // Parse a file. An order file contains one symbol per line. // All symbols that were not present in a given order file are // considered to have the lowest priority 0 and are placed at // end of an output section. for (StringRef arg : args::getLines(mb->getMemBufferRef())) { std::string s(arg); if (config->machine == I386 && !isDecorated(s)) s = "_" + s; if (set.count(s) == 0) { if (config->warnMissingOrderSymbol) warn("/order:" + arg + ": missing symbol: " + s + " [LNK4037]"); } else config->order[s] = INT_MIN + config->order.size(); } } static void parseCallGraphFile(StringRef path) { std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); // Build a map from symbol name to section. 
DenseMap map; for (ObjFile *file : ObjFile::instances) for (Symbol *sym : file->getSymbols()) if (sym) map[sym->getName()] = sym; auto findSection = [&](StringRef name) -> SectionChunk * { Symbol *sym = map.lookup(name); if (!sym) { if (config->warnMissingOrderSymbol) warn(path + ": no such symbol: " + name); return nullptr; } if (DefinedCOFF *dr = dyn_cast_or_null(sym)) return dyn_cast_or_null(dr->getChunk()); return nullptr; }; for (StringRef line : args::getLines(*mb)) { SmallVector fields; line.split(fields, ' '); uint64_t count; if (fields.size() != 3 || !to_integer(fields[2], count)) { error(path + ": parse error"); return; } if (SectionChunk *from = findSection(fields[0])) if (SectionChunk *to = findSection(fields[1])) config->callGraphProfile[{from, to}] += count; } } static void readCallGraphsFromObjectFiles() { for (ObjFile *obj : ObjFile::instances) { if (obj->callgraphSec) { ArrayRef contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->callgraphSec, contents)); BinaryStreamReader reader(contents, support::little); while (!reader.empty()) { uint32_t fromIndex, toIndex; uint64_t count; if (Error err = reader.readInteger(fromIndex)) fatal(toString(obj) + ": Expected 32-bit integer"); if (Error err = reader.readInteger(toIndex)) fatal(toString(obj) + ": Expected 32-bit integer"); if (Error err = reader.readInteger(count)) fatal(toString(obj) + ": Expected 64-bit integer"); auto *fromSym = dyn_cast_or_null(obj->getSymbol(fromIndex)); auto *toSym = dyn_cast_or_null(obj->getSymbol(toIndex)); if (!fromSym || !toSym) continue; auto *from = dyn_cast_or_null(fromSym->getChunk()); auto *to = dyn_cast_or_null(toSym->getChunk()); if (from && to) config->callGraphProfile[{from, to}] += count; } } } } static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (SectionChunk *c = dyn_cast_or_null(d->getChunk())) c->keepUnique = true; } static void findKeepUniqueSections() { // Exported symbols could be address-significant in other executables or 
DSOs, // so we conservatively mark them as address-significant. for (Export &r : config->exports) markAddrsig(r.sym); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. for (ObjFile *obj : ObjFile::instances) { ArrayRef syms = obj->getSymbols(); if (obj->addrsigSec) { ArrayRef contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->addrsigSec, contents)); const uint8_t *cur = contents.begin(); while (cur != contents.end()) { unsigned size; const char *err; uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(obj) + ": could not decode addrsig section: " + err); if (symIndex >= syms.size()) fatal(toString(obj) + ": invalid symbol index in addrsig section"); markAddrsig(syms[symIndex]); cur += size; } } else { // If an object file does not have an address-significance table, // conservatively mark all of its symbols as address-significant. for (Symbol *s : syms) markAddrsig(s); } } } // link.exe replaces each %foo% in altPath with the contents of environment // variable foo, and adds the two magic env vars _PDB (expands to the basename // of pdb's output path) and _EXT (expands to the extension of the output // binary). // lld only supports %_PDB% and %_EXT% and warns on references to all other env // vars. static void parsePDBAltPath(StringRef altPath) { SmallString<128> buf; StringRef pdbBasename = sys::path::filename(config->pdbPath, sys::path::Style::windows); StringRef binaryExtension = sys::path::extension(config->outputFile, sys::path::Style::windows); if (!binaryExtension.empty()) binaryExtension = binaryExtension.substr(1); // %_EXT% does not include '.'. // Invariant: // +--------- cursor ('a...' might be the empty string). // | +----- firstMark // | | +- secondMark // v v v // a...%...%... 
size_t cursor = 0; while (cursor < altPath.size()) { size_t firstMark, secondMark; if ((firstMark = altPath.find('%', cursor)) == StringRef::npos || (secondMark = altPath.find('%', firstMark + 1)) == StringRef::npos) { // Didn't find another full fragment, treat rest of string as literal. buf.append(altPath.substr(cursor)); break; } // Found a full fragment. Append text in front of first %, and interpret // text between first and second % as variable name. buf.append(altPath.substr(cursor, firstMark - cursor)); StringRef var = altPath.substr(firstMark, secondMark - firstMark + 1); if (var.equals_lower("%_pdb%")) buf.append(pdbBasename); else if (var.equals_lower("%_ext%")) buf.append(binaryExtension); else { warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + var + " as literal"); buf.append(var); } cursor = secondMark + 1; } config->pdbAltPath = buf; } /// Convert resource files and potentially merge input resource object /// trees into one resource tree. /// Call after ObjFile::Instances is complete. void LinkerDriver::convertResources() { std::vector resourceObjFiles; for (ObjFile *f : ObjFile::instances) { if (f->isResourceObjFile()) resourceObjFiles.push_back(f); } if (!config->mingw && (resourceObjFiles.size() > 1 || (resourceObjFiles.size() == 1 && !resources.empty()))) { error((!resources.empty() ? "internal .obj file created from .res files" : toString(resourceObjFiles[1])) + ": more than one resource obj file not allowed, already got " + toString(resourceObjFiles.front())); return; } if (resources.empty() && resourceObjFiles.size() <= 1) { // No resources to convert, and max one resource object file in // the input. Keep that preconverted resource section as is. 
for (ObjFile *f : resourceObjFiles) f->includeResourceChunks(); return; } ObjFile *f = make(convertResToCOFF(resources, resourceObjFiles)); symtab->addFile(f); f->includeResourceChunks(); } // In MinGW, if no symbols are chosen to be exported, then all symbols are // automatically exported by default. This behavior can be forced by the // -export-all-symbols option, so that it happens even when exports are // explicitly specified. The automatic behavior can be disabled using the // -exclude-all-symbols option, so that lld-link behaves like link.exe rather // than MinGW in the case that nothing is explicitly exported. void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) { if (!config->dll) return; if (!args.hasArg(OPT_export_all_symbols)) { if (!config->exports.empty()) return; if (args.hasArg(OPT_exclude_all_symbols)) return; } AutoExporter exporter; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) exporter.addWholeArchive(*path); symtab->forEachSymbol([&](Symbol *s) { auto *def = dyn_cast(s); if (!exporter.shouldExport(def)) return; Export e; e.name = def->getName(); e.sym = def; if (Chunk *c = def->getChunk()) if (!(c->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)) e.data = true; config->exports.push_back(e); }); } // lld has a feature to create a tar file containing all input files as well as // all command line options, so that other people can run lld again with exactly // the same inputs. This feature is accessible via /linkrepro and /reproduce. // // /linkrepro and /reproduce are very similar, but /linkrepro takes a directory // name while /reproduce takes a full path. We have /linkrepro for compatibility // with Microsoft link.exe. 
Optional getReproduceFile(const opt::InputArgList &args) { if (auto *arg = args.getLastArg(OPT_reproduce)) return std::string(arg->getValue()); if (auto *arg = args.getLastArg(OPT_linkrepro)) { SmallString<64> path = StringRef(arg->getValue()); sys::path::append(path, "repro.tar"); return std::string(path); } return None; } void LinkerDriver::link(ArrayRef argsArr) { ScopedTimer rootTimer(Timer::root()); // Needed for LTO. InitializeAllTargetInfos(); InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. if (argsArr.size() > 1 && StringRef(argsArr[1]).equals_lower("/lib")) { if (llvm::libDriverMain(argsArr.slice(1)) != 0) fatal("lib failed"); return; } // Parse command line options. ArgParser parser; opt::InputArgList args = parser.parse(argsArr); // Parse and evaluate -mllvm options. std::vector v; v.push_back("lld-link (LLVM option parsing)"); for (auto *arg : args.filtered(OPT_mllvm)) v.push_back(arg->getValue()); cl::ResetAllOptionOccurrences(); cl::ParseCommandLineOptions(v.size(), v.data()); // Handle /errorlimit early, because error() depends on it. if (auto *arg = args.getLastArg(OPT_errorlimit)) { int n = 20; StringRef s = arg->getValue(); if (s.getAsInteger(10, n)) error(arg->getSpelling() + " number expected, but got " + s); errorHandler().errorLimit = n; } // Handle /help if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); return; } // /threads: takes a positive integer and provides the default value for // /opt:lldltojobs=. 
if (auto *arg = args.getLastArg(OPT_threads)) { StringRef v(arg->getValue()); unsigned threads = 0; if (!llvm::to_integer(v, threads, 0) || threads == 0) error(arg->getSpelling() + ": expected a positive integer, but got '" + arg->getValue() + "'"); parallel::strategy = hardware_concurrency(threads); config->thinLTOJobs = v.str(); } if (args.hasArg(OPT_show_timing)) config->showTiming = true; config->showSummary = args.hasArg(OPT_summary); // Handle --version, which is an lld extension. This option is a bit odd // because it doesn't start with "/", but we deliberately chose "--" to // avoid conflict with /version and for compatibility with clang-cl. if (args.hasArg(OPT_dash_dash_version)) { lld::outs() << getLLDVersion() << "\n"; return; } // Handle /lldmingw early, since it can potentially affect how other // options are handled. config->mingw = args.hasArg(OPT_lldmingw); // Handle /linkrepro and /reproduce. if (Optional path = getReproduceFile(args)) { Expected> errOrWriter = TarWriter::create(*path, sys::path::stem(*path)); if (errOrWriter) { tar = std::move(*errOrWriter); } else { error("/linkrepro: failed to open " + *path + ": " + toString(errOrWriter.takeError())); } } if (!args.hasArg(OPT_INPUT, OPT_wholearchive_file)) { if (args.hasArg(OPT_deffile)) config->noEntry = true; else fatal("no input files"); } // Construct search path list. searchPaths.push_back(""); for (auto *arg : args.filtered(OPT_libpath)) searchPaths.push_back(arg->getValue()); if (!args.hasArg(OPT_lldignoreenv)) addLibSearchPaths(); // Handle /ignore for (auto *arg : args.filtered(OPT_ignore)) { SmallVector vec; StringRef(arg->getValue()).split(vec, ','); for (StringRef s : vec) { if (s == "4037") config->warnMissingOrderSymbol = false; else if (s == "4099") config->warnDebugInfoUnusable = false; else if (s == "4217") config->warnLocallyDefinedImported = false; else if (s == "longsections") config->warnLongSectionNames = false; // Other warning numbers are ignored. 
} } // Handle /out if (auto *arg = args.getLastArg(OPT_out)) config->outputFile = arg->getValue(); // Handle /verbose if (args.hasArg(OPT_verbose)) config->verbose = true; errorHandler().verbose = config->verbose; // Handle /force or /force:unresolved if (args.hasArg(OPT_force, OPT_force_unresolved)) config->forceUnresolved = true; // Handle /force or /force:multiple if (args.hasArg(OPT_force, OPT_force_multiple)) config->forceMultiple = true; // Handle /force or /force:multipleres if (args.hasArg(OPT_force, OPT_force_multipleres)) config->forceMultipleRes = true; // Handle /debug DebugKind debug = parseDebugKind(args); if (debug == DebugKind::Full || debug == DebugKind::Dwarf || debug == DebugKind::GHash) { config->debug = true; config->incremental = true; } // Handle /demangle config->demangle = args.hasFlag(OPT_demangle, OPT_demangle_no); // Handle /debugtype config->debugTypes = parseDebugTypes(args); // Handle /driver[:uponly|:wdm]. config->driverUponly = args.hasArg(OPT_driver_uponly) || args.hasArg(OPT_driver_uponly_wdm) || args.hasArg(OPT_driver_wdm_uponly); config->driverWdm = args.hasArg(OPT_driver_wdm) || args.hasArg(OPT_driver_uponly_wdm) || args.hasArg(OPT_driver_wdm_uponly); config->driver = config->driverUponly || config->driverWdm || args.hasArg(OPT_driver); // Handle /pdb bool shouldCreatePDB = (debug == DebugKind::Full || debug == DebugKind::GHash); if (shouldCreatePDB) { if (auto *arg = args.getLastArg(OPT_pdb)) config->pdbPath = arg->getValue(); if (auto *arg = args.getLastArg(OPT_pdbaltpath)) config->pdbAltPath = arg->getValue(); if (args.hasArg(OPT_natvis)) config->natvisFiles = args.getAllArgValues(OPT_natvis); if (args.hasArg(OPT_pdbstream)) { for (const StringRef value : args.getAllArgValues(OPT_pdbstream)) { const std::pair nameFile = value.split("="); const StringRef name = nameFile.first; const std::string file = nameFile.second.str(); config->namedStreams[name] = file; } } if (auto *arg = args.getLastArg(OPT_pdb_source_path)) 
config->pdbSourcePath = arg->getValue(); } // Handle /pdbstripped if (args.hasArg(OPT_pdbstripped)) warn("ignoring /pdbstripped flag, it is not yet supported"); // Handle /noentry if (args.hasArg(OPT_noentry)) { if (args.hasArg(OPT_dll)) config->noEntry = true; else error("/noentry must be specified with /dll"); } // Handle /dll if (args.hasArg(OPT_dll)) { config->dll = true; config->manifestID = 2; } // Handle /dynamicbase and /fixed. We can't use hasFlag for /dynamicbase // because we need to explicitly check whether that option or its inverse was // present in the argument list in order to handle /fixed. auto *dynamicBaseArg = args.getLastArg(OPT_dynamicbase, OPT_dynamicbase_no); if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase_no) config->dynamicBase = false; // MSDN claims "/FIXED:NO is the default setting for a DLL, and /FIXED is the // default setting for any other project type.", but link.exe defaults to // /FIXED:NO for exe outputs as well. Match behavior, not docs. 
bool fixed = args.hasFlag(OPT_fixed, OPT_fixed_no, false); if (fixed) { if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase) { error("/fixed must not be specified with /dynamicbase"); } else { config->relocatable = false; config->dynamicBase = false; } } // Handle /appcontainer config->appContainer = args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false); // Handle /machine if (auto *arg = args.getLastArg(OPT_machine)) { config->machine = getMachineType(arg->getValue()); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) fatal(Twine("unknown /machine argument: ") + arg->getValue()); } // Handle /nodefaultlib: for (auto *arg : args.filtered(OPT_nodefaultlib)) config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); // Handle /nodefaultlib if (args.hasArg(OPT_nodefaultlib_all)) config->noDefaultLibAll = true; // Handle /base if (auto *arg = args.getLastArg(OPT_base)) parseNumbers(arg->getValue(), &config->imageBase); // Handle /filealign if (auto *arg = args.getLastArg(OPT_filealign)) { parseNumbers(arg->getValue(), &config->fileAlign); if (!isPowerOf2_64(config->fileAlign)) error("/filealign: not a power of two: " + Twine(config->fileAlign)); } // Handle /stack if (auto *arg = args.getLastArg(OPT_stack)) parseNumbers(arg->getValue(), &config->stackReserve, &config->stackCommit); // Handle /guard:cf if (auto *arg = args.getLastArg(OPT_guard)) parseGuard(arg->getValue()); // Handle /heap if (auto *arg = args.getLastArg(OPT_heap)) parseNumbers(arg->getValue(), &config->heapReserve, &config->heapCommit); // Handle /version if (auto *arg = args.getLastArg(OPT_version)) parseVersion(arg->getValue(), &config->majorImageVersion, &config->minorImageVersion); // Handle /subsystem if (auto *arg = args.getLastArg(OPT_subsystem)) parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); // Handle /timestamp if (llvm::opt::Arg *arg = args.getLastArg(OPT_timestamp, OPT_repro)) { if 
(arg->getOption().getID() == OPT_repro) { config->timestamp = 0; config->repro = true; } else { config->repro = false; StringRef value(arg->getValue()); if (value.getAsInteger(0, config->timestamp)) fatal(Twine("invalid timestamp: ") + value + ". Expected 32-bit integer"); } } else { config->repro = false; config->timestamp = time(nullptr); } // Handle /alternatename for (auto *arg : args.filtered(OPT_alternatename)) parseAlternateName(arg->getValue()); // Handle /include for (auto *arg : args.filtered(OPT_incl)) addUndefined(arg->getValue()); // Handle /implib if (auto *arg = args.getLastArg(OPT_implib)) config->implib = arg->getValue(); // Handle /opt. bool doGC = debug == DebugKind::None || args.hasArg(OPT_profile); unsigned icfLevel = args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on unsigned tailMerge = 1; for (auto *arg : args.filtered(OPT_opt)) { std::string str = StringRef(arg->getValue()).lower(); SmallVector vec; StringRef(str).split(vec, ','); for (StringRef s : vec) { if (s == "ref") { doGC = true; } else if (s == "noref") { doGC = false; } else if (s == "icf" || s.startswith("icf=")) { icfLevel = 2; } else if (s == "noicf") { icfLevel = 0; } else if (s == "lldtailmerge") { tailMerge = 2; } else if (s == "nolldtailmerge") { tailMerge = 0; } else if (s.startswith("lldlto=")) { StringRef optLevel = s.substr(7); if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) error("/opt:lldlto: invalid optimization level: " + optLevel); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (!get_threadpool_strategy(jobs)) error("/opt:lldltojobs: invalid job count: " + jobs); config->thinLTOJobs = jobs.str(); } else if (s.startswith("lldltopartitions=")) { StringRef n = s.substr(17); if (n.getAsInteger(10, config->ltoPartitions) || config->ltoPartitions == 0) error("/opt:lldltopartitions: invalid partition count: " + n); } else if (s != "lbr" && s != "nolbr") error("/opt: unknown option: " + s); } } // Limited ICF is 
enabled if GC is enabled and ICF was never mentioned // explicitly. // FIXME: LLD only implements "limited" ICF, i.e. it only merges identical // code. If the user passes /OPT:ICF explicitly, LLD should merge identical // comdat readonly data. if (icfLevel == 1 && !doGC) icfLevel = 0; config->doGC = doGC; config->doICF = icfLevel > 0; config->tailMerge = (tailMerge == 1 && config->doICF) || tailMerge == 2; // Handle /lldsavetemps if (args.hasArg(OPT_lldsavetemps)) config->saveTemps = true; // Handle /kill-at if (args.hasArg(OPT_kill_at)) config->killAt = true; // Handle /lldltocache if (auto *arg = args.getLastArg(OPT_lldltocache)) config->ltoCache = arg->getValue(); // Handle /lldsavecachepolicy if (auto *arg = args.getLastArg(OPT_lldltocachepolicy)) config->ltoCachePolicy = CHECK( parseCachePruningPolicy(arg->getValue()), Twine("/lldltocachepolicy: invalid cache policy: ") + arg->getValue()); // Handle /failifmismatch for (auto *arg : args.filtered(OPT_failifmismatch)) checkFailIfMismatch(arg->getValue(), nullptr); // Handle /merge for (auto *arg : args.filtered(OPT_merge)) parseMerge(arg->getValue()); // Add default section merging rules after user rules. User rules take // precedence, but we will emit a warning if there is a conflict. 
parseMerge(".idata=.rdata"); parseMerge(".didat=.rdata"); parseMerge(".edata=.rdata"); parseMerge(".xdata=.rdata"); parseMerge(".bss=.data"); if (config->mingw) { parseMerge(".ctors=.rdata"); parseMerge(".dtors=.rdata"); parseMerge(".CRT=.rdata"); } // Handle /section for (auto *arg : args.filtered(OPT_section)) parseSection(arg->getValue()); // Handle /align if (auto *arg = args.getLastArg(OPT_align)) { parseNumbers(arg->getValue(), &config->align); if (!isPowerOf2_64(config->align)) error("/align: not a power of two: " + StringRef(arg->getValue())); if (!args.hasArg(OPT_driver)) warn("/align specified without /driver; image may not run"); } // Handle /aligncomm for (auto *arg : args.filtered(OPT_aligncomm)) parseAligncomm(arg->getValue()); // Handle /manifestdependency. This enables /manifest unless /manifest:no is // also passed. if (auto *arg = args.getLastArg(OPT_manifestdependency)) { config->manifestDependency = arg->getValue(); config->manifest = Configuration::SideBySide; } // Handle /manifest and /manifest: if (auto *arg = args.getLastArg(OPT_manifest, OPT_manifest_colon)) { if (arg->getOption().getID() == OPT_manifest) config->manifest = Configuration::SideBySide; else parseManifest(arg->getValue()); } // Handle /manifestuac if (auto *arg = args.getLastArg(OPT_manifestuac)) parseManifestUAC(arg->getValue()); // Handle /manifestfile if (auto *arg = args.getLastArg(OPT_manifestfile)) config->manifestFile = arg->getValue(); // Handle /manifestinput for (auto *arg : args.filtered(OPT_manifestinput)) config->manifestInput.push_back(arg->getValue()); if (!config->manifestInput.empty() && config->manifest != Configuration::Embed) { fatal("/manifestinput: requires /manifest:embed"); } config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_arg); config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_arg); 
config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace); config->thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path); // Handle miscellaneous boolean flags. config->allowBind = args.hasFlag(OPT_allowbind, OPT_allowbind_no, true); config->allowIsolation = args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true); config->incremental = args.hasFlag(OPT_incremental, OPT_incremental_no, !config->doGC && !config->doICF && !args.hasArg(OPT_order) && !args.hasArg(OPT_profile)); config->integrityCheck = args.hasFlag(OPT_integritycheck, OPT_integritycheck_no, false); config->cetCompat = args.hasFlag(OPT_cetcompat, OPT_cetcompat_no, false); config->nxCompat = args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true); for (auto *arg : args.filtered(OPT_swaprun)) parseSwaprun(arg->getValue()); config->terminalServerAware = !config->dll && args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); config->debugDwarf = debug == DebugKind::Dwarf; config->debugGHashes = debug == DebugKind::GHash; config->debugSymtab = debug == DebugKind::Symtab; config->autoImport = args.hasFlag(OPT_auto_import, OPT_auto_import_no, config->mingw); config->pseudoRelocs = args.hasFlag( OPT_runtime_pseudo_reloc, OPT_runtime_pseudo_reloc_no, config->mingw); config->callGraphProfileSort = args.hasFlag( OPT_call_graph_profile_sort, OPT_call_graph_profile_sort_no, true); // Don't warn about long section names, such as .debug_info, for mingw or // when -debug:dwarf is requested. 
if (config->mingw || config->debugDwarf) config->warnLongSectionNames = false; config->lldmapFile = getMapFile(args, OPT_lldmap, OPT_lldmap_file); config->mapFile = getMapFile(args, OPT_map, OPT_map_file); if (config->lldmapFile != "" && config->lldmapFile == config->mapFile) { warn("/lldmap and /map have the same output file '" + config->mapFile + "'.\n>>> ignoring /lldmap"); config->lldmapFile.clear(); } if (config->incremental && args.hasArg(OPT_profile)) { warn("ignoring '/incremental' due to '/profile' specification"); config->incremental = false; } if (config->incremental && args.hasArg(OPT_order)) { warn("ignoring '/incremental' due to '/order' specification"); config->incremental = false; } if (config->incremental && config->doGC) { warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to " "disable"); config->incremental = false; } if (config->incremental && config->doICF) { warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to " "disable"); config->incremental = false; } if (errorCount()) return; std::set wholeArchives; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) if (Optional id = getUniqueID(*path)) wholeArchives.insert(*id); // A predicate returning true if a given path is an argument for // /wholearchive:, or /wholearchive is enabled globally. // This function is a bit tricky because "foo.obj /wholearchive:././foo.obj" // needs to be handled as "/wholearchive:foo.obj foo.obj". auto isWholeArchive = [&](StringRef path) -> bool { if (args.hasArg(OPT_wholearchive_flag)) return true; if (Optional id = getUniqueID(path)) return wholeArchives.count(*id); return false; }; // Create a list of input files. These can be given as OPT_INPUT options // and OPT_wholearchive_file options, and we also need to track OPT_start_lib // and OPT_end_lib. 
bool inLib = false; for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_end_lib: if (!inLib) error("stray " + arg->getSpelling()); inLib = false; break; case OPT_start_lib: if (inLib) error("nested " + arg->getSpelling()); inLib = true; break; case OPT_wholearchive_file: if (Optional path = findFile(arg->getValue())) enqueuePath(*path, true, inLib); break; case OPT_INPUT: if (Optional path = findFile(arg->getValue())) enqueuePath(*path, isWholeArchive(*path), inLib); break; default: // Ignore other options. break; } } // Process files specified as /defaultlib. These should be enequeued after // other files, which is why they are in a separate loop. for (auto *arg : args.filtered(OPT_defaultlib)) if (Optional path = findLib(arg->getValue())) enqueuePath(*path, false, false); // Windows specific -- Create a resource file containing a manifest file. if (config->manifest == Configuration::Embed) addBuffer(createManifestRes(), false, false); // Read all input files given via the command line. run(); if (errorCount()) return; // We should have inferred a machine type by now from the input files, but if // not we assume x64. if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { warn("/machine is not specified. x64 is assumed"); config->machine = AMD64; } config->wordsize = config->is64() ? 8 : 4; // Handle /safeseh, x86 only, on by default, except for mingw. 
if (config->machine == I386) { config->safeSEH = args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw); config->noSEH = args.hasArg(OPT_noseh); } // Handle /functionpadmin for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt)) parseFunctionPadMin(arg, config->machine); if (tar) tar->append("response.txt", createResponseFile(args, filePaths, ArrayRef(searchPaths).slice(1))); // Handle /largeaddressaware config->largeAddressAware = args.hasFlag( OPT_largeaddressaware, OPT_largeaddressaware_no, config->is64()); // Handle /highentropyva config->highEntropyVA = config->is64() && args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true); if (!config->dynamicBase && (config->machine == ARMNT || config->machine == ARM64)) error("/dynamicbase:no is not compatible with " + machineToStr(config->machine)); // Handle /export for (auto *arg : args.filtered(OPT_export)) { Export e = parseExport(arg->getValue()); if (config->machine == I386) { if (!isDecorated(e.name)) e.name = saver.save("_" + e.name); if (!e.extName.empty() && !isDecorated(e.extName)) e.extName = saver.save("_" + e.extName); } config->exports.push_back(e); } // Handle /def if (auto *arg = args.getLastArg(OPT_deffile)) { // parseModuleDefs mutates Config object. parseModuleDefs(arg->getValue()); } // Handle generation of import library from a def file. if (!args.hasArg(OPT_INPUT, OPT_wholearchive_file)) { fixupExports(); createImportLibrary(/*asLib=*/true); return; } // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. Must happen before /entry handling, // and after the early return when just writing an import library. 
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { config->subsystem = inferSubsystem(); if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) fatal("subsystem must be defined"); } // Handle /entry and /dll if (auto *arg = args.getLastArg(OPT_entry)) { config->entry = addUndefined(mangle(arg->getValue())); } else if (!config->entry && !config->noEntry) { if (args.hasArg(OPT_dll)) { StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12" : "_DllMainCRTStartup"; config->entry = addUndefined(s); } else if (config->driverWdm) { // /driver:wdm implies /entry:_NtProcessStartup config->entry = addUndefined(mangle("_NtProcessStartup")); } else { // Windows specific -- If entry point name is not given, we need to // infer that from user-defined entry name. StringRef s = findDefaultEntry(); if (s.empty()) fatal("entry point must be defined"); config->entry = addUndefined(s); log("Entry name inferred: " + s); } } // Handle /delayload for (auto *arg : args.filtered(OPT_delayload)) { config->delayLoads.insert(StringRef(arg->getValue()).lower()); if (config->machine == I386) { config->delayLoadHelper = addUndefined("___delayLoadHelper2@8"); } else { config->delayLoadHelper = addUndefined("__delayLoadHelper2"); } } // Set default image name if neither /out or /def set it. if (config->outputFile.empty()) { config->outputFile = getOutputPath( (*args.filtered(OPT_INPUT, OPT_wholearchive_file).begin())->getValue()); } // Fail early if an output file is not writable. if (auto e = tryCreateFile(config->outputFile)) { error("cannot open output file " + config->outputFile + ": " + e.message()); return; } if (shouldCreatePDB) { // Put the PDB next to the image if no /pdb flag was passed. if (config->pdbPath.empty()) { config->pdbPath = config->outputFile; sys::path::replace_extension(config->pdbPath, ".pdb"); } // The embedded PDB path should be the absolute path to the PDB if no // /pdbaltpath flag was passed. 
if (config->pdbAltPath.empty()) { config->pdbAltPath = config->pdbPath; // It's important to make the path absolute and remove dots. This path // will eventually be written into the PE header, and certain Microsoft // tools won't work correctly if these assumptions are not held. sys::fs::make_absolute(config->pdbAltPath); sys::path::remove_dots(config->pdbAltPath); } else { // Don't do this earlier, so that Config->OutputFile is ready. parsePDBAltPath(config->pdbAltPath); } } // Set default image base if /base is not given. if (config->imageBase == uint64_t(-1)) config->imageBase = getDefaultImageBase(); symtab->addSynthetic(mangle("__ImageBase"), nullptr); if (config->machine == I386) { symtab->addAbsolute("___safe_se_handler_table", 0); symtab->addAbsolute("___safe_se_handler_count", 0); } symtab->addAbsolute(mangle("__guard_fids_count"), 0); symtab->addAbsolute(mangle("__guard_fids_table"), 0); symtab->addAbsolute(mangle("__guard_flags"), 0); symtab->addAbsolute(mangle("__guard_iat_count"), 0); symtab->addAbsolute(mangle("__guard_iat_table"), 0); symtab->addAbsolute(mangle("__guard_longjmp_count"), 0); symtab->addAbsolute(mangle("__guard_longjmp_table"), 0); // Needed for MSVC 2017 15.5 CRT. symtab->addAbsolute(mangle("__enclave_config"), 0); if (config->pseudoRelocs) { symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); } if (config->mingw) { symtab->addAbsolute(mangle("__CTOR_LIST__"), 0); symtab->addAbsolute(mangle("__DTOR_LIST__"), 0); } // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we // converge. do { // Windows specific -- if entry point is not found, // search for its mangled names. if (config->entry) mangleMaybe(config->entry); // Windows specific -- Make sure we resolve all dllexported symbols. 
for (Export &e : config->exports) { if (!e.forwardTo.empty()) continue; e.sym = addUndefined(e.name); if (!e.directives) e.symbolName = mangleMaybe(e.sym); } // Add weak aliases. Weak aliases is a mechanism to give remaining // undefined symbols final chance to be resolved successfully. for (auto pair : config->alternateNames) { StringRef from = pair.first; StringRef to = pair.second; Symbol *sym = symtab->find(from); if (!sym) continue; if (auto *u = dyn_cast(sym)) if (!u->weakAlias) u->weakAlias = symtab->addUndefined(to); } // If any inputs are bitcode files, the LTO code generator may create // references to library functions that are not explicit in the bitcode // file's symbol table. If any of those library functions are defined in a // bitcode file in an archive member, we need to arrange to use LTO to // compile those archive members by adding them to the link beforehand. if (!BitcodeFile::instances.empty()) for (auto *s : lto::LTO::getRuntimeLibcallSymbols()) symtab->addLibcall(s); // Windows specific -- if __load_config_used can be resolved, resolve it. if (symtab->findUnderscore("_load_config_used")) addUndefined(mangle("_load_config_used")); } while (run()); if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) if (dyn_cast_or_null(symtab->find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } if (config->autoImport) { // MinGW specific. // Load any further object files that might be needed for doing automatic // imports. // // For cases with no automatically imported symbols, this iterates once // over the symbol table and doesn't do anything. 
// // For the normal case with a few automatically imported symbols, this // should only need to be run once, since each new object file imported // is an import library and wouldn't add any new undefined references, // but there's nothing stopping the __imp_ symbols from coming from a // normal object file as well (although that won't be used for the // actual autoimport later on). If this pass adds new undefined references, // we won't iterate further to resolve them. symtab->loadMinGWAutomaticImports(); run(); } // At this point, we should not have any symbols that cannot be resolved. // If we are going to do codegen for link-time optimization, check for // unresolvable symbols first, so we don't spend time generating code that // will fail to link anyway. if (!BitcodeFile::instances.empty() && !config->forceUnresolved) symtab->reportUnresolvable(); if (errorCount()) return; // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files (unless -thinlto-index-only was given, in which case we // resolve symbols and write indices, but don't generate native code or link). symtab->addCombinedLTOObjects(); // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObject, so we are done if that's the case. if (config->thinLTOIndexOnly) return; // If we generated native object files from bitcode files, this resolves // references to the symbols we use from them. run(); // Resolve remaining undefined symbols and warn about imported locals. symtab->resolveRemainingUndefines(); if (errorCount()) return; config->hadExplicitExports = !config->exports.empty(); if (config->mingw) { // In MinGW, all symbols are automatically exported if no symbols // are chosen to be exported. maybeExportMinGWSymbols(args); // Make sure the crtend.o object is the last object file. 
This object // file can contain terminating section chunks that need to be placed // last. GNU ld processes files and static libraries explicitly in the // order provided on the command line, while lld will pull in needed // files from static libraries only after the last object file on the // command line. for (auto i = ObjFile::instances.begin(), e = ObjFile::instances.end(); i != e; i++) { ObjFile *file = *i; if (isCrtend(file->getName())) { ObjFile::instances.erase(i); ObjFile::instances.push_back(file); break; } } } // Windows specific -- when we are creating a .dll file, we also // need to create a .lib file. In MinGW mode, we only do that when the // -implib option is given explicitly, for compatibility with GNU ld. if (!config->exports.empty() || config->dll) { fixupExports(); if (!config->mingw || !config->implib.empty()) createImportLibrary(/*asLib=*/false); assignExportOrdinals(); } // Handle /output-def (MinGW specific). if (auto *arg = args.getLastArg(OPT_output_def)) writeDefFile(arg->getValue()); // Set extra alignment for .comm symbols for (auto pair : config->alignComm) { StringRef name = pair.first; uint32_t alignment = pair.second; Symbol *sym = symtab->find(name); if (!sym) { warn("/aligncomm symbol " + name + " not found"); continue; } // If the symbol isn't common, it must have been replaced with a regular // symbol, which will carry its own alignment. auto *dc = dyn_cast(sym); if (!dc) continue; CommonChunk *c = dc->getChunk(); c->setAlignment(std::max(c->getAlignment(), alignment)); } // Windows specific -- Create a side-by-side manifest file. if (config->manifest == Configuration::SideBySide) createSideBySideManifest(); // Handle /order. We want to do this at this moment because we // need a complete list of comdat sections to warn on nonexistent // functions. 
if (auto *arg = args.getLastArg(OPT_order)) { if (args.hasArg(OPT_call_graph_ordering_file)) error("/order and /call-graph-order-file may not be used together"); parseOrderFile(arg->getValue()); config->callGraphProfileSort = false; } // Handle /call-graph-ordering-file and /call-graph-profile-sort (default on). if (config->callGraphProfileSort) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) { parseCallGraphFile(arg->getValue()); } readCallGraphsFromObjectFiles(); } // Handle /print-symbol-order. if (auto *arg = args.getLastArg(OPT_print_symbol_order)) config->printSymbolOrder = arg->getValue(); // Identify unreferenced COMDAT sections. if (config->doGC) markLive(symtab->getChunks()); // Needs to happen after the last call to addFile(). convertResources(); // Identify identical COMDAT sections to merge them. if (config->doICF) { findKeepUniqueSections(); doICF(symtab->getChunks()); } // Write the result. writeResult(); // Stop early so we can print the results. rootTimer.stop(); if (config->showTiming) Timer::root().print(); } } // namespace coff } // namespace lld diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index bfa7bd8148df..21a1341f7844 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -1,1577 +1,1575 @@ //===- PDB.cpp ------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "PDB.h" #include "Chunks.h" #include "Config.h" #include "DebugTypes.h" #include "Driver.h" #include "SymbolTable.h" #include "Symbols.h" #include "TypeMerger.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Timer.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/Object/COFF.h" #include "llvm/Object/CVDebugRecord.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/CRC.h" #include 
"llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #include using namespace llvm; using namespace llvm::codeview; using namespace lld; using namespace lld::coff; using llvm::object::coff_section; static ExitOnError exitOnErr; static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); - +Timer lld::coff::loadGHashTimer("Global Type Hashing", totalPdbLinkTimer); +Timer lld::coff::mergeGHashTimer("GHash Type Merging", totalPdbLinkTimer); static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer); static Timer typeMergingTimer("Type Merging", addObjectsTimer); static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer); static Timer publicsLayoutTimer("Publics Stream Layout", totalPdbLinkTimer); static Timer tpiStreamLayoutTimer("TPI Stream Layout", totalPdbLinkTimer); static Timer diskCommitTimer("Commit to Disk", totalPdbLinkTimer); namespace { class DebugSHandler; class PDBLinker { friend DebugSHandler; public: PDBLinker(SymbolTable *symtab) : symtab(symtab), builder(bAlloc), tMerger(bAlloc) { // This isn't strictly necessary, but link.exe usually puts an empty string // as the first "valid" string in the string table, so we do the same in // order to maintain as much byte-for-byte compatibility as possible. pdbStrTab.insert(""); } /// Emit the basic PDB structure: initial streams, headers, etc. void initialize(llvm::codeview::DebugInfo *buildId); /// Add natvis files specified on the command line. void addNatvisFiles(); /// Add named streams specified on the command line. void addNamedStreams(); /// Link CodeView from each object file in the symbol table into the PDB. void addObjectsToPDB(); /// Add every live, defined public symbol to the PDB. void addPublicsToPDB(); /// Link info for each import file in the symbol table into the PDB. 
void addImportFilesToPDB(ArrayRef outputSections); /// Link CodeView from a single object file into the target (output) PDB. /// When a precompiled headers object is linked, its TPI map might be provided /// externally. void addDebug(TpiSource *source); - bool mergeTypeRecords(TpiSource *source); - void addDebugSymbols(TpiSource *source); void mergeSymbolRecords(TpiSource *source, std::vector &stringTableRefs, BinaryStreamRef symData); /// Add the section map and section contributions to the PDB. void addSections(ArrayRef outputSections, ArrayRef sectionTable); /// Write the PDB to disk and store the Guid generated for it in *Guid. void commit(codeview::GUID *guid); // Print statistics regarding the final PDB void printStats(); private: SymbolTable *symtab; pdb::PDBFileBuilder builder; TypeMerger tMerger; /// PDBs use a single global string table for filenames in the file checksum /// table. DebugStringTableSubsection pdbStrTab; llvm::SmallString<128> nativePath; // For statistics uint64_t globalSymbols = 0; uint64_t moduleSymbols = 0; uint64_t publicSymbols = 0; }; class DebugSHandler { PDBLinker &linker; /// The object file whose .debug$S sections we're processing. ObjFile &file; /// The result of merging type indices. TpiSource *source; /// The DEBUG_S_STRINGTABLE subsection. These strings are referred to by /// index from other records in the .debug$S section. All of these strings /// need to be added to the global PDB string table, and all references to /// these strings need to have their indices re-written to refer to the /// global PDB string table. DebugStringTableSubsectionRef cvStrTab; /// The DEBUG_S_FILECHKSMS subsection. As above, these are referred to /// by other records in the .debug$S section and need to be merged into the /// PDB. DebugChecksumsSubsectionRef checksums; /// The DEBUG_S_FRAMEDATA subsection(s). There can be more than one of /// these and they need not appear in any specific order. 
However, they /// contain string table references which need to be re-written, so we /// collect them all here and re-write them after all subsections have been /// discovered and processed. std::vector newFpoFrames; /// Pointers to raw memory that we determine have string table references /// that need to be re-written. We first process all .debug$S subsections /// to ensure that we can handle subsections written in any order, building /// up this list as we go. At the end, we use the string table (which must /// have been discovered by now else it is an error) to re-write these /// references. std::vector stringTableReferences; void mergeInlineeLines(const DebugSubsectionRecord &inlineeLines); public: DebugSHandler(PDBLinker &linker, ObjFile &file, TpiSource *source) : linker(linker), file(file), source(source) {} void handleDebugS(ArrayRef relocatedDebugContents); void finish(); }; } // Visual Studio's debugger requires absolute paths in various places in the // PDB to work without additional configuration: // https://docs.microsoft.com/en-us/visualstudio/debugger/debug-source-files-common-properties-solution-property-pages-dialog-box static void pdbMakeAbsolute(SmallVectorImpl &fileName) { // The default behavior is to produce paths that are valid within the context // of the machine that you perform the link on. If the linker is running on // a POSIX system, we will output absolute POSIX paths. If the linker is // running on a Windows system, we will output absolute Windows paths. If the // user desires any other kind of behavior, they should explicitly pass // /pdbsourcepath, in which case we will treat the exact string the user // passed in as the gospel and not normalize, canonicalize it. if (sys::path::is_absolute(fileName, sys::path::Style::windows) || sys::path::is_absolute(fileName, sys::path::Style::posix)) return; // It's not absolute in any path syntax. 
Relative paths necessarily refer to // the local file system, so we can make it native without ending up with a // nonsensical path. if (config->pdbSourcePath.empty()) { sys::path::native(fileName); sys::fs::make_absolute(fileName); return; } // Try to guess whether /PDBSOURCEPATH is a unix path or a windows path. // Since PDB's are more of a Windows thing, we make this conservative and only // decide that it's a unix path if we're fairly certain. Specifically, if // it starts with a forward slash. SmallString<128> absoluteFileName = config->pdbSourcePath; sys::path::Style guessedStyle = absoluteFileName.startswith("/") ? sys::path::Style::posix : sys::path::Style::windows; sys::path::append(absoluteFileName, guessedStyle, fileName); sys::path::native(absoluteFileName, guessedStyle); sys::path::remove_dots(absoluteFileName, true, guessedStyle); fileName = std::move(absoluteFileName); } static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder, TypeCollection &typeTable) { // Start the TPI or IPI stream header. tpiBuilder.setVersionHeader(pdb::PdbTpiV80); // Flatten the in memory type table and hash each type. 
typeTable.ForEachRecord([&](TypeIndex ti, const CVType &type) { auto hash = pdb::hashTypeRecord(type); if (auto e = hash.takeError()) fatal("type hashing error"); tpiBuilder.addTypeRecord(type.RecordData, *hash); }); } -static bool remapTypeIndex(TypeIndex &ti, ArrayRef typeIndexMap) { - if (ti.isSimple()) - return true; - if (ti.toArrayIndex() >= typeIndexMap.size()) - return false; - ti = typeIndexMap[ti.toArrayIndex()]; - return true; -} - -static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind, - MutableArrayRef recordBytes, - TpiSource *source, - ArrayRef typeRefs) { - MutableArrayRef contents = - recordBytes.drop_front(sizeof(RecordPrefix)); - for (const TiReference &ref : typeRefs) { - unsigned byteSize = ref.Count * sizeof(TypeIndex); - if (contents.size() < ref.Offset + byteSize) - fatal("symbol record too short"); - - // This can be an item index or a type index. Choose the appropriate map. - bool isItemIndex = ref.Kind == TiRefKind::IndexRef; - ArrayRef typeOrItemMap = - isItemIndex ? source->ipiMap : source->tpiMap; - - MutableArrayRef tIs( - reinterpret_cast(contents.data() + ref.Offset), ref.Count); - for (TypeIndex &ti : tIs) { - if (!remapTypeIndex(ti, typeOrItemMap)) { - log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " + - file->getName() + " with bad " + (isItemIndex ? "item" : "type") + - " index 0x" + utohexstr(ti.getIndex())); - ti = TypeIndex(SimpleTypeKind::NotTranslated); - continue; - } - } - } +static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) { + // Start the TPI or IPI stream header. 
+ builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); + builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); + for_each(TpiSource::instances, [&](TpiSource *source) { + builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, + source->mergedTpi.recSizes, + source->mergedTpi.recHashes); + builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, + source->mergedIpi.recSizes, + source->mergedIpi.recHashes); + }); }

/// Remember where one string table reference lives so that it can be
/// re-written later. Narrows `contents` to the sizeof(support::ulittle32_t)
/// bytes starting at `offset` and appends a pointer to them to
/// `strTableRefs`. The referenced bytes are neither read nor changed here.
static void recordStringTableReferenceAtOffset(MutableArrayRef contents,
                                               uint32_t offset,
                                               std::vector &strTableRefs) {
  // NOTE(review): no size check is visible in this function; presumably the
  // caller guarantees that `contents` extends past `offset` by at least
  // sizeof(support::ulittle32_t) bytes -- confirm.
  contents =
      contents.drop_front(offset).take_front(sizeof(support::ulittle32_t));
  ulittle32_t *index = reinterpret_cast(contents.data());
  strTableRefs.push_back(index);
}

/// Collect the string table references of a symbol record of kind `kind`
/// whose bytes are `contents`, appending them to `strTableRefs`. Only
/// S_FILESTATIC contributes a reference. S_DEFRANGE and S_DEFRANGE_SUBFIELD
/// only log that they were not fixed up, and every other kind does nothing.
static void recordStringTableReferences(SymbolKind kind,
                                        MutableArrayRef contents,
                                        std::vector &strTableRefs) {
  // For now we only handle S_FILESTATIC, but we may need the same logic for
  // S_DEFRANGE and S_DEFRANGE_SUBFIELD. However, I cannot seem to generate any
  // PDBs that contain these types of records, so because of the uncertainty
  // they are omitted here until we can prove that it's necessary.
  switch (kind) {
  case SymbolKind::S_FILESTATIC:
    // FileStaticSym::ModFileOffset, recorded at byte offset 8 of `contents`.
    recordStringTableReferenceAtOffset(contents, 8, strTableRefs);
    break;
  case SymbolKind::S_DEFRANGE:
  case SymbolKind::S_DEFRANGE_SUBFIELD:
    log("Not fixing up string table reference in S_DEFRANGE / "
        "S_DEFRANGE_SUBFIELD record");
    break;
  default:
    break;
  }
}

/// Return the kind of the symbol record stored in `recordData`, read from the
/// RecordPrefix found at the very start of the buffer.
// NOTE(review): assumes `recordData` holds at least a complete RecordPrefix;
// that is not checked here -- confirm against callers.
static SymbolKind symbolKind(ArrayRef recordData) {
  const RecordPrefix *prefix = reinterpret_cast(recordData.data());
  return static_cast(uint16_t(prefix->RecordKind));
}

/// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32 static void translateIdSymbols(MutableArrayRef &recordData, - TypeCollection &idTable) { + TypeMerger &tMerger, TpiSource *source) { RecordPrefix *prefix = reinterpret_cast(recordData.data()); SymbolKind kind = symbolKind(recordData); if (kind == SymbolKind::S_PROC_ID_END) { prefix->RecordKind = SymbolKind::S_END; return; } // In an object file, GPROC32_ID has an embedded reference which refers to the // single object file type index namespace. This has already been translated // to the PDB file's ID stream index space, but we need to convert this to a // symbol that refers to the type stream index space. So we remap again from // ID index space to type index space. if (kind == SymbolKind::S_GPROC32_ID || kind == SymbolKind::S_LPROC32_ID) { SmallVector refs; auto content = recordData.drop_front(sizeof(RecordPrefix)); CVSymbol sym(recordData); discoverTypeIndicesInSymbol(sym, refs); assert(refs.size() == 1); assert(refs.front().Count == 1); TypeIndex *ti = reinterpret_cast(content.data() + refs[0].Offset); // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. - // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and + // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and // in both cases we just need the second type index.
if (!ti->isSimple() && !ti->isNoneType()) { - CVType funcIdData = idTable.getType(*ti); - ArrayRef tiBuf = funcIdData.data().slice(8, 4); - assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record"); - *ti = *reinterpret_cast(tiBuf.data()); + if (config->debugGHashes) { + auto idToType = source->funcIdToType.find(*ti); + if (idToType == source->funcIdToType.end()) { + warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item " + "index {1:X} which is not a LF_[M]FUNC_ID record", + source->file->getName(), ti->getIndex())); + *ti = TypeIndex(SimpleTypeKind::NotTranslated); + } else { + *ti = idToType->second; + } + } else { + CVType funcIdData = tMerger.getIDTable().getType(*ti); + ArrayRef tiBuf = funcIdData.data().slice(8, 4); + assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record"); + *ti = *reinterpret_cast(tiBuf.data()); + } } kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32 : SymbolKind::S_LPROC32; prefix->RecordKind = uint16_t(kind); } } /// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned. /// The object file may not be aligned. static MutableArrayRef copyAndAlignSymbol(const CVSymbol &sym, MutableArrayRef &alignedMem) { size_t size = alignTo(sym.length(), alignOf(CodeViewContainer::Pdb)); assert(size >= 4 && "record too short"); assert(size <= MaxRecordLength && "record too long"); assert(alignedMem.size() >= size && "didn't preallocate enough"); // Copy the symbol record and zero out any padding bytes. MutableArrayRef newData = alignedMem.take_front(size); alignedMem = alignedMem.drop_front(size); memcpy(newData.data(), sym.data().data(), sym.length()); memset(newData.data() + sym.length(), 0, size - sym.length()); // Update the record prefix length. It should point to the beginning of the // next record. 
auto *prefix = reinterpret_cast(newData.data()); prefix->RecordLen = size - 2; return newData; } struct ScopeRecord { ulittle32_t ptrParent; ulittle32_t ptrEnd; }; struct SymbolScope { ScopeRecord *openingRecord; uint32_t scopeOffset; }; static void scopeStackOpen(SmallVectorImpl &stack, uint32_t curOffset, CVSymbol &sym) { assert(symbolOpensScope(sym.kind())); SymbolScope s; s.scopeOffset = curOffset; s.openingRecord = const_cast( reinterpret_cast(sym.content().data())); s.openingRecord->ptrParent = stack.empty() ? 0 : stack.back().scopeOffset; stack.push_back(s); } static void scopeStackClose(SmallVectorImpl &stack, uint32_t curOffset, InputFile *file) { if (stack.empty()) { warn("symbol scopes are not balanced in " + file->getName()); return; } SymbolScope s = stack.pop_back_val(); s.openingRecord->ptrEnd = curOffset; } static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) { switch (sym.kind()) { case SymbolKind::S_GDATA32: case SymbolKind::S_CONSTANT: case SymbolKind::S_GTHREAD32: // We really should not be seeing S_PROCREF and S_LPROCREF in the first place // since they are synthesized by the linker in response to S_GPROC32 and // S_LPROC32, but if we do see them, don't put them in the module stream I // guess. case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: return false; // S_UDT records go in the module stream if it is not a global S_UDT. case SymbolKind::S_UDT: return !isGlobalScope; // S_GDATA32 does not go in the module stream, but S_LDATA32 does. 
case SymbolKind::S_LDATA32: case SymbolKind::S_LTHREAD32: default: return true; } } static bool symbolGoesInGlobalsStream(const CVSymbol &sym, bool isFunctionScope) { switch (sym.kind()) { case SymbolKind::S_CONSTANT: case SymbolKind::S_GDATA32: case SymbolKind::S_GTHREAD32: case SymbolKind::S_GPROC32: case SymbolKind::S_LPROC32: // We really should not be seeing S_PROCREF and S_LPROCREF in the first place // since they are synthesized by the linker in response to S_GPROC32 and // S_LPROC32, but if we do see them, copy them straight through. case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: return true; // Records that go in the globals stream, unless they are function-local. case SymbolKind::S_UDT: case SymbolKind::S_LDATA32: case SymbolKind::S_LTHREAD32: return !isFunctionScope; default: return false; } } static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex, unsigned symOffset, const CVSymbol &sym) { switch (sym.kind()) { case SymbolKind::S_CONSTANT: case SymbolKind::S_UDT: case SymbolKind::S_GDATA32: case SymbolKind::S_GTHREAD32: case SymbolKind::S_LTHREAD32: case SymbolKind::S_LDATA32: case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: builder.addGlobalSymbol(sym); break; case SymbolKind::S_GPROC32: case SymbolKind::S_LPROC32: { SymbolRecordKind k = SymbolRecordKind::ProcRefSym; if (sym.kind() == SymbolKind::S_LPROC32) k = SymbolRecordKind::LocalProcRef; ProcRefSym ps(k); ps.Module = modIndex; // For some reason, MSVC seems to add one to this value. 
++ps.Module; ps.Name = getSymbolName(sym); ps.SumName = 0; ps.SymOffset = symOffset; builder.addGlobalSymbol(ps); break; } default: llvm_unreachable("Invalid symbol kind!"); } } void PDBLinker::mergeSymbolRecords(TpiSource *source, std::vector &stringTableRefs, BinaryStreamRef symData) { ObjFile *file = source->file; ArrayRef symsBuffer; cantFail(symData.readBytes(0, symData.getLength(), symsBuffer)); SmallVector scopes; // Iterate every symbol to check if any need to be realigned, and if so, how // much space we need to allocate for them. bool needsRealignment = false; unsigned totalRealignedSize = 0; auto ec = forEachCodeViewRecord( symsBuffer, [&](CVSymbol sym) -> llvm::Error { unsigned realignedSize = alignTo(sym.length(), alignOf(CodeViewContainer::Pdb)); needsRealignment |= realignedSize != sym.length(); totalRealignedSize += realignedSize; return Error::success(); }); // If any of the symbol record lengths was corrupt, ignore them all, warn // about it, and move on. if (ec) { warn("corrupt symbol records in " + file->getName()); consumeError(std::move(ec)); return; } // If any symbol needed realignment, allocate enough contiguous memory for // them all. Typically symbol subsections are small enough that this will not // cause fragmentation. MutableArrayRef alignedSymbolMem; if (needsRealignment) { void *alignedData = bAlloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb)); alignedSymbolMem = makeMutableArrayRef( reinterpret_cast(alignedData), totalRealignedSize); } // Iterate again, this time doing the real work. unsigned curSymOffset = file->moduleDBI->getNextSymbolOffset(); ArrayRef bulkSymbols; cantFail(forEachCodeViewRecord( symsBuffer, [&](CVSymbol sym) -> llvm::Error { // Align the record if required. 
MutableArrayRef recordBytes; if (needsRealignment) { recordBytes = copyAndAlignSymbol(sym, alignedSymbolMem); sym = CVSymbol(recordBytes); } else { // Otherwise, we can actually mutate the symbol directly, since we // copied it to apply relocations. recordBytes = makeMutableArrayRef( const_cast(sym.data().data()), sym.length()); } - // Discover type index references in the record. Skip it if we don't - // know where they are. - SmallVector typeRefs; - if (!discoverTypeIndicesInSymbol(sym, typeRefs)) { - log("ignoring unknown symbol record with kind 0x" + - utohexstr(sym.kind())); + // Re-map all the type index references. + if (!source->remapTypesInSymbolRecord(recordBytes)) { + log("error remapping types in symbol of kind 0x" + + utohexstr(sym.kind()) + ", ignoring"); return Error::success(); } - // Re-map all the type index references. - remapTypesInSymbolRecord(file, sym.kind(), recordBytes, source, - typeRefs); - // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(recordBytes, tMerger.getIDTable()); + translateIdSymbols(recordBytes, tMerger, source); sym = CVSymbol(recordBytes); // If this record refers to an offset in the object file's string table, // add that item to the global PDB string table and re-write the index. recordStringTableReferences(sym.kind(), recordBytes, stringTableRefs); // Fill in "Parent" and "End" fields by maintaining a stack of scopes. if (symbolOpensScope(sym.kind())) scopeStackOpen(scopes, curSymOffset, sym); else if (symbolEndsScope(sym.kind())) scopeStackClose(scopes, curSymOffset, file); // Add the symbol to the globals stream if necessary. Do this before // adding the symbol to the module since we may need to get the next // symbol offset, and writing to the module's symbol stream will update // that offset. 
if (symbolGoesInGlobalsStream(sym, !scopes.empty())) { addGlobalSymbol(builder.getGsiBuilder(), file->moduleDBI->getModuleIndex(), curSymOffset, sym); ++globalSymbols; } if (symbolGoesInModuleStream(sym, scopes.empty())) { // Add symbols to the module in bulk. If this symbol is contiguous // with the previous run of symbols to add, combine the ranges. If // not, close the previous range of symbols and start a new one. if (sym.data().data() == bulkSymbols.end()) { bulkSymbols = makeArrayRef(bulkSymbols.data(), bulkSymbols.size() + sym.length()); } else { file->moduleDBI->addSymbolsInBulk(bulkSymbols); bulkSymbols = recordBytes; } curSymOffset += sym.length(); ++moduleSymbols; } return Error::success(); })); // Add any remaining symbols we've accumulated. file->moduleDBI->addSymbolsInBulk(bulkSymbols); } static pdb::SectionContrib createSectionContrib(const Chunk *c, uint32_t modi) { OutputSection *os = c ? c->getOutputSection() : nullptr; pdb::SectionContrib sc; memset(&sc, 0, sizeof(sc)); sc.ISect = os ? os->sectionIndex : llvm::pdb::kInvalidStreamIndex; sc.Off = c && os ? c->getRVA() - os->getRVA() : 0; sc.Size = c ? c->getSize() : -1; if (auto *secChunk = dyn_cast_or_null(c)) { sc.Characteristics = secChunk->header->Characteristics; sc.Imod = secChunk->file->moduleDBI->getModuleIndex(); ArrayRef contents = secChunk->getContents(); JamCRC crc(0); crc.update(contents); sc.DataCrc = crc.getCRC(); } else { sc.Characteristics = os ? 
os->header.Characteristics : 0; sc.Imod = modi; } sc.RelocCrc = 0; // FIXME return sc; } static uint32_t translateStringTableIndex(uint32_t objIndex, const DebugStringTableSubsectionRef &objStrTable, DebugStringTableSubsection &pdbStrTable) { auto expectedString = objStrTable.getString(objIndex); if (!expectedString) { warn("Invalid string table reference"); consumeError(expectedString.takeError()); return 0; } return pdbStrTable.insert(*expectedString); } void DebugSHandler::handleDebugS(ArrayRef relocatedDebugContents) { relocatedDebugContents = SectionChunk::consumeDebugMagic(relocatedDebugContents, ".debug$S"); DebugSubsectionArray subsections; BinaryStreamReader reader(relocatedDebugContents, support::little); exitOnErr(reader.readArray(subsections, relocatedDebugContents.size())); for (const DebugSubsectionRecord &ss : subsections) { // Ignore subsections with the 'ignore' bit. Some versions of the Visual C++ // runtime have subsections with this bit set. if (uint32_t(ss.kind()) & codeview::SubsectionIgnoreFlag) continue; switch (ss.kind()) { case DebugSubsectionKind::StringTable: { assert(!cvStrTab.valid() && "Encountered multiple string table subsections!"); exitOnErr(cvStrTab.initialize(ss.getRecordData())); break; } case DebugSubsectionKind::FileChecksums: assert(!checksums.valid() && "Encountered multiple checksum subsections!"); exitOnErr(checksums.initialize(ss.getRecordData())); break; case DebugSubsectionKind::Lines: // We can add the relocated line table directly to the PDB without // modification because the file checksum offsets will stay the same. file.moduleDBI->addDebugSubsection(ss); break; case DebugSubsectionKind::InlineeLines: // The inlinee lines subsection also has file checksum table references // that can be used directly, but it contains function id references that // must be remapped. 
mergeInlineeLines(ss); break; case DebugSubsectionKind::FrameData: { // We need to re-write string table indices here, so save off all // frame data subsections until we've processed the entire list of // subsections so that we can be sure we have the string table. DebugFrameDataSubsectionRef fds; exitOnErr(fds.initialize(ss.getRecordData())); newFpoFrames.push_back(std::move(fds)); break; } case DebugSubsectionKind::Symbols: { linker.mergeSymbolRecords(source, stringTableReferences, ss.getRecordData()); break; } case DebugSubsectionKind::CrossScopeImports: case DebugSubsectionKind::CrossScopeExports: // These appear to relate to cross-module optimization, so we might use // these for ThinLTO. break; case DebugSubsectionKind::ILLines: case DebugSubsectionKind::FuncMDTokenMap: case DebugSubsectionKind::TypeMDTokenMap: case DebugSubsectionKind::MergedAssemblyInput: // These appear to relate to .Net assembly info. break; case DebugSubsectionKind::CoffSymbolRVA: // Unclear what this is for. break; default: warn("ignoring unknown debug$S subsection kind 0x" + utohexstr(uint32_t(ss.kind())) + " in file " + toString(&file)); break; } } } static Expected getFileName(const DebugStringTableSubsectionRef &strings, const DebugChecksumsSubsectionRef &checksums, uint32_t fileID) { auto iter = checksums.getArray().at(fileID); if (iter == checksums.getArray().end()) return make_error(cv_error_code::no_records); uint32_t offset = iter->FileNameOffset; return strings.getString(offset); } void DebugSHandler::mergeInlineeLines( const DebugSubsectionRecord &inlineeSubsection) { DebugInlineeLinesSubsectionRef inlineeLines; exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData())); + if (!source) { + warn("ignoring inlinee lines section in file that lacks type information"); + return; + } // Remap type indices in inlinee line records in place. 
for (const InlineeSourceLine &line : inlineeLines) { TypeIndex &inlinee = *const_cast(&line.Header->Inlinee); - if (!remapTypeIndex(inlinee, source->ipiMap)) { + if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) { log("bad inlinee line record in " + file.getName() + " with bad inlinee index 0x" + utohexstr(inlinee.getIndex())); } } // Add the modified inlinee line subsection directly. file.moduleDBI->addDebugSubsection(inlineeSubsection); } void DebugSHandler::finish() { pdb::DbiStreamBuilder &dbiBuilder = linker.builder.getDbiBuilder(); // We should have seen all debug subsections across the entire object file now // which means that if a StringTable subsection and Checksums subsection were // present, now is the time to handle them. if (!cvStrTab.valid()) { if (checksums.valid()) fatal(".debug$S sections with a checksums subsection must also contain a " "string table subsection"); if (!stringTableReferences.empty()) warn("No StringTable subsection was encountered, but there are string " "table references"); return; } // Rewrite string table indices in the Fpo Data and symbol records to refer to // the global PDB string table instead of the object file string table. for (DebugFrameDataSubsectionRef &fds : newFpoFrames) { const ulittle32_t *reloc = fds.getRelocPtr(); for (codeview::FrameData fd : fds) { fd.RvaStart += *reloc; fd.FrameFunc = translateStringTableIndex(fd.FrameFunc, cvStrTab, linker.pdbStrTab); dbiBuilder.addNewFpoData(fd); } } for (ulittle32_t *ref : stringTableReferences) *ref = translateStringTableIndex(*ref, cvStrTab, linker.pdbStrTab); // Make a new file checksum table that refers to offsets in the PDB-wide // string table. Generally the string table subsection appears after the // checksum table, so we have to do this after looping over all the // subsections. The new checksum table must have the exact same layout and // size as the original. Otherwise, the file references in the line and // inlinee line tables will be incorrect. 
auto newChecksums = std::make_unique(linker.pdbStrTab); for (FileChecksumEntry &fc : checksums) { SmallString<128> filename = exitOnErr(cvStrTab.getString(fc.FileNameOffset)); pdbMakeAbsolute(filename); exitOnErr(dbiBuilder.addModuleSourceFile(*file.moduleDBI, filename)); newChecksums->addChecksum(filename, fc.Kind, fc.Checksum); } assert(checksums.getArray().getUnderlyingStream().getLength() == newChecksums->calculateSerializedSize() && "file checksum table must have same layout"); file.moduleDBI->addDebugSubsection(std::move(newChecksums)); } static void warnUnusable(InputFile *f, Error e) { if (!config->warnDebugInfoUnusable) { consumeError(std::move(e)); return; } auto msg = "Cannot use debug info for '" + toString(f) + "' [LNK4099]"; if (e) warn(msg + "\n>>> failed to load reference " + toString(std::move(e))); else warn(msg); } -bool PDBLinker::mergeTypeRecords(TpiSource *source) { - ScopedTimer t(typeMergingTimer); - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. - if (Error e = source->mergeDebugT(&tMerger)) { - // If the .debug$T sections fail to merge, assume there is no debug info. - warnUnusable(source->file, std::move(e)); - return false; - } - return true; -} - // Allocate memory for a .debug$S / .debug$F section and relocate it. static ArrayRef relocateDebugChunk(SectionChunk &debugChunk) { uint8_t *buffer = bAlloc.Allocate(debugChunk.getSize()); assert(debugChunk.getOutputSectionIdx() == 0 && "debug sections should not be in output sections"); debugChunk.writeTo(buffer); return makeArrayRef(buffer, debugChunk.getSize()); } void PDBLinker::addDebugSymbols(TpiSource *source) { // If this TpiSource doesn't have an object file, it must be from a type // server PDB. Type server PDBs do not contain symbols, so stop here. 
if (!source->file) return; ScopedTimer t(symbolMergingTimer); pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); DebugSHandler dsh(*this, *source->file, source); // Now do all live .debug$S and .debug$F sections. for (SectionChunk *debugChunk : source->file->getDebugChunks()) { if (!debugChunk->live || debugChunk->getSize() == 0) continue; bool isDebugS = debugChunk->getSectionName() == ".debug$S"; bool isDebugF = debugChunk->getSectionName() == ".debug$F"; if (!isDebugS && !isDebugF) continue; ArrayRef relocatedDebugContents = relocateDebugChunk(*debugChunk); if (isDebugS) { dsh.handleDebugS(relocatedDebugContents); } else if (isDebugF) { FixedStreamArray fpoRecords; BinaryStreamReader reader(relocatedDebugContents, support::little); uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData); exitOnErr(reader.readArray(fpoRecords, count)); // These are already relocated and don't refer to the string table, so we // can just copy it. for (const object::FpoData &fd : fpoRecords) dbiBuilder.addOldFpoData(fd); } } // Do any post-processing now that all .debug$S sections have been processed. dsh.finish(); } // Add a module descriptor for every object file. We need to put an absolute // path to the object into the PDB. If this is a plain object, we make its // path absolute. If it's an object in an archive, we make the archive path // absolute. static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) { pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); SmallString<128> objName; bool inArchive = !file->parentName.empty(); objName = inArchive ? file->parentName : file->getName(); pdbMakeAbsolute(objName); StringRef modName = inArchive ? 
file->getName() : StringRef(objName); file->moduleDBI = &exitOnErr(dbiBuilder.addModuleInfo(modName)); file->moduleDBI->setObjFileName(objName); ArrayRef chunks = file->getChunks(); uint32_t modi = file->moduleDBI->getModuleIndex(); for (Chunk *c : chunks) { auto *secChunk = dyn_cast(c); if (!secChunk || !secChunk->live) continue; pdb::SectionContrib sc = createSectionContrib(secChunk, modi); file->moduleDBI->setFirstSectionContrib(sc); break; } } void PDBLinker::addDebug(TpiSource *source) { + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. If we are using ghashes, types have + // already been merged. + if (!config->debugGHashes) { + ScopedTimer t(typeMergingTimer); + if (Error e = source->mergeDebugT(&tMerger)) { + // If type merging failed, ignore the symbols. + warnUnusable(source->file, std::move(e)); + return; + } + } + // If type merging failed, ignore the symbols. - if (mergeTypeRecords(source)) - addDebugSymbols(source); + Error typeError = std::move(source->typeMergingError); + if (typeError) { + warnUnusable(source->file, std::move(typeError)); + return; + } + + addDebugSymbols(source); } static pdb::BulkPublic createPublic(Defined *def) { pdb::BulkPublic pub; pub.Name = def->getName().data(); pub.NameLen = def->getName().size(); PublicSymFlags flags = PublicSymFlags::None; if (auto *d = dyn_cast(def)) { if (d->getCOFFSymbol().isFunctionDefinition()) flags = PublicSymFlags::Function; } else if (isa(def)) { flags = PublicSymFlags::Function; } pub.setFlags(flags); OutputSection *os = def->getChunk()->getOutputSection(); assert(os && "all publics should be in final image"); pub.Offset = def->getRVA() - os->getRVA(); pub.Segment = os->sectionIndex; return pub; } // Add all object files to the PDB. 
Merge .debug$T sections into IpiData and // TpiData. void PDBLinker::addObjectsToPDB() { ScopedTimer t1(addObjectsTimer); // Create module descriptors for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(builder, obj); }); - // Merge dependencies - TpiSource::forEachSource([&](TpiSource *source) { - if (source->isDependency()) - addDebug(source); - }); + // Reorder dependency type sources to come first. + TpiSource::sortDependencies(); - // Merge regular and dependent OBJs - TpiSource::forEachSource([&](TpiSource *source) { - if (!source->isDependency()) - addDebug(source); - }); + // Merge type information from input files using global type hashing. + if (config->debugGHashes) + tMerger.mergeTypesWithGHash(); + + // Merge dependencies and then regular objects. + for_each(TpiSource::dependencySources, + [&](TpiSource *source) { addDebug(source); }); + for_each(TpiSource::objectSources, + [&](TpiSource *source) { addDebug(source); }); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); // Construct TPI and IPI stream contents. ScopedTimer t2(tpiStreamLayoutTimer); - addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); - addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + // Collect all the merged types. + if (config->debugGHashes) { + addGHashTypeInfo(builder); + } else { + addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); + addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + } t2.stop(); } void PDBLinker::addPublicsToPDB() { ScopedTimer t3(publicsLayoutTimer); // Compute the public symbols. auto &gsiBuilder = builder.getGsiBuilder(); std::vector publics; symtab->forEachSymbol([&publics](Symbol *s) { // Only emit external, defined, live symbols that have a chunk. Static, // non-external symbols do not appear in the symbol table. 
auto *def = dyn_cast(s); if (def && def->isLive() && def->getChunk()) publics.push_back(createPublic(def)); }); if (!publics.empty()) { publicSymbols = publics.size(); gsiBuilder.addPublicSymbols(std::move(publics)); } } void PDBLinker::printStats() { if (!config->showSummary) return; SmallString<256> buffer; raw_svector_ostream stream(buffer); stream << center_justify("Summary", 80) << '\n' << std::string(80, '-') << '\n'; auto print = [&](uint64_t v, StringRef s) { stream << format_decimal(v, 15) << " " << s << '\n'; }; print(ObjFile::instances.size(), "Input OBJ files (expanded from all cmd-line inputs)"); print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies"); print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies"); - print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(), - "Merged TPI records"); + print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records"); + print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records"); print(pdbStrTab.size(), "Output PDB strings"); print(globalSymbols, "Global symbol records"); print(moduleSymbols, "Module symbol records"); print(publicSymbols, "Public symbol records"); auto printLargeInputTypeRecs = [&](StringRef name, ArrayRef recCounts, TypeCollection &records) { // Figure out which type indices were responsible for the most duplicate // bytes in the input files. These should be frequently emitted LF_CLASS and // LF_FIELDLIST records. 
struct TypeSizeInfo { uint32_t typeSize; uint32_t dupCount; TypeIndex typeIndex; uint64_t totalInputSize() const { return uint64_t(dupCount) * typeSize; } bool operator<(const TypeSizeInfo &rhs) const { if (totalInputSize() == rhs.totalInputSize()) return typeIndex < rhs.typeIndex; return totalInputSize() < rhs.totalInputSize(); } }; SmallVector tsis; for (auto e : enumerate(recCounts)) { TypeIndex typeIndex = TypeIndex::fromArrayIndex(e.index()); uint32_t typeSize = records.getType(typeIndex).length(); uint32_t dupCount = e.value(); tsis.push_back({typeSize, dupCount, typeIndex}); } if (!tsis.empty()) { stream << "\nTop 10 types responsible for the most " << name << " input:\n"; stream << " index total bytes count size\n"; llvm::sort(tsis); unsigned i = 0; for (const auto &tsi : reverse(tsis)) { stream << formatv(" {0,10:X}: {1,14:N} = {2,5:N} * {3,6:N}\n", tsi.typeIndex.getIndex(), tsi.totalInputSize(), tsi.dupCount, tsi.typeSize); if (++i >= 10) break; } stream << "Run llvm-pdbutil to print details about a particular record:\n"; stream << formatv("llvm-pdbutil dump -{0}s -{0}-index {1:X} {2}\n", (name == "TPI" ? "type" : "id"), tsis.back().typeIndex.getIndex(), config->pdbPath); } }; - printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); - printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + if (!config->debugGHashes) { + // FIXME: Reimplement for ghash. 
+ printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); + printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + } message(buffer); } void PDBLinker::addNatvisFiles() { for (StringRef file : config->natvisFiles) { ErrorOr> dataOrErr = MemoryBuffer::getFile(file); if (!dataOrErr) { warn("Cannot open input file: " + file); continue; } builder.addInjectedSource(file, std::move(*dataOrErr)); } } void PDBLinker::addNamedStreams() { for (const auto &streamFile : config->namedStreams) { const StringRef stream = streamFile.getKey(), file = streamFile.getValue(); ErrorOr> dataOrErr = MemoryBuffer::getFile(file); if (!dataOrErr) { warn("Cannot open input file: " + file); continue; } exitOnErr(builder.addNamedStream(stream, (*dataOrErr)->getBuffer())); } } static codeview::CPUType toCodeViewMachine(COFF::MachineTypes machine) { switch (machine) { case COFF::IMAGE_FILE_MACHINE_AMD64: return codeview::CPUType::X64; case COFF::IMAGE_FILE_MACHINE_ARM: return codeview::CPUType::ARM7; case COFF::IMAGE_FILE_MACHINE_ARM64: return codeview::CPUType::ARM64; case COFF::IMAGE_FILE_MACHINE_ARMNT: return codeview::CPUType::ARMNT; case COFF::IMAGE_FILE_MACHINE_I386: return codeview::CPUType::Intel80386; default: llvm_unreachable("Unsupported CPU Type"); } } // Mimic MSVC which surrounds arguments containing whitespace with quotes. // Double double-quotes are handled, so that the resulting string can be // executed again on the cmd-line. 
static std::string quote(ArrayRef args) { std::string r; r.reserve(256); for (StringRef a : args) { if (!r.empty()) r.push_back(' '); bool hasWS = a.find(' ') != StringRef::npos; bool hasQ = a.find('"') != StringRef::npos; if (hasWS || hasQ) r.push_back('"'); if (hasQ) { SmallVector s; a.split(s, '"'); r.append(join(s, "\"\"")); } else { r.append(std::string(a)); } if (hasWS || hasQ) r.push_back('"'); } return r; } static void fillLinkerVerRecord(Compile3Sym &cs) { cs.Machine = toCodeViewMachine(config->machine); // Interestingly, if we set the string to 0.0.0.0, then when trying to view // local variables WinDbg emits an error that private symbols are not present. // By setting this to a valid MSVC linker version string, local variables are // displayed properly. As such, even though it is not representative of // LLVM's version information, we need this for compatibility. cs.Flags = CompileSym3Flags::None; cs.VersionBackendBuild = 25019; cs.VersionBackendMajor = 14; cs.VersionBackendMinor = 10; cs.VersionBackendQFE = 0; // MSVC also sets the frontend to 0.0.0.0 since this is specifically for the // linker module (which is by definition a backend), so we don't need to do // anything here. Also, it seems we can use "LLVM Linker" for the linker name // without any problems. Only the backend version has to be hardcoded to a // magic number. 
cs.VersionFrontendBuild = 0; cs.VersionFrontendMajor = 0; cs.VersionFrontendMinor = 0; cs.VersionFrontendQFE = 0; cs.Version = "LLVM Linker"; cs.setLanguage(SourceLanguage::Link); } static void addCommonLinkerModuleSymbols(StringRef path, pdb::DbiModuleDescriptorBuilder &mod) { ObjNameSym ons(SymbolRecordKind::ObjNameSym); EnvBlockSym ebs(SymbolRecordKind::EnvBlockSym); Compile3Sym cs(SymbolRecordKind::Compile3Sym); fillLinkerVerRecord(cs); ons.Name = "* Linker *"; ons.Signature = 0; ArrayRef args = makeArrayRef(config->argv).drop_front(); std::string argStr = quote(args); ebs.Fields.push_back("cwd"); SmallString<64> cwd; if (config->pdbSourcePath.empty()) sys::fs::current_path(cwd); else cwd = config->pdbSourcePath; ebs.Fields.push_back(cwd); ebs.Fields.push_back("exe"); SmallString<64> exe = config->argv[0]; pdbMakeAbsolute(exe); ebs.Fields.push_back(exe); ebs.Fields.push_back("pdb"); ebs.Fields.push_back(path); ebs.Fields.push_back("cmd"); ebs.Fields.push_back(argStr); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( ons, bAlloc, CodeViewContainer::Pdb)); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( cs, bAlloc, CodeViewContainer::Pdb)); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( ebs, bAlloc, CodeViewContainer::Pdb)); } static void addLinkerModuleCoffGroup(PartialSection *sec, pdb::DbiModuleDescriptorBuilder &mod, OutputSection &os) { // If there's a section, there's at least one chunk assert(!sec->chunks.empty()); const Chunk *firstChunk = *sec->chunks.begin(); const Chunk *lastChunk = *sec->chunks.rbegin(); // Emit COFF group CoffGroupSym cgs(SymbolRecordKind::CoffGroupSym); cgs.Name = sec->name; cgs.Segment = os.sectionIndex; cgs.Offset = firstChunk->getRVA() - os.getRVA(); cgs.Size = lastChunk->getRVA() + lastChunk->getSize() - firstChunk->getRVA(); cgs.Characteristics = sec->characteristics; // Somehow .idata sections & sections groups in the debug symbol stream have // the "write" flag set. 
However the section header for the corresponding // .idata section doesn't have it. if (cgs.Name.startswith(".idata")) cgs.Characteristics |= llvm::COFF::IMAGE_SCN_MEM_WRITE; mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( cgs, bAlloc, CodeViewContainer::Pdb)); } static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod, OutputSection &os) { SectionSym sym(SymbolRecordKind::SectionSym); sym.Alignment = 12; // 2^12 = 4KB sym.Characteristics = os.header.Characteristics; sym.Length = os.getVirtualSize(); sym.Name = os.name; sym.Rva = os.getRVA(); sym.SectionNumber = os.sectionIndex; mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( sym, bAlloc, CodeViewContainer::Pdb)); // Skip COFF groups in MinGW because it adds a significant footprint to the // PDB, due to each function being in its own section if (config->mingw) return; // Output COFF groups for individual chunks of this section. for (PartialSection *sec : os.contribSections) { addLinkerModuleCoffGroup(sec, mod, os); } } // Add all import files as modules to the PDB. void PDBLinker::addImportFilesToPDB(ArrayRef outputSections) { if (ImportFile::instances.empty()) return; std::map dllToModuleDbi; for (ImportFile *file : ImportFile::instances) { if (!file->live) continue; if (!file->thunkSym) continue; if (!file->thunkLive) continue; std::string dll = StringRef(file->dllName).lower(); llvm::pdb::DbiModuleDescriptorBuilder *&mod = dllToModuleDbi[dll]; if (!mod) { pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); SmallString<128> libPath = file->parentName; pdbMakeAbsolute(libPath); sys::path::native(libPath); // Name modules similar to MSVC's link.exe. 
// The first module is the simple dll filename llvm::pdb::DbiModuleDescriptorBuilder &firstMod = exitOnErr(dbiBuilder.addModuleInfo(file->dllName)); firstMod.setObjFileName(libPath); pdb::SectionContrib sc = createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex); firstMod.setFirstSectionContrib(sc); // The second module is where the import stream goes. mod = &exitOnErr(dbiBuilder.addModuleInfo("Import:" + file->dllName)); mod->setObjFileName(libPath); } DefinedImportThunk *thunk = cast(file->thunkSym); Chunk *thunkChunk = thunk->getChunk(); OutputSection *thunkOS = thunkChunk->getOutputSection(); ObjNameSym ons(SymbolRecordKind::ObjNameSym); Compile3Sym cs(SymbolRecordKind::Compile3Sym); Thunk32Sym ts(SymbolRecordKind::Thunk32Sym); ScopeEndSym es(SymbolRecordKind::ScopeEndSym); ons.Name = file->dllName; ons.Signature = 0; fillLinkerVerRecord(cs); ts.Name = thunk->getName(); ts.Parent = 0; ts.End = 0; ts.Next = 0; ts.Thunk = ThunkOrdinal::Standard; ts.Length = thunkChunk->getSize(); ts.Segment = thunkOS->sectionIndex; ts.Offset = thunkChunk->getRVA() - thunkOS->getRVA(); mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol( ons, bAlloc, CodeViewContainer::Pdb)); mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol( cs, bAlloc, CodeViewContainer::Pdb)); SmallVector scopes; CVSymbol newSym = codeview::SymbolSerializer::writeOneSymbol( ts, bAlloc, CodeViewContainer::Pdb); scopeStackOpen(scopes, mod->getNextSymbolOffset(), newSym); mod->addSymbol(newSym); newSym = codeview::SymbolSerializer::writeOneSymbol(es, bAlloc, CodeViewContainer::Pdb); scopeStackClose(scopes, mod->getNextSymbolOffset(), file); mod->addSymbol(newSym); pdb::SectionContrib sc = createSectionContrib(thunk->getChunk(), mod->getModuleIndex()); mod->setFirstSectionContrib(sc); } } // Creates a PDB file. 
void lld::coff::createPDB(SymbolTable *symtab, ArrayRef outputSections, ArrayRef sectionTable, llvm::codeview::DebugInfo *buildId) { ScopedTimer t1(totalPdbLinkTimer); PDBLinker pdb(symtab); pdb.initialize(buildId); pdb.addObjectsToPDB(); pdb.addImportFilesToPDB(outputSections); pdb.addSections(outputSections, sectionTable); pdb.addNatvisFiles(); pdb.addNamedStreams(); pdb.addPublicsToPDB(); ScopedTimer t2(diskCommitTimer); codeview::GUID guid; pdb.commit(&guid); memcpy(&buildId->PDB70.Signature, &guid, 16); t2.stop(); t1.stop(); pdb.printStats(); } void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) { exitOnErr(builder.initialize(4096)); // 4096 is blocksize buildId->Signature.CVSignature = OMF::Signature::PDB70; // Signature is set to a hash of the PDB contents when the PDB is done. memset(buildId->PDB70.Signature, 0, 16); buildId->PDB70.Age = 1; // Create streams in MSF for predefined streams, namely // PDB, TPI, DBI and IPI. for (int i = 0; i < (int)pdb::kSpecialStreamCount; ++i) exitOnErr(builder.getMsfBuilder().addStream(0)); // Add an Info stream. auto &infoBuilder = builder.getInfoBuilder(); infoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); infoBuilder.setHashPDBContentsToGUID(true); // Add an empty DBI stream. pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); dbiBuilder.setAge(buildId->PDB70.Age); dbiBuilder.setVersionHeader(pdb::PdbDbiV70); dbiBuilder.setMachineType(config->machine); // Technically we are not link.exe 14.11, but there are known cases where // debugging tools on Windows expect Microsoft-specific version numbers or // they fail to work at all. Since we know we produce PDBs that are // compatible with LINK 14.11, we set that version number here. dbiBuilder.setBuildNumber(14, 11); } void PDBLinker::addSections(ArrayRef outputSections, ArrayRef sectionTable) { // It's not entirely clear what this is, but the * Linker * module uses it. 
pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); nativePath = config->pdbPath; pdbMakeAbsolute(nativePath); uint32_t pdbFilePathNI = dbiBuilder.addECName(nativePath); auto &linkerModule = exitOnErr(dbiBuilder.addModuleInfo("* Linker *")); linkerModule.setPdbFilePathNI(pdbFilePathNI); addCommonLinkerModuleSymbols(nativePath, linkerModule); // Add section contributions. They must be ordered by ascending RVA. for (OutputSection *os : outputSections) { addLinkerModuleSectionSymbol(linkerModule, *os); for (Chunk *c : os->chunks) { pdb::SectionContrib sc = createSectionContrib(c, linkerModule.getModuleIndex()); builder.getDbiBuilder().addSectionContrib(sc); } } // The * Linker * first section contrib is only used along with /INCREMENTAL, // to provide trampolines thunks for incremental function patching. Set this // as "unused" because LLD doesn't support /INCREMENTAL link. pdb::SectionContrib sc = createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex); linkerModule.setFirstSectionContrib(sc); // Add Section Map stream. ArrayRef sections = { (const object::coff_section *)sectionTable.data(), sectionTable.size() / sizeof(object::coff_section)}; dbiBuilder.createSectionMap(sections); // Add COFF section header stream. exitOnErr( dbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, sectionTable)); } void PDBLinker::commit(codeview::GUID *guid) { ExitOnError exitOnErr((config->pdbPath + ": ").str()); // Write to a file. exitOnErr(builder.commit(config->pdbPath, guid)); } static uint32_t getSecrelReloc() { switch (config->machine) { case AMD64: return COFF::IMAGE_REL_AMD64_SECREL; case I386: return COFF::IMAGE_REL_I386_SECREL; case ARMNT: return COFF::IMAGE_REL_ARM_SECREL; case ARM64: return COFF::IMAGE_REL_ARM64_SECREL; default: llvm_unreachable("unknown machine type"); } } // Try to find a line table for the given offset Addr into the given chunk C. 
// If a line table was found, the line table, the string and checksum tables // that are used to interpret the line table, and the offset of Addr in the line // table are stored in the output arguments. Returns whether a line table was // found. static bool findLineTable(const SectionChunk *c, uint32_t addr, DebugStringTableSubsectionRef &cvStrTab, DebugChecksumsSubsectionRef &checksums, DebugLinesSubsectionRef &lines, uint32_t &offsetInLinetable) { ExitOnError exitOnErr; uint32_t secrelReloc = getSecrelReloc(); for (SectionChunk *dbgC : c->file->getDebugChunks()) { if (dbgC->getSectionName() != ".debug$S") continue; // Build a mapping of SECREL relocations in dbgC that refer to `c`. DenseMap secrels; for (const coff_relocation &r : dbgC->getRelocs()) { if (r.Type != secrelReloc) continue; if (auto *s = dyn_cast_or_null( c->file->getSymbols()[r.SymbolTableIndex])) if (s->getChunk() == c) secrels[r.VirtualAddress] = s->getValue(); } ArrayRef contents = SectionChunk::consumeDebugMagic(dbgC->getContents(), ".debug$S"); DebugSubsectionArray subsections; BinaryStreamReader reader(contents, support::little); exitOnErr(reader.readArray(subsections, contents.size())); for (const DebugSubsectionRecord &ss : subsections) { switch (ss.kind()) { case DebugSubsectionKind::StringTable: { assert(!cvStrTab.valid() && "Encountered multiple string table subsections!"); exitOnErr(cvStrTab.initialize(ss.getRecordData())); break; } case DebugSubsectionKind::FileChecksums: assert(!checksums.valid() && "Encountered multiple checksum subsections!"); exitOnErr(checksums.initialize(ss.getRecordData())); break; case DebugSubsectionKind::Lines: { ArrayRef bytes; auto ref = ss.getRecordData(); exitOnErr(ref.readLongestContiguousChunk(0, bytes)); size_t offsetInDbgC = bytes.data() - dbgC->getContents().data(); // Check whether this line table refers to C. auto i = secrels.find(offsetInDbgC); if (i == secrels.end()) break; // Check whether this line table covers Addr in C. 
DebugLinesSubsectionRef linesTmp; exitOnErr(linesTmp.initialize(BinaryStreamReader(ref))); uint32_t offsetInC = i->second + linesTmp.header()->RelocOffset; if (addr < offsetInC || addr >= offsetInC + linesTmp.header()->CodeSize) break; assert(!lines.header() && "Encountered multiple line tables for function!"); exitOnErr(lines.initialize(BinaryStreamReader(ref))); offsetInLinetable = addr - offsetInC; break; } default: break; } if (cvStrTab.valid() && checksums.valid() && lines.header()) return true; } } return false; } // Use CodeView line tables to resolve a file and line number for the given // offset into the given chunk and return them, or None if a line table was // not found. Optional> lld::coff::getFileLineCodeView(const SectionChunk *c, uint32_t addr) { ExitOnError exitOnErr; DebugStringTableSubsectionRef cvStrTab; DebugChecksumsSubsectionRef checksums; DebugLinesSubsectionRef lines; uint32_t offsetInLinetable; if (!findLineTable(c, addr, cvStrTab, checksums, lines, offsetInLinetable)) return None; Optional nameIndex; Optional lineNumber; for (LineColumnEntry &entry : lines) { for (const LineNumberEntry &ln : entry.LineNumbers) { LineInfo li(ln.Flags); if (ln.Offset > offsetInLinetable) { if (!nameIndex) { nameIndex = entry.NameIndex; lineNumber = li.getStartLine(); } StringRef filename = exitOnErr(getFileName(cvStrTab, checksums, *nameIndex)); return std::make_pair(filename, *lineNumber); } nameIndex = entry.NameIndex; lineNumber = li.getStartLine(); } } if (!nameIndex) return None; StringRef filename = exitOnErr(getFileName(cvStrTab, checksums, *nameIndex)); return std::make_pair(filename, *lineNumber); } diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h index 273609ea788c..53506d40baef 100644 --- a/lld/COFF/PDB.h +++ b/lld/COFF/PDB.h @@ -1,38 +1,44 @@ //===- PDB.h ----------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_PDB_H #define LLD_COFF_PDB_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" namespace llvm { namespace codeview { union DebugInfo; } } namespace lld { +class Timer; + namespace coff { class OutputSection; class SectionChunk; class SymbolTable; void createPDB(SymbolTable *symtab, llvm::ArrayRef outputSections, llvm::ArrayRef sectionTable, llvm::codeview::DebugInfo *buildId); llvm::Optional> getFileLineCodeView(const SectionChunk *c, uint32_t addr); + +extern Timer loadGHashTimer; +extern Timer mergeGHashTimer; + } // namespace coff } // namespace lld #endif diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h index d3184a7f18d7..be877cfda6e6 100644 --- a/lld/COFF/TypeMerger.h +++ b/lld/COFF/TypeMerger.h @@ -1,61 +1,63 @@ //===- TypeMerger.h ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_TYPEMERGER_H #define LLD_COFF_TYPEMERGER_H #include "Config.h" -#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/Support/Allocator.h" +#include namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; +using llvm::codeview::TypeIndex; + +struct GHashState; + class TypeMerger { public: - TypeMerger(llvm::BumpPtrAllocator &alloc) - : typeTable(alloc), idTable(alloc), globalTypeTable(alloc), - globalIDTable(alloc) {} + TypeMerger(llvm::BumpPtrAllocator &alloc); + + ~TypeMerger(); /// Get the type table or the global type table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getTypeTable() { - if (config->debugGHashes) - return globalTypeTable; + assert(!config->debugGHashes); return typeTable; } /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getIDTable() { - if (config->debugGHashes) - return globalIDTable; + assert(!config->debugGHashes); return idTable; } + /// Use global hashes to eliminate duplicate types and identify unique type + /// indices in each TpiSource. + void mergeTypesWithGHash(); + /// Type records that will go into the PDB TPI stream. llvm::codeview::MergingTypeTableBuilder typeTable; /// Item records that will go into the PDB IPI stream. llvm::codeview::MergingTypeTableBuilder idTable; - /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalTypeTable; - - /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalIDTable; - // When showSummary is enabled, these are histograms of TPI and IPI records // keyed by type index. 
SmallVector tpiCounts; SmallVector ipiCounts; }; } // namespace coff } // namespace lld #endif diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index 4ffc564e67e2..79a5940823bd 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -1,177 +1,184 @@ //===- ErrorHandler.h -------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // We designed lld's error handlers with the following goals in mind: // // - Errors can occur at any place where we handle user input, but we don't // want them to affect the normal execution path too much. Ideally, // handling errors should be as simple as reporting them and exit (but // without actually doing exit). // // In particular, the design to wrap all functions that could fail with // ErrorOr is rejected because otherwise we would have to wrap a large // number of functions in lld with ErrorOr. With that approach, if some // function F can fail, not only F but all functions that transitively call // F have to be wrapped with ErrorOr. That seemed too much. // // - Finding only one error at a time is not sufficient. We want to find as // many errors as possible with one execution of the linker. That means the // linker needs to keep running after a first error and give up at some // checkpoint (beyond which it would find cascading, false errors caused by // the previous errors). // // - We want a simple interface to report errors. Unlike Clang, the data we // handle is compiled binary, so we don't need an error reporting mechanism // that's as sophisticated as the one that Clang has. 
// // The current lld's error handling mechanism is simple: // // - When you find an error, report it using error() and continue as far as // you can. An internal error counter is incremented by one every time you // call error(). // // A common idiom to handle an error is calling error() and then returning // a reasonable default value. For example, if your function handles a // user-supplied alignment value, and if you find an invalid alignment // (e.g. 17 which is not 2^n), you may report it using error() and continue // as if it were alignment 1 (which is the simplest reasonable value). // // Note that you should not continue with an invalid value; that breaks the // internal consistency. You need to maintain all variables have some sane // value even after an error occurred. So, when you have to continue with // some value, always use a dummy value. // // - Find a reasonable checkpoint at where you want to stop the linker, and // add code to return from the function if errorCount() > 0. In most cases, // a checkpoint already exists, so you don't need to do anything for this. // // This interface satisfies all the goals that we mentioned above. // // You should never call fatal() except for reporting a corrupted input file. // fatal() immediately terminates the linker, so the function is not desirable // if you are using lld as a subroutine in other program, and with that you // can find only one error at a time. // // warn() doesn't do anything but printing out a given message. // // It is not recommended to use llvm::outs() or lld::errs() directly in lld // because they are not thread-safe. The functions declared in this file are // thread-safe. 
// //===----------------------------------------------------------------------===// #ifndef LLD_COMMON_ERRORHANDLER_H #define LLD_COMMON_ERRORHANDLER_H #include "lld/Common/LLVM.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileOutputBuffer.h" namespace llvm { class DiagnosticInfo; class raw_ostream; } namespace lld { // We wrap stdout and stderr so that you can pass alternative stdout/stderr as // arguments to lld::*::link() functions. extern llvm::raw_ostream *stdoutOS; extern llvm::raw_ostream *stderrOS; llvm::raw_ostream &outs(); llvm::raw_ostream &errs(); class ErrorHandler { public: uint64_t errorCount = 0; uint64_t errorLimit = 20; StringRef errorLimitExceededMsg = "too many errors emitted, stopping now"; StringRef logName = "lld"; bool exitEarly = true; bool fatalWarnings = false; bool verbose = false; bool vsDiagnostics = false; bool disableOutput = false; std::function cleanupCallback; void error(const Twine &msg); LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &msg); void log(const Twine &msg); void message(const Twine &msg); void warn(const Twine &msg); void reset() { if (cleanupCallback) cleanupCallback(); *this = ErrorHandler(); } std::unique_ptr outputBuffer; private: using Colors = raw_ostream::Colors; std::string getLocation(const Twine &msg); }; /// Returns the default error handler. 
ErrorHandler &errorHandler(); inline void error(const Twine &msg) { errorHandler().error(msg); } inline LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &msg) { errorHandler().fatal(msg); } inline void log(const Twine &msg) { errorHandler().log(msg); } inline void message(const Twine &msg) { errorHandler().message(msg); } inline void warn(const Twine &msg) { errorHandler().warn(msg); } inline uint64_t errorCount() { return errorHandler().errorCount; } LLVM_ATTRIBUTE_NORETURN void exitLld(int val); void diagnosticHandler(const llvm::DiagnosticInfo &di); void checkError(Error e); // check functions are convenient functions to strip errors // from error-or-value objects. template T check(ErrorOr e) { if (auto ec = e.getError()) fatal(ec.message()); return std::move(*e); } template T check(Expected e) { if (!e) fatal(llvm::toString(e.takeError())); return std::move(*e); } +// Don't move from Expected wrappers around references. +template T &check(Expected e) { + if (!e) + fatal(llvm::toString(e.takeError())); + return *e; +} + template T check2(ErrorOr e, llvm::function_ref prefix) { if (auto ec = e.getError()) fatal(prefix() + ": " + ec.message()); return std::move(*e); } template T check2(Expected e, llvm::function_ref prefix) { if (!e) fatal(prefix() + ": " + toString(e.takeError())); return std::move(*e); } inline std::string toString(const Twine &s) { return s.str(); } // To evaluate the second argument lazily, we use C macro. 
#define CHECK(E, S) check2((E), [&] { return toString(S); }) } // namespace lld #endif diff --git a/lld/test/COFF/pdb-global-hashes.test b/lld/test/COFF/pdb-global-hashes.test index 13039d42fe26..430275b7a884 100644 --- a/lld/test/COFF/pdb-global-hashes.test +++ b/lld/test/COFF/pdb-global-hashes.test @@ -1,93 +1,93 @@ RUN: yaml2obj %p/Inputs/pdb-hashes-1.yaml -o %t.1.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2.yaml -o %t.2.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2-missing.yaml -o %t.2.missing.obj RUN: lld-link /debug %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.nohash.pdb -RUN: lld-link /debug:ghash %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb +RUN: lld-link /debug:ghash -verbose %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb RUN: lld-link /debug:ghash %t.1.obj %t.2.missing.obj /entry:main /nodefaultlib /PDB:%t.mixed.pdb RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.nohash.pdb | FileCheck %s RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.hash.pdb | FileCheck %s RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.mixed.pdb | FileCheck %s ; These object files were generated via the following inputs and commands: ; ---------------------------------------------- ; // obj.h ; namespace NS { ; struct Foo { ; explicit Foo(int x) : X(x) {} ; int X; ; }; ; ; int func(const Foo &f); ; } ; ---------------------------------------------- ; // obj1.cpp ; #include "obj.h" ; ; int main(int argc, char **argv) { ; NS::Foo f(argc); ; return NS::func(f); ; } ; ---------------------------------------------- ; // obj2.cpp ; #include "obj.h" ; ; int NS::func(const Foo &f) { ; return 2 * f.X; ; } ; ---------------------------------------------- ; $ clang-cl /Z7 /GS- obj1.cpp /c /o obj1.obj ; $ clang-cl /Z7 /GS- obj2.cpp /c /o obj2.obj CHECK: Types (TPI Stream) CHECK-NEXT: ============================================================ CHECK-NEXT: Showing 13 records CHECK-NEXT: 0x1000 | LF_POINTER [size = 12] 
CHECK-NEXT: referent = 0x0470 (char*), mode = pointer, opts = None, kind = ptr32 CHECK-NEXT: 0x1001 | LF_ARGLIST [size = 16] CHECK-NEXT: 0x0074 (int): `int` CHECK-NEXT: 0x1000: `char**` CHECK-NEXT: 0x1002 | LF_PROCEDURE [size = 16] CHECK-NEXT: return type = 0x0074 (int), # args = 2, param list = 0x1001 CHECK-NEXT: calling conv = cdecl, options = None CHECK-NEXT: 0x1003 | LF_STRUCTURE [size = 44] `NS::Foo` CHECK-NEXT: unique name: `.?AUFoo@NS@@` CHECK-NEXT: vtable: , base list: , field list: CHECK-NEXT: options: forward ref | has unique name CHECK-NEXT: 0x1004 | LF_POINTER [size = 12] CHECK-NEXT: referent = 0x1003, mode = pointer, opts = None, kind = ptr32 CHECK-NEXT: 0x1005 | LF_ARGLIST [size = 12] CHECK-NEXT: 0x0074 (int): `int` CHECK-NEXT: 0x1006 | LF_MFUNCTION [size = 28] CHECK-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1005 CHECK-NEXT: class type = 0x1003, this type = 0x1004, this adjust = 0 CHECK-NEXT: calling conv = thiscall, options = None CHECK-NEXT: 0x1007 | LF_FIELDLIST [size = 28] CHECK-NEXT: - LF_MEMBER [name = `X`, Type = 0x0074 (int), offset = 0, attrs = public] CHECK-NEXT: - LF_ONEMETHOD [name = `Foo`] CHECK-NEXT: type = 0x1006, vftable offset = -1, attrs = public CHECK-NEXT: 0x1008 | LF_STRUCTURE [size = 44] `NS::Foo` CHECK-NEXT: unique name: `.?AUFoo@NS@@` CHECK-NEXT: vtable: , base list: , field list: 0x1007 CHECK-NEXT: options: has unique name CHECK-NEXT: 0x1009 | LF_MODIFIER [size = 12] CHECK-NEXT: referent = 0x1003, modifiers = const CHECK-NEXT: 0x100A | LF_POINTER [size = 12] CHECK-NEXT: referent = 0x1009, mode = ref, opts = None, kind = ptr32 CHECK-NEXT: 0x100B | LF_ARGLIST [size = 12] CHECK-NEXT: 0x100A: `const NS::Foo&` CHECK-NEXT: 0x100C | LF_PROCEDURE [size = 16] CHECK-NEXT: return type = 0x0074 (int), # args = 1, param list = 0x100B CHECK-NEXT: calling conv = cdecl, options = None CHECK: Types (IPI Stream) CHECK-NEXT: ============================================================ CHECK-NEXT: Showing 6 records CHECK-NEXT: 
0x1000 | LF_FUNC_ID [size = 20] CHECK-NEXT: name = main, type = 0x1002, parent scope = CHECK-NEXT: 0x1001 | LF_STRING_ID [size = {{.*}}] ID: , String: {{.*}}obj.h CHECK-NEXT: 0x1002 | LF_UDT_SRC_LINE [size = 16] CHECK-NEXT: udt = 0x1008, file = 4097, line = 2 CHECK-NEXT: 0x1003 | LF_MFUNC_ID [size = 16] CHECK-NEXT: name = Foo, type = 0x1006, class type = 0x1003 CHECK-NEXT: 0x1004 | LF_STRING_ID [size = 12] ID: , String: NS CHECK-NEXT: 0x1005 | LF_FUNC_ID [size = 20] CHECK-NEXT: name = func, type = 0x100C, parent scope = 0x1004 diff --git a/lld/test/COFF/pdb-procid-remapping.test b/lld/test/COFF/pdb-procid-remapping.test index d7ea775be98e..adc93585f2aa 100644 --- a/lld/test/COFF/pdb-procid-remapping.test +++ b/lld/test/COFF/pdb-procid-remapping.test @@ -1,31 +1,35 @@ -# RUN: yaml2obj %p/Inputs/pdb1.yaml -o %t1.obj -# RUN: yaml2obj %p/Inputs/pdb2.yaml -o %t2.obj +# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj +# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj + # RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ # RUN: %t1.obj %t2.obj +# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s +# RUN: lld-link /debug /debug:ghash /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ +# RUN: %t1.obj %t2.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s CHECK: Symbols CHECK-NEXT: ============================================================ CHECK-LABEL: Mod 0000 | CHECK: 92 | S_GPROC32 [size = 44] `main` CHECK-NEXT: parent = 0, end = 168, addr = 0001:0000, code size = 14 CHECK-NEXT: type = `0x1004 (int ())`, debug start = 4, debug end = 9, flags = none CHECK-NEXT: 136 | S_FRAMEPROC [size = 32] CHECK-NEXT: size = 40, padding size = 0, offset to padding = 0 CHECK-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000 CHECK-NEXT: local fp reg = NONE, param fp reg = NONE CHECK-NEXT: flags = has async eh | opt speed CHECK-NEXT: 168 | S_END [size = 4] CHECK-LABEL: Mod 0001 | CHECK: 92 | S_GPROC32 [size = 44] `foo` 
CHECK-NEXT: parent = 0, end = 168, addr = 0001:0016, code size = 6 CHECK-NEXT: type = `0x1001 (int ())`, debug start = 0, debug end = 5, flags = none CHECK-NEXT: 136 | S_FRAMEPROC [size = 32] CHECK-NEXT: size = 0, padding size = 0, offset to padding = 0 CHECK-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000 CHECK-NEXT: local fp reg = NONE, param fp reg = NONE CHECK-NEXT: flags = has async eh | opt speed CHECK-NEXT: 168 | S_END [size = 4] CHECK-LABEL: Mod 0002 | CHECK: 4 | S_OBJNAME [size = 20] sig=0, `* Linker *` diff --git a/lld/test/COFF/pdb-type-server-missing.yaml b/lld/test/COFF/pdb-type-server-missing.yaml index 1a8c9a05c3d9..78ddc0e4adb2 100644 --- a/lld/test/COFF/pdb-type-server-missing.yaml +++ b/lld/test/COFF/pdb-type-server-missing.yaml @@ -1,144 +1,145 @@ # This is an object compiled with /Zi (see the LF_TYPESERVER2 record) without an # adjacent type server PDB. Test that LLD fails gracefully on it. # Also try linking another OBJ with a reference to the same PDB # RUN: yaml2obj %s -o %t1.obj # RUN: yaml2obj %p/Inputs/pdb-type-server-missing-2.yaml -o %t2.obj # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN +# RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug:ghash -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 2>&1 | FileCheck %s -check-prefix=IGNORE -allow-empty # RUN: not lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /WX 2>&1 | FileCheck %s -check-prefix=ERR # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 /WX 2>&1 | FileCheck %s -check-prefix=IGNORE-ERR -allow-empty # WARN: warning: Cannot use debug info for '{{.*}}.obj' [LNK4099] # WARN-NEXT: {{N|n}}o such file or directory # IGNORE-NOT: warning: Cannot use debug info for 
'{{.*}}.obj' [LNK4099] # IGNORE-NOT: {{N|n}}o such file or directory # ERR: error: Cannot use debug info for '{{.*}}.obj' [LNK4099] # ERR-NEXT: {{N|n}}o such file or directory # IGNORE-ERR-NOT: error: Cannot use debug info for '{{.*}}.obj' [LNK4099] # IGNORE-ERR-NOT: {{N|n}}o such file or directory --- !COFF header: Machine: IMAGE_FILE_MACHINE_AMD64 Characteristics: [ ] sections: - Name: '.debug$S' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 Subsections: - !Symbols Records: - Kind: S_GPROC32_ID ProcSym: CodeSize: 3 DbgStart: 0 DbgEnd: 2 FunctionType: 4199 Flags: [ ] DisplayName: main - Kind: S_FRAMEPROC FrameProcSym: TotalFrameBytes: 0 PaddingFrameBytes: 0 OffsetToPadding: 0 BytesOfCalleeSavedRegisters: 0 OffsetOfExceptionHandler: 0 SectionIdOfExceptionHandler: 0 Flags: [ AsynchronousExceptionHandling, OptimizedForSpeed ] - Kind: S_PROC_ID_END ScopeEndSym: - !Lines CodeSize: 3 Flags: [ ] RelocOffset: 0 RelocSegment: 0 Blocks: - FileName: 'c:\src\llvm-project\build\t.c' Lines: - Offset: 0 LineStart: 1 IsStatement: true EndDelta: 0 Columns: - !FileChecksums Checksums: - FileName: 'c:\src\llvm-project\build\t.c' Kind: MD5 Checksum: 270A878DCC1B845655B162F56C4F5020 - !StringTable Strings: - 'c:\src\llvm-project\build\t.c' Relocations: - VirtualAddress: 44 SymbolName: main Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 48 SymbolName: main Type: IMAGE_REL_AMD64_SECTION - VirtualAddress: 100 SymbolName: main Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 104 SymbolName: main Type: IMAGE_REL_AMD64_SECTION - Name: '.debug$T' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 Types: - Kind: LF_TYPESERVER2 TypeServer2: Guid: '{01DF191B-22BF-6B42-96CE-5258B8329FE5}' Age: 18 Name: 'C:\src\llvm-project\build\definitely_not_found_for_sure.pdb' - Name: '.text$mn' Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] 
Alignment: 16 SectionData: 33C0C3 symbols: - Name: '.debug$S' Value: 0 SectionNumber: 1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC SectionDefinition: Length: 328 NumberOfRelocations: 4 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.debug$T' Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC SectionDefinition: Length: 564 NumberOfRelocations: 0 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.text$mn' Value: 0 SectionNumber: 3 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC SectionDefinition: Length: 3 NumberOfRelocations: 0 NumberOfLinenumbers: 0 CheckSum: 4021952397 Number: 0 - Name: main Value: 0 SectionNumber: 3 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL ... diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index bcba6da28b69..b954712d9b6c 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -1,120 +1,125 @@ Replicate this scenario: $ cat a.c struct Foo { int x; }; int g(struct Foo *p); int main() { struct Foo f = {42}; return g(&f); } $ cat b.c struct Foo { int x; }; int g(struct Foo *p) { return p->x; } $ cl -c a.c b.c -Zi -Fdts.pdb $ lld-link a.obj b.obj -debug -entry:main -nodefaultlib -out:t.exe RUN: rm -rf %t && mkdir -p %t && cd %t RUN: yaml2obj %S/Inputs/pdb-type-server-simple-a.yaml -o a.obj RUN: yaml2obj %S/Inputs/pdb-type-server-simple-b.yaml -o b.obj RUN: llvm-pdbutil yaml2pdb %S/Inputs/pdb-type-server-simple-ts.yaml -pdb ts.pdb -RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib /summary | FileCheck %s -check-prefix SUMMARY +RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib -summary | FileCheck %s -check-prefix SUMMARY 
+RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s + +Re-run with /DEBUG:GHASH +RUN: lld-link a.obj b.obj -entry:main -debug:ghash -out:t.exe -pdb:t.pdb -nodefaultlib -summary -verbose RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s CHECK-LABEL: Types (TPI Stream) CHECK: ============================================================ CHECK: [[FOO_DECL:[^ ]*]] | LF_STRUCTURE [size = 36] `Foo` CHECK: [[FOO_PTR:[^ ]*]] | LF_POINTER [size = 12] CHECK-NEXT: referent = [[FOO_DECL]] CHECK: [[G_ARGS:[^ ]*]] | LF_ARGLIST [size = 12] CHECK-NEXT: [[FOO_PTR]]: `Foo*` CHECK: [[G_PROTO:[^ ]*]] | LF_PROCEDURE [size = 16] CHECK-NEXT: return type = 0x0074 (int), # args = 1, param list = [[G_ARGS]] CHECK-NEXT: calling conv = cdecl, options = None CHECK: [[FOO_COMPLETE:[^ ]*]] | LF_STRUCTURE [size = 36] `Foo` CHECK-NEXT: unique name: `.?AUFoo@@` CHECK-NEXT: vtable: , base list: , field list: 0x{{.*}} CHECK: options: has unique name CHECK: [[MAIN_PROTO:[^ ]*]] | LF_PROCEDURE [size = 16] CHECK: return type = 0x0074 (int), # args = 0, param list = 0x{{.*}} CHECK: calling conv = cdecl, options = None CHECK-LABEL: Types (IPI Stream) CHECK: ============================================================ CHECK: [[MAIN_ID:[^ ]*]] | LF_FUNC_ID [size = 20] CHECK: name = main, type = [[MAIN_PROTO]], parent scope = CHECK: [[G_ID:[^ ]*]] | LF_FUNC_ID [size = 16] CHECK: name = g, type = [[G_PROTO]], parent scope = CHECK: [[A_BUILD:[^ ]*]] | LF_BUILDINFO [size = 28] CHECK: {{.*}}: `a.c` CHECK: [[B_BUILD:[^ ]*]] | LF_BUILDINFO [size = 28] CHECK: {{.*}}: `b.c` CHECK-LABEL: Global Symbols CHECK: ============================================================ CHECK-NEXT: Records CHECK-NEXT: 36 | S_PROCREF [size = 20] `main` CHECK-NEXT: module = 1, sum name = 0, offset = 104 CHECK-NEXT: 68 | S_PROCREF [size = 16] `g` CHECK-NEXT: module = 2, sum name = 0, offset = 104 CHECK-NEXT: 56 | S_UDT [size = 12] `Foo` CHECK-NEXT: original type = 0x1006 CHECK-LABEL: 
Symbols CHECK: ============================================================ CHECK-LABEL: Mod 0000 | `{{.*}}a.obj`: CHECK: 4 | S_OBJNAME [size = 40] sig=0, `C:\src\llvm-project\build\a.obj` CHECK: 104 | S_GPROC32 [size = 44] `main` CHECK: parent = 0, end = 196, addr = 0001:0000, code size = 27 CHECK: type = {{.*}}, debug start = 4, debug end = 22, flags = none CHECK: 200 | S_BUILDINFO [size = 8] BuildId = `[[A_BUILD]]` CHECK-LABEL: Mod 0001 | `{{.*}}b.obj`: CHECK: 4 | S_OBJNAME [size = 40] sig=0, `C:\src\llvm-project\build\b.obj` CHECK: 44 | S_COMPILE3 [size = 60] CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 CHECK: flags = security checks | hot patchable CHECK: 104 | S_GPROC32 [size = 44] `g` CHECK: parent = 0, end = 196, addr = 0001:0032, code size = 13 CHECK: type = {{.*}}, debug start = 5, debug end = 12, flags = none CHECK: 148 | S_FRAMEPROC [size = 32] CHECK: size = 0, padding size = 0, offset to padding = 0 CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 CHECK: flags = has async eh | opt speed CHECK: 180 | S_REGREL32 [size = 16] `p` CHECK: type = [[FOO_PTR]] (Foo*), register = RSP, offset = 8 CHECK: 196 | S_END [size = 4] CHECK: 200 | S_BUILDINFO [size = 8] BuildId = `[[B_BUILD]]` CHECK-LABEL: Mod 0002 | `* Linker *`: SUMMARY: Summary SUMMARY-NEXT: -------------------------------------------------------------------------------- SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 1 PDB type server dependencies SUMMARY-NEXT: 0 Precomp OBJ dependencies -SUMMARY-NEXT: 25 Merged TPI records +SUMMARY-NEXT: 9 Merged TPI records +SUMMARY-NEXT: 16 Merged IPI records SUMMARY-NEXT: 3 Output PDB strings SUMMARY-NEXT: 4 Global symbol records SUMMARY-NEXT: 14 Module symbol records SUMMARY-NEXT: 2 Public symbol records SUMMARY: Top 10 types responsible for the most TPI input: SUMMARY-NEXT: index total bytes count size 
SUMMARY-NEXT: 0x1006: 36 = 1 * 36 SUMMARY: Run llvm-pdbutil to print details about a particular record: SUMMARY-NEXT: llvm-pdbutil dump -types -type-index 0x1006 t.pdb SUMMARY: Top 10 types responsible for the most IPI input: SUMMARY-NEXT: index total bytes count size SUMMARY-NEXT: 0x1006: 256 = 1 * 256 SUMMARY: Run llvm-pdbutil to print details about a particular record: SUMMARY-NEXT: llvm-pdbutil dump -ids -id-index 0x1006 t.pdb diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index b0692ee8002f..161ee88d27f5 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -1,87 +1,95 @@ RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s RUN: lld-link %S/Inputs/precomp.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE FIXME: The following RUN line should fail, regardless of whether debug info is enabled or not. Normally this would result in an error due to missing _PchSym_ references, but SymbolTable.cpp suppresses such errors. MSVC seems to have a special case for those symbols and it emits the LNK2011 error. 
RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE-MISSING-PRECOMPOBJ FAILURE: warning: Cannot use debug info for '{{.*}}precomp-invalid.obj' [LNK4099] FAILURE-NEXT: failed to load reference '{{.*}}precomp.obj': No matching precompiled header could be located. FAILURE-MISSING-PRECOMPOBJ: warning: Cannot use debug info for '{{.*}}precomp-a.obj' [LNK4099] FAILURE-MISSING-PRECOMPOBJ-NEXT: failed to load reference '{{.*}}precomp.obj': No matching precompiled header could be located. Check that a PCH object file with a missing S_OBJNAME record results in an error. Edit out this record from the yaml-ified object: - Kind: S_OBJNAME ObjNameSym: Signature: 545589255 ObjectName: 'F:\svn\lld\test\COFF\precomp\precomp.obj' RUN: obj2yaml %S/Inputs/precomp.obj | grep -v 'SectionData: *04000000' > %t.precomp.yaml RUN: sed '/S_OBJNAME/,/ObjectName:/d' < %t.precomp.yaml > precomp-no-objname.yaml RUN: sed 's/Signature: *545589255/Signature: 0/' < %t.precomp.yaml > precomp-zero-sig.yaml RUN: yaml2obj precomp-no-objname.yaml -o %t.precomp-no-objname.obj RUN: yaml2obj precomp-zero-sig.yaml -o %t.precomp-zero-sig.obj RUN: not lld-link %t.precomp-no-objname.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE RUN: not lld-link %t.precomp-zero-sig.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE FAILURE-NO-SIGNATURE: error: {{.*}}.obj claims to be a PCH object, but does not have a valid signature Check that two PCH objs with duplicate signatures are an error. 
RUN: cp %S/Inputs/precomp.obj %t.precomp-dup.obj RUN: not lld-link %S/Inputs/precomp.obj %t.precomp-dup.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-DUP-SIGNATURE FAILURE-DUP-SIGNATURE: error: a PCH object with the same signature has already been provided ({{.*precomp.obj and .*precomp-dup.obj.*}}) CHECK: Types (TPI Stream) CHECK-NOT: LF_PRECOMP CHECK-NOT: LF_ENDPRECOMP +Re-run with ghash. Eventually, perhaps this will be the default. + +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY +RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s + + SUMMARY: Summary SUMMARY-NEXT: -------------------------------------------------------------------------------- SUMMARY-NEXT: 3 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies -SUMMARY-NEXT: 1044 Merged TPI records +SUMMARY-NEXT: 874 Merged TPI records +SUMMARY-NEXT: 170 Merged IPI records SUMMARY-NEXT: 5 Output PDB strings SUMMARY-NEXT: 167 Global symbol records SUMMARY-NEXT: 20 Module symbol records SUMMARY-NEXT: 3 Public symbol records // precomp.h #pragma once int Function(char A); // precomp.cpp #include "precomp.h" // a.cpp #include "precomp.h" int main(void) { Function('a'); return 0; } // b.cpp #include "precomp.h" int Function(char a) { return (int)a; } // cl.exe precomp.cpp /Z7 /Ycprecomp.h /c // cl.exe a.cpp b.cpp /Z7 /Yuprecomp.h /c diff --git a/lld/test/COFF/s_udt.s b/lld/test/COFF/s_udt.s index 63e409970957..373394334b19 100644 --- a/lld/test/COFF/s_udt.s +++ b/lld/test/COFF/s_udt.s @@ -1,476 +1,478 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-windows-msvc < %s > %t.obj # RUN: lld-link /DEBUG:FULL /nodefaultlib /entry:main %t.obj 
/PDB:%t.pdb /OUT:%t.exe # RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s +# RUN: lld-link /DEBUG:FULL /debug:ghash /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe +# RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s # CHECK: Types (TPI Stream) # CHECK-NEXT: ============================================================ # CHECK: 0x1003 | LF_STRUCTURE [size = 44] `Struct` # CHECK-NEXT: unique name: `.?AUStruct@@` # CHECK-NEXT: vtable: , base list: , field list: # CHECK-NEXT: options: forward ref (-> 0x1006) | has unique name, sizeof 0 # CHECK-NEXT: 0x1004 | LF_POINTER [size = 12] # CHECK-NEXT: referent = 0x1003, mode = pointer, opts = None, kind = ptr64 # CHECK: 0x1006 | LF_STRUCTURE [size = 44] `Struct` # CHECK-NEXT: unique name: `.?AUStruct@@` # CHECK-NEXT: vtable: , base list: , field list: 0x1005 # CHECK-NEXT: options: has unique name, sizeof 4 # CHECK: Global Symbols # CHECK-NEXT: ============================================================ # CHECK: {{.*}} | S_UDT [size = 24] `StructTypedef` # CHECK: original type = 0x1003 # CHECK: {{.*}} | S_UDT [size = 16] `Struct` # CHECK: original type = 0x1006 # CHECK: {{.*}} | S_UDT [size = 20] `IntTypedef` # CHECK: original type = 0x0074 (int) # CHECK: Symbols # CHECK-NEXT: ============================================================ # CHECK: {{.*}} | S_GPROC32 [size = 44] `main` # CHECK-NEXT: parent = 0, end = 252, addr = 0001:0000, code size = 52 # CHECK-NEXT: type = `0x1002 (int (int, char**))`, debug start = 0, debug end = 0, flags = none # CHECK-NOT: {{.*}} | S_END # CHECK: {{.*}} | S_UDT [size = 28] `main::LocalTypedef` # CHECK-NEXT: original type = 0x1004 # CHECK: {{.*}} | S_END [size = 4] # source code to re-generate: # clang-cl /Z7 /GS- /GR- /c foo.cpp # # struct Struct { # int x; # }; # # using IntTypedef = int; # using StructTypedef = Struct; # Struct S; # StructTypedef SS; # IntTypedef I; # # int main(int argc, char **argv) { # using 
LocalTypedef = Struct*; # LocalTypedef SPtr; # return I + S.x + SS.x + SPtr->x; # } .text .def @feat.00; .scl 3; .type 0; .endef .globl @feat.00 .set @feat.00, 0 .intel_syntax noprefix .def main; .scl 2; .type 32; .endef .globl main # -- Begin function main .p2align 4, 0x90 main: # @main .Lfunc_begin0: .cv_func_id 0 .cv_file 1 "D:\\src\\llvmbuild\\cl\\Debug\\x64\\foo.cpp" "2B62298EE3EEF94E1D81FDFE18BD46A6" 1 .cv_loc 0 1 12 0 # foo.cpp:12:0 .seh_proc main # %bb.0: # %entry sub rsp, 32 .seh_stackalloc 32 .seh_endprologue mov dword ptr [rsp + 28], 0 mov qword ptr [rsp + 16], rdx mov dword ptr [rsp + 12], ecx .Ltmp0: .cv_loc 0 1 15 0 # foo.cpp:15:0 mov ecx, dword ptr [rip + "?I@@3HA"] add ecx, dword ptr [rip + "?S@@3UStruct@@A"] add ecx, dword ptr [rip + "?SS@@3UStruct@@A"] mov rdx, qword ptr [rsp] add ecx, dword ptr [rdx] mov eax, ecx add rsp, 32 ret .Ltmp1: .Lfunc_end0: .seh_handlerdata .text .seh_endproc # -- End function .bss .globl "?S@@3UStruct@@A" # @"?S@@3UStruct@@A" .p2align 2 "?S@@3UStruct@@A": .zero 4 .globl "?SS@@3UStruct@@A" # @"?SS@@3UStruct@@A" .p2align 2 "?SS@@3UStruct@@A": .zero 4 .globl "?I@@3HA" # @"?I@@3HA" .p2align 2 "?I@@3HA": .long 0 # 0x0 .section .drectve,"yn" .ascii " /DEFAULTLIB:libcmt.lib" .ascii " /DEFAULTLIB:oldnames.lib" .section .debug$S,"dr" .p2align 2 .long 4 # Debug section magic .long 241 .long .Ltmp3-.Ltmp2 # Subsection size .Ltmp2: .short .Ltmp5-.Ltmp4 # Record length .Ltmp4: .short 4412 # Record kind: S_COMPILE3 .long 1 # Flags and language .short 208 # CPUType .short 8 # Frontend version .short 0 .short 0 .short 0 .short 8000 # Backend version .short 0 .short 0 .short 0 .asciz "clang version 8.0.0 " # Null-terminated compiler version string .Ltmp5: .Ltmp3: .p2align 2 .long 241 # Symbol subsection for main .long .Ltmp7-.Ltmp6 # Subsection size .Ltmp6: .short .Ltmp9-.Ltmp8 # Record length .Ltmp8: .short 4423 # Record kind: S_GPROC32_ID .long 0 # PtrParent .long 0 # PtrEnd .long 0 # PtrNext .long .Lfunc_end0-main # Code size .long 0 
# Offset after prologue .long 0 # Offset before epilogue .long 4099 # Function type index .secrel32 main # Function section relative address .secidx main # Function section index .byte 0 # Flags .asciz "main" # Function name .Ltmp9: .short .Ltmp11-.Ltmp10 # Record length .Ltmp10: .short 4114 # Record kind: S_FRAMEPROC .long 32 # FrameSize .long 0 # Padding .long 0 # Offset of padding .long 0 # Bytes of callee saved registers .long 0 # Exception handler offset .short 0 # Exception handler section .long 81920 # Flags (defines frame register) .Ltmp11: .short .Ltmp13-.Ltmp12 # Record length .Ltmp12: .short 4414 # Record kind: S_LOCAL .long 116 # TypeIndex .short 1 # Flags .asciz "argc" .Ltmp13: .cv_def_range .Ltmp0 .Ltmp1, frame_ptr_rel, 12 .short .Ltmp15-.Ltmp14 # Record length .Ltmp14: .short 4414 # Record kind: S_LOCAL .long 4096 # TypeIndex .short 1 # Flags .asciz "argv" .Ltmp15: .cv_def_range .Ltmp0 .Ltmp1, frame_ptr_rel, 16 .short .Ltmp17-.Ltmp16 # Record length .Ltmp16: .short 4414 # Record kind: S_LOCAL .long 4101 # TypeIndex .short 0 # Flags .asciz "SPtr" .Ltmp17: .cv_def_range .Ltmp0 .Ltmp1, frame_ptr_rel, 0 .short .Ltmp19-.Ltmp18 # Record length .Ltmp18: .short 4360 # Record kind: S_UDT .long 4101 # Type .asciz "main::LocalTypedef" .Ltmp19: .short 2 # Record length .short 4431 # Record kind: S_PROC_ID_END .Ltmp7: .p2align 2 .cv_linetable 0, main, .Lfunc_end0 .long 241 # Symbol subsection for globals .long .Ltmp21-.Ltmp20 # Subsection size .Ltmp20: .short .Ltmp23-.Ltmp22 # Record length .Ltmp22: .short 4365 # Record kind: S_GDATA32 .long 4103 # Type .secrel32 "?S@@3UStruct@@A" # DataOffset .secidx "?S@@3UStruct@@A" # Segment .asciz "S" # Name .Ltmp23: .short .Ltmp25-.Ltmp24 # Record length .Ltmp24: .short 4365 # Record kind: S_GDATA32 .long 4100 # Type .secrel32 "?SS@@3UStruct@@A" # DataOffset .secidx "?SS@@3UStruct@@A" # Segment .asciz "SS" # Name .Ltmp25: .short .Ltmp27-.Ltmp26 # Record length .Ltmp26: .short 4365 # Record kind: S_GDATA32 .long 116 # Type 
.secrel32 "?I@@3HA" # DataOffset .secidx "?I@@3HA" # Segment .asciz "I" # Name .Ltmp27: .Ltmp21: .p2align 2 .long 241 .long .Ltmp29-.Ltmp28 # Subsection size .Ltmp28: .short .Ltmp31-.Ltmp30 # Record length .Ltmp30: .short 4360 # Record kind: S_UDT .long 4103 # Type .asciz "Struct" .Ltmp31: .short .Ltmp33-.Ltmp32 # Record length .Ltmp32: .short 4360 # Record kind: S_UDT .long 4100 # Type .asciz "StructTypedef" .Ltmp33: .short .Ltmp35-.Ltmp34 # Record length .Ltmp34: .short 4360 # Record kind: S_UDT .long 116 # Type .asciz "IntTypedef" .Ltmp35: .Ltmp29: .p2align 2 .cv_filechecksums # File index to string table offset subsection .cv_stringtable # String table .long 241 .long .Ltmp37-.Ltmp36 # Subsection size .Ltmp36: .short 6 # Record length .short 4428 # Record kind: S_BUILDINFO .long 4108 # LF_BUILDINFO index .Ltmp37: .p2align 2 .section .debug$T,"dr" .p2align 2 .long 4 # Debug section magic # Pointer (0x1000) { # TypeLeafKind: LF_POINTER (0x1002) # PointeeType: char* (0x670) # PtrType: Near64 (0xC) # PtrMode: Pointer (0x0) # IsFlat: 0 # IsConst: 0 # IsVolatile: 0 # IsUnaligned: 0 # IsRestrict: 0 # IsThisPtr&: 0 # IsThisPtr&&: 0 # SizeOf: 8 # } .byte 0x0a, 0x00, 0x02, 0x10 .byte 0x70, 0x06, 0x00, 0x00 .byte 0x0c, 0x00, 0x01, 0x00 # ArgList (0x1001) { # TypeLeafKind: LF_ARGLIST (0x1201) # NumArgs: 2 # Arguments [ # ArgType: int (0x74) # ArgType: char** (0x1000) # ] # } .byte 0x0e, 0x00, 0x01, 0x12 .byte 0x02, 0x00, 0x00, 0x00 .byte 0x74, 0x00, 0x00, 0x00 .byte 0x00, 0x10, 0x00, 0x00 # Procedure (0x1002) { # TypeLeafKind: LF_PROCEDURE (0x1008) # ReturnType: int (0x74) # CallingConvention: NearC (0x0) # FunctionOptions [ (0x0) # ] # NumParameters: 2 # ArgListType: (int, char**) (0x1001) # } .byte 0x0e, 0x00, 0x08, 0x10 .byte 0x74, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x02, 0x00 .byte 0x01, 0x10, 0x00, 0x00 # FuncId (0x1003) { # TypeLeafKind: LF_FUNC_ID (0x1601) # ParentScope: 0x0 # FunctionType: int (int, char**) (0x1002) # Name: main # } .byte 0x12, 0x00, 0x01, 0x16 
.byte 0x00, 0x00, 0x00, 0x00 .byte 0x02, 0x10, 0x00, 0x00 .byte 0x6d, 0x61, 0x69, 0x6e .byte 0x00, 0xf3, 0xf2, 0xf1 # Struct (0x1004) { # TypeLeafKind: LF_STRUCTURE (0x1505) # MemberCount: 0 # Properties [ (0x280) # ForwardReference (0x80) # HasUniqueName (0x200) # ] # FieldList: 0x0 # DerivedFrom: 0x0 # VShape: 0x0 # SizeOf: 0 # Name: Struct # LinkageName: .?AUStruct@@ # } .byte 0x2a, 0x00, 0x05, 0x15 .byte 0x00, 0x00, 0x80, 0x02 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x53, 0x74 .byte 0x72, 0x75, 0x63, 0x74 .byte 0x00, 0x2e, 0x3f, 0x41 .byte 0x55, 0x53, 0x74, 0x72 .byte 0x75, 0x63, 0x74, 0x40 .byte 0x40, 0x00, 0xf2, 0xf1 # Pointer (0x1005) { # TypeLeafKind: LF_POINTER (0x1002) # PointeeType: Struct (0x1004) # PtrType: Near64 (0xC) # PtrMode: Pointer (0x0) # IsFlat: 0 # IsConst: 0 # IsVolatile: 0 # IsUnaligned: 0 # IsRestrict: 0 # IsThisPtr&: 0 # IsThisPtr&&: 0 # SizeOf: 8 # } .byte 0x0a, 0x00, 0x02, 0x10 .byte 0x04, 0x10, 0x00, 0x00 .byte 0x0c, 0x00, 0x01, 0x00 # FieldList (0x1006) { # TypeLeafKind: LF_FIELDLIST (0x1203) # DataMember { # TypeLeafKind: LF_MEMBER (0x150D) # AccessSpecifier: Public (0x3) # Type: int (0x74) # FieldOffset: 0x0 # Name: x # } # } .byte 0x0e, 0x00, 0x03, 0x12 .byte 0x0d, 0x15, 0x03, 0x00 .byte 0x74, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x78, 0x00 # Struct (0x1007) { # TypeLeafKind: LF_STRUCTURE (0x1505) # MemberCount: 1 # Properties [ (0x200) # HasUniqueName (0x200) # ] # FieldList: (0x1006) # DerivedFrom: 0x0 # VShape: 0x0 # SizeOf: 4 # Name: Struct # LinkageName: .?AUStruct@@ # } .byte 0x2a, 0x00, 0x05, 0x15 .byte 0x01, 0x00, 0x00, 0x02 .byte 0x06, 0x10, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x04, 0x00, 0x53, 0x74 .byte 0x72, 0x75, 0x63, 0x74 .byte 0x00, 0x2e, 0x3f, 0x41 .byte 0x55, 0x53, 0x74, 0x72 .byte 0x75, 0x63, 0x74, 0x40 .byte 0x40, 0x00, 0xf2, 0xf1 # StringId (0x1008) { # TypeLeafKind: LF_STRING_ID (0x1605) # Id: 0x0 # 
StringData: D:\src\llvmbuild\cl\Debug\x64\foo.cpp # } .byte 0x2e, 0x00, 0x05, 0x16 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x44, 0x3a, 0x5c, 0x73 .byte 0x72, 0x63, 0x5c, 0x6c .byte 0x6c, 0x76, 0x6d, 0x62 .byte 0x75, 0x69, 0x6c, 0x64 .byte 0x5c, 0x63, 0x6c, 0x5c .byte 0x44, 0x65, 0x62, 0x75 .byte 0x67, 0x5c, 0x78, 0x36 .byte 0x34, 0x5c, 0x66, 0x6f .byte 0x6f, 0x2e, 0x63, 0x70 .byte 0x70, 0x00, 0xf2, 0xf1 # UdtSourceLine (0x1009) { # TypeLeafKind: LF_UDT_SRC_LINE (0x1606) # UDT: Struct (0x1007) # SourceFile: D:\src\llvmbuild\cl\Debug\x64\foo.cpp (0x1008) # LineNumber: 1 # } .byte 0x0e, 0x00, 0x06, 0x16 .byte 0x07, 0x10, 0x00, 0x00 .byte 0x08, 0x10, 0x00, 0x00 .byte 0x01, 0x00, 0x00, 0x00 # StringId (0x100A) { # TypeLeafKind: LF_STRING_ID (0x1605) # Id: 0x0 # StringData: D:\\src\\llvmbuild\\cl\\Debug\\x64 # } .byte 0x2a, 0x00, 0x05, 0x16 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x44, 0x3a, 0x5c, 0x5c .byte 0x73, 0x72, 0x63, 0x5c .byte 0x5c, 0x6c, 0x6c, 0x76 .byte 0x6d, 0x62, 0x75, 0x69 .byte 0x6c, 0x64, 0x5c, 0x5c .byte 0x63, 0x6c, 0x5c, 0x5c .byte 0x44, 0x65, 0x62, 0x75 .byte 0x67, 0x5c, 0x5c, 0x78 .byte 0x36, 0x34, 0x00, 0xf1 # StringId (0x100B) { # TypeLeafKind: LF_STRING_ID (0x1605) # Id: 0x0 # StringData: foo.cpp # } .byte 0x0e, 0x00, 0x05, 0x16 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x66, 0x6f, 0x6f, 0x2e .byte 0x63, 0x70, 0x70, 0x00 # BuildInfo (0x100C) { # TypeLeafKind: LF_BUILDINFO (0x1603) # NumArgs: 5 # Arguments [ # ArgType: D:\\src\\llvmbuild\\cl\\Debug\\x64 (0x100A) # ArgType: 0x0 # ArgType: foo.cpp (0x100B) # ArgType: 0x0 # ArgType: 0x0 # ] # } .byte 0x1a, 0x00, 0x03, 0x16 .byte 0x05, 0x00, 0x0a, 0x10 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x0b, 0x10 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0xf2, 0xf1 .addrsig .addrsig_sym "?S@@3UStruct@@A" .addrsig_sym "?SS@@3UStruct@@A" .addrsig_sym "?I@@3HA" diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index 
b0a16cccbff3..e6ade770457c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -1,233 +1,243 @@ //===- TypeHashing.h ---------------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/FormatProviders.h" #include namespace llvm { namespace codeview { /// A locally hashed type represents a straightforward hash code of a serialized /// record. The record is simply serialized, and then the bytes are hashed by /// a standard algorithm. This is sufficient for the case of de-duplicating /// records within a single sequence of types, because if two records both have /// a back-reference to the same type in the same stream, they will both have /// the same numeric value for the TypeIndex of the back reference. struct LocallyHashedType { hash_code Hash; ArrayRef RecordData; /// Given a type, compute its local hash. static LocallyHashedType hashType(ArrayRef RecordData); /// Given a sequence of types, compute all of the local hashes. 
template static std::vector hashTypes(Range &&Records) { std::vector Hashes; Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); for (const auto &R : Records) Hashes.push_back(hashType(R)); return Hashes; } static std::vector hashTypeCollection(TypeCollection &Types) { std::vector Hashes; Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { Hashes.push_back(hashType(Type.RecordData)); }); return Hashes; } }; enum class GlobalTypeHashAlg : uint16_t { SHA1 = 0, // standard 20-byte SHA1 hash SHA1_8 // last 8-bytes of standard SHA1 hash }; /// A globally hashed type represents a hash value that is sufficient to /// uniquely identify a record across multiple type streams or type sequences. /// This works by, for any given record A which references B, replacing the /// TypeIndex that refers to B with a previously-computed global hash for B. As /// this is a recursive algorithm (e.g. the global hash of B also depends on the /// global hashes of the types that B refers to), a global hash can uniquely /// identify identify that A occurs in another stream that has a completely /// different graph structure. Although the hash itself is slower to compute, /// probing is much faster with a globally hashed type, because the hash itself /// is considered "as good as" the original type. Since type records can be /// quite large, this makes the equality comparison of the hash much faster than /// equality comparison of a full record. 
struct GloballyHashedType { GloballyHashedType() = default; GloballyHashedType(StringRef H) : GloballyHashedType(ArrayRef(H.bytes_begin(), H.bytes_end())) {} GloballyHashedType(ArrayRef H) { assert(H.size() == 8); ::memcpy(Hash.data(), H.data(), 8); } std::array Hash; bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + friend inline bool operator==(const GloballyHashedType &L, + const GloballyHashedType &R) { + return L.Hash == R.Hash; + } + + friend inline bool operator!=(const GloballyHashedType &L, + const GloballyHashedType &R) { + return !(L.Hash == R.Hash); + } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids /// that RecordData might reference, indexable by TypeIndex. static GloballyHashedType hashType(ArrayRef RecordData, ArrayRef PreviousTypes, ArrayRef PreviousIds); /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids /// that RecordData might reference, indexable by TypeIndex. static GloballyHashedType hashType(CVType Type, ArrayRef PreviousTypes, ArrayRef PreviousIds) { return hashType(Type.RecordData, PreviousTypes, PreviousIds); } /// Given a sequence of combined type and ID records, compute global hashes /// for each of them, returning the results in a vector of hashed types. template static std::vector hashTypes(Range &&Records) { std::vector Hashes; bool UnresolvedRecords = false; for (const auto &R : Records) { GloballyHashedType H = hashType(R, Hashes, Hashes); if (H.empty()) UnresolvedRecords = true; Hashes.push_back(H); } // In some rare cases, there might be records with forward references in the // stream. 
Several passes might be needed to fully hash each record in the // Type stream. However this occurs on very small OBJs generated by MASM, // with a dozen records at most. Therefore this codepath isn't // time-critical, as it isn't taken in 99% of cases. while (UnresolvedRecords) { UnresolvedRecords = false; auto HashIt = Hashes.begin(); for (const auto &R : Records) { if (HashIt->empty()) { GloballyHashedType H = hashType(R, Hashes, Hashes); if (H.empty()) UnresolvedRecords = true; else *HashIt = H; } ++HashIt; } } return Hashes; } /// Given a sequence of combined type and ID records, compute global hashes /// for each of them, returning the results in a vector of hashed types. template static std::vector hashIds(Range &&Records, ArrayRef TypeHashes) { std::vector IdHashes; for (const auto &R : Records) IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); return IdHashes; } static std::vector hashTypeCollection(TypeCollection &Types) { std::vector Hashes; Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); }); return Hashes; } }; #if defined(_MSC_VER) // is_trivially_copyable is not available in older versions of libc++, but it is // available in all supported versions of MSVC, so at least this gives us some // coverage. 
static_assert(std::is_trivially_copyable::value, "GloballyHashedType must be trivially copyable so that we can " "reinterpret_cast arrays of hash data to arrays of " "GloballyHashedType"); #endif } // namespace codeview template <> struct DenseMapInfo { static codeview::LocallyHashedType Empty; static codeview::LocallyHashedType Tombstone; static codeview::LocallyHashedType getEmptyKey() { return Empty; } static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } static unsigned getHashValue(codeview::LocallyHashedType Val) { return Val.Hash; } static bool isEqual(codeview::LocallyHashedType LHS, codeview::LocallyHashedType RHS) { if (LHS.Hash != RHS.Hash) return false; return LHS.RecordData == RHS.RecordData; } }; template <> struct DenseMapInfo { static codeview::GloballyHashedType Empty; static codeview::GloballyHashedType Tombstone; static codeview::GloballyHashedType getEmptyKey() { return Empty; } static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } static unsigned getHashValue(codeview::GloballyHashedType Val) { return *reinterpret_cast(Val.Hash.data()); } static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS) { - return LHS.Hash == RHS.Hash; + return LHS == RHS; } }; template <> struct format_provider { public: static void format(const codeview::LocallyHashedType &V, llvm::raw_ostream &Stream, StringRef Style) { write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); } }; template <> struct format_provider { public: static void format(const codeview::GloballyHashedType &V, llvm::raw_ostream &Stream, StringRef Style) { for (uint8_t B : V.Hash) { write_hex(Stream, B, HexPrintStyle::Upper, 2); } } }; } // namespace llvm #endif diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index b9e2562bfc2b..bdc6cf46509b 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ 
-1,299 +1,308 @@ //===- TypeIndex.h ----------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" #include #include #include namespace llvm { class ScopedPrinter; namespace codeview { class TypeCollection; enum class SimpleTypeKind : uint32_t { None = 0x0000, // uncharacterized type (no type) Void = 0x0003, // void NotTranslated = 0x0007, // type not translated by cvpack HResult = 0x0008, // OLE/COM HRESULT SignedCharacter = 0x0010, // 8 bit signed UnsignedCharacter = 0x0020, // 8 bit unsigned NarrowCharacter = 0x0070, // really a char WideCharacter = 0x0071, // wide char Character16 = 0x007a, // char16_t Character32 = 0x007b, // char32_t SByte = 0x0068, // 8 bit signed int Byte = 0x0069, // 8 bit unsigned int Int16Short = 0x0011, // 16 bit signed UInt16Short = 0x0021, // 16 bit unsigned Int16 = 0x0072, // 16 bit signed int UInt16 = 0x0073, // 16 bit unsigned int Int32Long = 0x0012, // 32 bit signed UInt32Long = 0x0022, // 32 bit unsigned Int32 = 0x0074, // 32 bit signed int UInt32 = 0x0075, // 32 bit unsigned int Int64Quad = 0x0013, // 64 bit signed UInt64Quad = 0x0023, // 64 bit unsigned Int64 = 0x0076, // 64 bit signed int UInt64 = 0x0077, // 64 bit unsigned int Int128Oct = 0x0014, // 128 bit signed int UInt128Oct = 0x0024, // 128 bit unsigned int Int128 = 0x0078, // 128 bit signed int UInt128 = 0x0079, // 128 bit unsigned int Float16 = 0x0046, // 16 bit real Float32 = 0x0040, // 32 bit real Float32PartialPrecision = 0x0045, // 32 bit PP real Float48 = 0x0044, // 48 bit real Float64 = 0x0041, // 64 bit real Float80 = 0x0042, 
// 80 bit real Float128 = 0x0043, // 128 bit real Complex16 = 0x0056, // 16 bit complex Complex32 = 0x0050, // 32 bit complex Complex32PartialPrecision = 0x0055, // 32 bit PP complex Complex48 = 0x0054, // 48 bit complex Complex64 = 0x0051, // 64 bit complex Complex80 = 0x0052, // 80 bit complex Complex128 = 0x0053, // 128 bit complex Boolean8 = 0x0030, // 8 bit boolean Boolean16 = 0x0031, // 16 bit boolean Boolean32 = 0x0032, // 32 bit boolean Boolean64 = 0x0033, // 64 bit boolean Boolean128 = 0x0034, // 128 bit boolean }; enum class SimpleTypeMode : uint32_t { Direct = 0x00000000, // Not a pointer NearPointer = 0x00000100, // Near pointer FarPointer = 0x00000200, // Far pointer HugePointer = 0x00000300, // Huge pointer NearPointer32 = 0x00000400, // 32 bit near pointer FarPointer32 = 0x00000500, // 32 bit far pointer NearPointer64 = 0x00000600, // 64 bit near pointer NearPointer128 = 0x00000700 // 128 bit near pointer }; /// A 32-bit type reference. Types are indexed by their order of appearance in /// .debug$T plus 0x1000. Type indices less than 0x1000 are "simple" types, /// composed of a SimpleTypeMode byte followed by a SimpleTypeKind byte. 
class TypeIndex { public: static const uint32_t FirstNonSimpleIndex = 0x1000; static const uint32_t SimpleKindMask = 0x000000ff; static const uint32_t SimpleModeMask = 0x00000700; static const uint32_t DecoratedItemIdMask = 0x80000000; public: TypeIndex() : Index(static_cast(SimpleTypeKind::None)) {} explicit TypeIndex(uint32_t Index) : Index(Index) {} explicit TypeIndex(SimpleTypeKind Kind) : Index(static_cast(Kind)) {} TypeIndex(SimpleTypeKind Kind, SimpleTypeMode Mode) : Index(static_cast(Kind) | static_cast(Mode)) {} uint32_t getIndex() const { return Index; } void setIndex(uint32_t I) { Index = I; } bool isSimple() const { return Index < FirstNonSimpleIndex; } /* True when the DecoratedItemIdMask bit (0x80000000) is set in Index. */ bool isDecoratedItemId() const { return !!(Index & DecoratedItemIdMask); } bool isNoneType() const { return *this == None(); } /* Zero-based position of a non-simple index (asserts !isSimple()). The + line clears the DecoratedItemIdMask bit before subtracting FirstNonSimpleIndex, so a decorated index maps to the same slot as its undecorated form. */ uint32_t toArrayIndex() const { assert(!isSimple()); - return getIndex() - FirstNonSimpleIndex; + return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex; } static TypeIndex fromArrayIndex(uint32_t Index) { return TypeIndex(Index + FirstNonSimpleIndex); } + /* Like fromArrayIndex, but can also set the decoration: builds Index + FirstNonSimpleIndex and ORs in DecoratedItemIdMask when IsItem is true. */ static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) { + return TypeIndex((Index + FirstNonSimpleIndex) | + (IsItem ? 
DecoratedItemIdMask : 0)); + } + + /* Returns a copy of this index with the DecoratedItemIdMask bit cleared; Index itself is left untouched. NOTE(review): the method is not const-qualified although it does not modify the object. */ TypeIndex removeDecoration() { + return TypeIndex(Index & ~DecoratedItemIdMask); + } + SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast(Index & SimpleKindMask); } SimpleTypeMode getSimpleMode() const { assert(isSimple()); return static_cast(Index & SimpleModeMask); } /* Keeps only the SimpleTypeKind of a simple index, dropping the SimpleTypeMode bits (getSimpleKind asserts isSimple()). */ TypeIndex makeDirect() const { return TypeIndex{getSimpleKind()}; } static TypeIndex None() { return TypeIndex(SimpleTypeKind::None); } static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); } static TypeIndex VoidPointer32() { return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer32); } static TypeIndex VoidPointer64() { return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64); } static TypeIndex NullptrT() { // std::nullptr_t uses the pointer mode that doesn't indicate bit-width, // presumably because std::nullptr_t is intended to be compatible with any // pointer type. return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer); } static TypeIndex SignedCharacter() { return TypeIndex(SimpleTypeKind::SignedCharacter); } static TypeIndex UnsignedCharacter() { return TypeIndex(SimpleTypeKind::UnsignedCharacter); } static TypeIndex NarrowCharacter() { return TypeIndex(SimpleTypeKind::NarrowCharacter); } static TypeIndex WideCharacter() { return TypeIndex(SimpleTypeKind::WideCharacter); } static TypeIndex Int16Short() { return TypeIndex(SimpleTypeKind::Int16Short); } static TypeIndex UInt16Short() { return TypeIndex(SimpleTypeKind::UInt16Short); } static TypeIndex Int32() { return TypeIndex(SimpleTypeKind::Int32); } static TypeIndex UInt32() { return TypeIndex(SimpleTypeKind::UInt32); } static TypeIndex Int32Long() { return TypeIndex(SimpleTypeKind::Int32Long); } static TypeIndex UInt32Long() { return TypeIndex(SimpleTypeKind::UInt32Long); } static TypeIndex Int64() { return TypeIndex(SimpleTypeKind::Int64); } static TypeIndex UInt64() { return TypeIndex(SimpleTypeKind::UInt64); } static TypeIndex 
Int64Quad() { return TypeIndex(SimpleTypeKind::Int64Quad); } static TypeIndex UInt64Quad() { return TypeIndex(SimpleTypeKind::UInt64Quad); } static TypeIndex Float32() { return TypeIndex(SimpleTypeKind::Float32); } static TypeIndex Float64() { return TypeIndex(SimpleTypeKind::Float64); } TypeIndex &operator+=(unsigned N) { Index += N; return *this; } TypeIndex &operator++() { Index += 1; return *this; } TypeIndex operator++(int) { TypeIndex Copy = *this; operator++(); return Copy; } TypeIndex &operator-=(unsigned N) { assert(Index >= N); Index -= N; return *this; } TypeIndex &operator--() { Index -= 1; return *this; } TypeIndex operator--(int) { TypeIndex Copy = *this; operator--(); return Copy; } friend inline bool operator==(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() == B.getIndex(); } friend inline bool operator!=(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() != B.getIndex(); } friend inline bool operator<(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() < B.getIndex(); } friend inline bool operator<=(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() <= B.getIndex(); } friend inline bool operator>(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() > B.getIndex(); } friend inline bool operator>=(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() >= B.getIndex(); } friend inline TypeIndex operator+(const TypeIndex &A, uint32_t N) { TypeIndex Result(A); Result += N; return Result; } friend inline TypeIndex operator-(const TypeIndex &A, uint32_t N) { assert(A.getIndex() >= N); TypeIndex Result(A); Result -= N; return Result; } /* Number of slots between two indices, computed from their toArrayIndex() values; asserts A >= B. */ friend inline uint32_t operator-(const TypeIndex &A, const TypeIndex &B) { assert(A >= B); return A.toArrayIndex() - B.toArrayIndex(); } static StringRef simpleTypeName(TypeIndex TI); private: support::ulittle32_t Index; }; // Used for pseudo-indexing an array of type records. 
An array of such records // sorted by TypeIndex can allow log(N) lookups even though such a type record // stream does not provide random access. struct TypeIndexOffset { TypeIndex Type; support::ulittle32_t Offset; }; void printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, TypeIndex TI, TypeCollection &Types); } /* DenseMapInfo specialization for codeview::TypeIndex: hashing uses TI.getIndex() and equality uses operator==. NOTE(review): template arguments look stripped from this text (e.g. after DenseMapInfo) - confirm against upstream. */ template <> struct DenseMapInfo { static inline codeview::TypeIndex getEmptyKey() { return codeview::TypeIndex{DenseMapInfo::getEmptyKey()}; } static inline codeview::TypeIndex getTombstoneKey() { return codeview::TypeIndex{DenseMapInfo::getTombstoneKey()}; } static unsigned getHashValue(const codeview::TypeIndex &TI) { return DenseMapInfo::getHashValue(TI.getIndex()); } static bool isEqual(const codeview::TypeIndex &LHS, const codeview::TypeIndex &RHS) { return LHS == RHS; } }; } // namespace llvm #endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 72d98e9c2c4d..9ef2ee6a9307 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -1,89 +1,94 @@ //===- TpiStreamBuilder.h - PDB Tpi Stream Creation -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAMBUILDER_H #define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAMBUILDER_H #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include namespace llvm { class BinaryByteStream; class WritableBinaryStreamRef; template <> struct BinaryItemTraits { static size_t length(const codeview::CVType &Item) { return Item.length(); } static ArrayRef bytes(const codeview::CVType &Item) { return Item.data(); } }; namespace codeview { class TypeRecord; } namespace msf { class MSFBuilder; struct MSFLayout; } namespace pdb { class PDBFile; class TpiStream; struct TpiStreamHeader; class TpiStreamBuilder { public: explicit TpiStreamBuilder(msf::MSFBuilder &Msf, uint32_t StreamIdx); ~TpiStreamBuilder(); TpiStreamBuilder(const TpiStreamBuilder &) = delete; TpiStreamBuilder &operator=(const TpiStreamBuilder &) = delete; void setVersionHeader(PdbRaw_TpiVer Version); void addTypeRecord(ArrayRef Type, Optional Hash); + /* Bulk variant of addTypeRecord: Types is one buffer holding several records, while Sizes and Hashes carry one entry per record. */ void addTypeRecords(ArrayRef Types, ArrayRef Sizes, + ArrayRef Hashes); Error finalizeMsfLayout(); - uint32_t getRecordCount() const { return TypeRecords.size(); } + /* Reads a running counter, since one TypeRecBuffers entry may hold many records. */ uint32_t getRecordCount() const { return TypeRecordCount; } Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); private: + /* Shared bookkeeping for both add paths: advances TypeRecordCount and TypeRecordBytes and records TypeIndexOffsets entries. */ void updateTypeIndexOffsets(ArrayRef Sizes); + uint32_t calculateHashBufferSize() const; uint32_t calculateIndexOffsetSize() const; Error finalize(); msf::MSFBuilder &Msf; BumpPtrAllocator &Allocator; + uint32_t TypeRecordCount = 0; size_t 
TypeRecordBytes = 0; PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; - std::vector> TypeRecords; + /* Buffers written in order by commit(). NOTE(review): the element type looks truncated in this text - confirm against upstream. */ std::vector> TypeRecBuffers; std::vector TypeHashes; std::vector TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; std::unique_ptr HashValueStream; const TpiStreamHeader *Header; uint32_t Idx; }; } } #endif diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index 47b5498181b7..1ca899789bef 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -1,337 +1,339 @@ //===- RecordName.cpp ----------------------------------------- *- C++ --*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/FormatVariadic.h" using namespace llvm; using namespace llvm::codeview; namespace { class TypeNameComputer : public TypeVisitorCallbacks { /// The type collection. Used to calculate names of nested types. TypeCollection &Types; TypeIndex CurrentTypeIndex = TypeIndex::None(); /// Name of the current type. Only valid before visitTypeEnd. SmallString<256> Name; public: explicit TypeNameComputer(TypeCollection &Types) : Types(Types) {} StringRef name() const { return Name; } /// Paired begin/end actions for all types. Receives all record data, /// including the fixed-length record prefix. 
Error visitTypeBegin(CVType &Record) override; Error visitTypeBegin(CVType &Record, TypeIndex Index) override; Error visitTypeEnd(CVType &Record) override; #define TYPE_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD(EnumName, EnumVal, Name) #include "llvm/DebugInfo/CodeView/CodeViewTypes.def" }; } // namespace Error TypeNameComputer::visitTypeBegin(CVType &Record) { llvm_unreachable("Must call visitTypeBegin with a TypeIndex!"); return Error::success(); } Error TypeNameComputer::visitTypeBegin(CVType &Record, TypeIndex Index) { // Reset Name to the empty string. If the visitor sets it, we know it. Name = ""; CurrentTypeIndex = Index; return Error::success(); } Error TypeNameComputer::visitTypeEnd(CVType &CVR) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, FieldListRecord &FieldList) { Name = ""; return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVRecord &CVR, StringIdRecord &String) { Name = String.getString(); return Error::success(); } /* Builds a parenthesized, comma-separated argument list. The + lines replace the assert with a runtime check: only indices below CurrentTypeIndex are resolved through Types.getTypeName; for any other index a fixed literal is appended instead. NOTE(review): that literal reads as empty in this text, possibly lost in extraction - confirm against upstream. */ Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { auto Indices = Args.getIndices(); uint32_t Size = Indices.size(); Name = "("; for (uint32_t I = 0; I < Size; ++I) { - assert(Indices[I] < CurrentTypeIndex); - - Name.append(Types.getTypeName(Indices[I])); + if (Indices[I] < CurrentTypeIndex) + Name.append(Types.getTypeName(Indices[I])); + else + Name.append(""); if (I + 1 != Size) Name.append(", "); } Name.push_back(')'); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, StringListRecord &Strings) { auto Indices = Strings.getIndices(); uint32_t Size = Indices.size(); Name = "\""; for (uint32_t I = 0; I < Size; ++I) { Name.append(Types.getTypeName(Indices[I])); if (I + 1 != Size) Name.append("\" \""); } Name.push_back('\"'); return Error::success(); } Error 
TypeNameComputer::visitKnownRecord(CVType &CVR, ClassRecord &Class) { Name = Class.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, UnionRecord &Union) { Name = Union.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { Name = Enum.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { Name = AT.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { Name = VFT.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFuncIdRecord &Id) { Name = Id.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) { StringRef Ret = Types.getTypeName(Proc.getReturnType()); StringRef Params = Types.getTypeName(Proc.getArgumentList()); Name = formatv("{0} {1}", Ret, Params).sstr<256>(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFunctionRecord &MF) { StringRef Ret = Types.getTypeName(MF.getReturnType()); StringRef Class = Types.getTypeName(MF.getClassType()); StringRef Params = Types.getTypeName(MF.getArgumentList()); Name = formatv("{0} {1}::{2}", Ret, Class, Params).sstr<256>(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { Name = Func.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { Name = TS.getName(); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { if (Ptr.isPointerToMember()) { const MemberPointerInfo &MI = Ptr.getMemberInfo(); StringRef Pointee = Types.getTypeName(Ptr.getReferentType()); StringRef Class = Types.getTypeName(MI.getContainingType()); Name = formatv("{0} {1}::*", Pointee, Class); } else { 
Name.append(Types.getTypeName(Ptr.getReferentType())); if (Ptr.getMode() == PointerMode::LValueReference) Name.append("&"); else if (Ptr.getMode() == PointerMode::RValueReference) Name.append("&&"); else if (Ptr.getMode() == PointerMode::Pointer) Name.append("*"); // Qualifiers in pointer records apply to the pointer, not the pointee, so // they go on the right. if (Ptr.isConst()) Name.append(" const"); if (Ptr.isVolatile()) Name.append(" volatile"); if (Ptr.isUnaligned()) Name.append(" __unaligned"); if (Ptr.isRestrict()) Name.append(" __restrict"); } return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { uint16_t Mods = static_cast(Mod.getModifiers()); if (Mods & uint16_t(ModifierOptions::Const)) Name.append("const "); if (Mods & uint16_t(ModifierOptions::Volatile)) Name.append("volatile "); if (Mods & uint16_t(ModifierOptions::Unaligned)) Name.append("__unaligned "); Name.append(Types.getTypeName(Mod.getModifiedType())); return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableShapeRecord &Shape) { Name = formatv("", Shape.getEntryCount()); return Error::success(); } Error TypeNameComputer::visitKnownRecord( CVType &CVR, UdtModSourceLineRecord &ModSourceLine) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &SourceLine) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, MethodOverloadListRecord &Overloads) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, LabelRecord &R) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, PrecompRecord &Precomp) { return Error::success(); } Error TypeNameComputer::visitKnownRecord(CVType &CVR, 
EndPrecompRecord &EndPrecomp) { return Error::success(); } /* Computes the display name of the record at Index by visiting it with a TypeNameComputer. If the visit reports an error, the error is consumed and a fallback literal is returned instead of the computed name. */ std::string llvm::codeview::computeTypeName(TypeCollection &Types, TypeIndex Index) { TypeNameComputer Computer(Types); CVType Record = Types.getType(Index); if (auto EC = visitTypeRecord(Record, Index, Computer)) { consumeError(std::move(EC)); return ""; } return std::string(Computer.name()); } /* Returns the offset of the name within Sym.content() for the symbol kinds listed below, or -1 for any other kind; getSymbolName drops that many leading bytes before reading the name. */ static int getSymbolNameOffset(CVSymbol Sym) { switch (Sym.kind()) { // See ProcSym case SymbolKind::S_GPROC32: case SymbolKind::S_LPROC32: case SymbolKind::S_GPROC32_ID: case SymbolKind::S_LPROC32_ID: case SymbolKind::S_LPROC32_DPC: case SymbolKind::S_LPROC32_DPC_ID: return 35; // See Thunk32Sym case SymbolKind::S_THUNK32: return 21; // See SectionSym case SymbolKind::S_SECTION: return 16; // See CoffGroupSym case SymbolKind::S_COFFGROUP: return 14; // See PublicSym32, FileStaticSym, RegRelativeSym, DataSym, ThreadLocalDataSym case SymbolKind::S_PUB32: case SymbolKind::S_FILESTATIC: case SymbolKind::S_REGREL32: case SymbolKind::S_GDATA32: case SymbolKind::S_LDATA32: case SymbolKind::S_LMANDATA: case SymbolKind::S_GMANDATA: case SymbolKind::S_LTHREAD32: case SymbolKind::S_GTHREAD32: case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: return 10; // See RegisterSym and LocalSym case SymbolKind::S_REGISTER: case SymbolKind::S_LOCAL: return 6; // See BlockSym case SymbolKind::S_BLOCK32: return 18; // See LabelSym case SymbolKind::S_LABEL32: return 7; // See ObjNameSym, ExportSym, and UDTSym case SymbolKind::S_OBJNAME: case SymbolKind::S_EXPORT: case SymbolKind::S_UDT: return 4; // See BPRelativeSym case SymbolKind::S_BPREL32: return 8; // See UsingNamespaceSym case SymbolKind::S_UNAMESPACE: return 0; default: return -1; } } /* Returns the name of a symbol record. S_CONSTANT is fully deserialized; every other kind uses getSymbolNameOffset and reads up to the first NUL byte, and kinds with no known offset yield an empty StringRef. */ StringRef llvm::codeview::getSymbolName(CVSymbol Sym) { if (Sym.kind() == SymbolKind::S_CONSTANT) { // S_CONSTANT is preceded by an APSInt, which has a variable length. So we // have to do a full deserialization. 
BinaryStreamReader Reader(Sym.content(), llvm::support::little); // The container doesn't matter for single records. SymbolRecordMapping Mapping(Reader, CodeViewContainer::ObjectFile); ConstantSym Const(SymbolKind::S_CONSTANT); cantFail(Mapping.visitSymbolBegin(Sym)); cantFail(Mapping.visitKnownRecord(Sym, Const)); cantFail(Mapping.visitSymbolEnd(Sym)); return Const.Name; } int Offset = getSymbolNameOffset(Sym); if (Offset == -1) return StringRef(); StringRef StringData = toStringRef(Sym.content()).drop_front(Offset); return StringData.split('\0').first; } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 51a1f0a544e3..b5e7b03e6917 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -1,184 +1,214 @@ //===- TpiStreamBuilder.cpp - -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include #include +#include using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; using namespace llvm::support; TpiStreamBuilder::TpiStreamBuilder(MSFBuilder &Msf, uint32_t StreamIdx) : Msf(Msf), Allocator(Msf.getAllocator()), Header(nullptr), Idx(StreamIdx) { } TpiStreamBuilder::~TpiStreamBuilder() = default; void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { VerHeader = Version; } +/* Walks Sizes in order. Before counting each record, pushes {FirstNonSimpleIndex + TypeRecordCount, TypeRecordBytes} onto TypeIndexOffsets if adding the record moves the byte total into a new 8KB bucket or if no record has been counted yet; it then increments TypeRecordCount and advances TypeRecordBytes. */ void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef Sizes) { + // If we just crossed an 8KB threshold, add a type index offset. + for (uint16_t Size : Sizes) { + size_t NewSize = TypeRecordBytes + Size; + constexpr size_t EightKB = 8 * 1024; + if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecordCount == 0) { + TypeIndexOffsets.push_back( + {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + + TypeRecordCount), + ulittle32_t(TypeRecordBytes)}); + } + ++TypeRecordCount; + TypeRecordBytes = NewSize; + } +} + /* Single-record path. On the + side it asserts the size is a multiple of 4 and at most MaxRecordLength, feeds that one size to updateTypeIndexOffsets, stores the buffer in TypeRecBuffers, and appends the hash when one is supplied. */ void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { - // If we just crossed an 8KB threshold, add a type index offset. 
assert(((Record.size() & 3) == 0) && "The type record's size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); - size_t NewSize = TypeRecordBytes + Record.size(); - constexpr size_t EightKB = 8 * 1024; - if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { - TypeIndexOffsets.push_back( - {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + - TypeRecords.size()), - ulittle32_t(TypeRecordBytes)}); - } - TypeRecordBytes = NewSize; + assert(Record.size() <= codeview::MaxRecordLength); + uint16_t OneSize = (uint16_t)Record.size(); + updateTypeIndexOffsets(makeArrayRef(&OneSize, 1)); - TypeRecords.push_back(Record); + TypeRecBuffers.push_back(Record); + // FIXME: Require it. if (Hash) TypeHashes.push_back(*Hash); } +/* Bulk path. Types is one buffer of back-to-back records, Sizes holds the byte size of each record and Hashes one hash per record. An empty Types is a no-op. The asserts require Sizes and Hashes to have equal length and the sizes to sum to Types.size(). The whole buffer is stored as a single TypeRecBuffers entry. */ void TpiStreamBuilder::addTypeRecords(ArrayRef Types, + ArrayRef Sizes, + ArrayRef Hashes) { + // Ignore empty type buffers. There should be no hashes or sizes in this case. + if (Types.empty()) { + assert(Sizes.empty() && Hashes.empty()); + return; + } + + assert(((Types.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + assert(Sizes.size() == Hashes.size() && "sizes and hashes should be in sync"); + assert(std::accumulate(Sizes.begin(), Sizes.end(), 0U) == Types.size() && + "sizes of type records should sum to the size of the types"); + updateTypeIndexOffsets(Sizes); + + TypeRecBuffers.push_back(Types); + TypeHashes.insert(TypeHashes.end(), Hashes.begin(), Hashes.end()); +} + /* Builds the TpiStreamHeader once and caches it in Header; later calls return early. On the + side TypeIndexEnd is derived from TypeRecordCount. */ Error TpiStreamBuilder::finalize() { if (Header) return Error::success(); TpiStreamHeader *H = Allocator.Allocate(); - uint32_t Count = TypeRecords.size(); - H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; - H->TypeIndexEnd = H->TypeIndexBegin + Count; + H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount; H->TypeRecordBytes = TypeRecordBytes; 
H->HashStreamIndex = HashStreamIndex; H->HashAuxStreamIndex = kInvalidStreamIndex; H->HashKeySize = sizeof(ulittle32_t); H->NumHashBuckets = MaxTpiHashBuckets - 1; // Recall that hash values go into a completely different stream identified by // the `HashStreamIndex` field of the `TpiStreamHeader`. Therefore, the data // begins at offset 0 of this independent stream. H->HashValueBuffer.Off = 0; H->HashValueBuffer.Length = calculateHashBufferSize(); // We never write any adjustments into our PDBs, so this is usually some // offset with zero length. H->HashAdjBuffer.Off = H->HashValueBuffer.Off + H->HashValueBuffer.Length; H->HashAdjBuffer.Length = 0; H->IndexOffsetBuffer.Off = H->HashAdjBuffer.Off + H->HashAdjBuffer.Length; H->IndexOffsetBuffer.Length = calculateIndexOffsetSize(); Header = H; return Error::success(); } uint32_t TpiStreamBuilder::calculateSerializedLength() { return sizeof(TpiStreamHeader) + TypeRecordBytes; } uint32_t TpiStreamBuilder::calculateHashBufferSize() const { - assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) && + assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) && "either all or no type records should have hashes"); return TypeHashes.size() * sizeof(ulittle32_t); } uint32_t TpiStreamBuilder::calculateIndexOffsetSize() const { return TypeIndexOffsets.size() * sizeof(codeview::TypeIndexOffset); } /* Sets the size of the TPI stream and, when there are hashes or index offsets, adds a separate hash stream. Hash values are reduced modulo (MaxTpiHashBuckets - 1) into an allocator-owned buffer that backs HashValueStream. */ Error TpiStreamBuilder::finalizeMsfLayout() { uint32_t Length = calculateSerializedLength(); if (auto EC = Msf.setStreamSize(Idx, Length)) return EC; uint32_t HashStreamSize = calculateHashBufferSize() + calculateIndexOffsetSize(); if (HashStreamSize == 0) return Error::success(); auto ExpectedIndex = Msf.addStream(HashStreamSize); if (!ExpectedIndex) return ExpectedIndex.takeError(); HashStreamIndex = *ExpectedIndex; if (!TypeHashes.empty()) { ulittle32_t *H = Allocator.Allocate(TypeHashes.size()); MutableArrayRef HashBuffer(H, TypeHashes.size()); for (uint32_t I = 0; I < TypeHashes.size(); ++I) { 
HashBuffer[I] = TypeHashes[I] % (MaxTpiHashBuckets - 1); } ArrayRef Bytes( reinterpret_cast(HashBuffer.data()), calculateHashBufferSize()); HashValueStream = std::make_unique(Bytes, llvm::support::little); } return Error::success(); } /* Finalizes the header, then writes the header followed by every buffer in TypeRecBuffers to the TPI stream. If a hash stream was allocated, it writes the hash values (when present) followed by the type index offsets to that stream. */ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer) { if (auto EC = finalize()) return EC; auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx, Allocator); BinaryStreamWriter Writer(*InfoS); if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) { + for (auto Rec : TypeRecBuffers) { assert(!Rec.empty() && "Attempting to write an empty type record shifts " "all offsets in the TPI stream!"); assert(((Rec.size() & 3) == 0) && "The type record's size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; } if (HashStreamIndex != kInvalidStreamIndex) { auto HVS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, HashStreamIndex, Allocator); BinaryStreamWriter HW(*HVS); if (HashValueStream) { if (auto EC = HW.writeStreamRef(*HashValueStream)) return EC; } for (auto &IndexOffset : TypeIndexOffsets) { if (auto EC = HW.writeObject(IndexOffset)) return EC; } } return Error::success(); }