Index: lld/COFF/PDB.h =================================================================== --- lld/COFF/PDB.h +++ lld/COFF/PDB.h @@ -28,7 +28,7 @@ void createPDB(SymbolTable *Symtab, llvm::ArrayRef OutputSections, llvm::ArrayRef SectionTable, - const llvm::codeview::DebugInfo &BuildId); + llvm::codeview::DebugInfo *BuildId); std::pair getFileLine(const SectionChunk *C, uint32_t Addr); Index: lld/COFF/PDB.cpp =================================================================== --- lld/COFF/PDB.cpp +++ lld/COFF/PDB.cpp @@ -93,7 +93,7 @@ } /// Emit the basic PDB structure: initial streams, headers, etc. - void initialize(const llvm::codeview::DebugInfo &BuildId); + void initialize(llvm::codeview::DebugInfo *BuildId); /// Add natvis files specified on the command line. void addNatvisFiles(); @@ -125,8 +125,8 @@ void addSections(ArrayRef OutputSections, ArrayRef SectionTable); - /// Write the PDB to disk. - void commit(); + /// Write the PDB to disk and return the Guid generated for it. + void commit(codeview::GUID *Guid); private: BumpPtrAllocator Alloc; @@ -331,8 +331,8 @@ return std::move(NS); } -Expected PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, - TypeServer2Record &TS) { +Expected +PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, TypeServer2Record &TS) { const GUID &TSId = TS.getGuid(); StringRef TSPath = TS.getName(); @@ -952,7 +952,8 @@ // subsections. 
auto NewChecksums = make_unique(PDBStrTab); for (FileChecksumEntry &FC : Checksums) { - SmallString<128> FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); + SmallString<128> FileName = + ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); if (!sys::path::is_absolute(FileName) && !Config->PDBSourcePath.empty()) { SmallString<128> AbsoluteFileName = Config->PDBSourcePath; @@ -1131,7 +1132,7 @@ void coff::createPDB(SymbolTable *Symtab, ArrayRef OutputSections, ArrayRef SectionTable, - const llvm::codeview::DebugInfo &BuildId) { + llvm::codeview::DebugInfo *BuildId) { ScopedTimer T1(TotalPdbLinkTimer); PDBLinker PDB(Symtab); @@ -1141,12 +1142,19 @@ PDB.addNatvisFiles(); ScopedTimer T2(DiskCommitTimer); - PDB.commit(); + codeview::GUID Guid; + PDB.commit(&Guid); + memcpy(&BuildId->PDB70.Signature, &Guid, 16); } -void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) { +void PDBLinker::initialize(llvm::codeview::DebugInfo *BuildId) { ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize + BuildId->Signature.CVSignature = OMF::Signature::PDB70; + // Signature is set to a hash of the PDB contents when the PDB is done. + memset(BuildId->PDB70.Signature, 0, 16); + BuildId->PDB70.Age = 1; + // Create streams in MSF for predefined streams, namely // PDB, TPI, DBI and IPI. for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I) @@ -1154,15 +1162,12 @@ // Add an Info stream. auto &InfoBuilder = Builder.getInfoBuilder(); - GUID uuid; - memcpy(&uuid, &BuildId.PDB70.Signature, sizeof(uuid)); - InfoBuilder.setAge(BuildId.PDB70.Age); - InfoBuilder.setGuid(uuid); InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); + InfoBuilder.setHashPDBContentsToGUID(true); // Add an empty DBI stream. 
pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); - DbiBuilder.setAge(BuildId.PDB70.Age); + DbiBuilder.setAge(BuildId->PDB70.Age); DbiBuilder.setVersionHeader(pdb::PdbDbiV70); DbiBuilder.setMachineType(Config->Machine); // Technically we are not link.exe 14.11, but there are known cases where @@ -1206,9 +1211,9 @@ DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); } -void PDBLinker::commit() { +void PDBLinker::commit(codeview::GUID *Guid) { // Write to a file. - ExitOnErr(Builder.commit(Config->PDBPath)); + ExitOnErr(Builder.commit(Config->PDBPath, Guid)); } static Expected Index: lld/COFF/Writer.cpp =================================================================== --- lld/COFF/Writer.cpp +++ lld/COFF/Writer.cpp @@ -209,7 +209,6 @@ DebugDirectoryChunk *DebugDirectory = nullptr; std::vector DebugRecords; CVDebugRecordChunk *BuildId = nullptr; - Optional PreviousBuildId; ArrayRef SectionTable; uint64_t FileSize; @@ -285,67 +284,6 @@ } // namespace coff } // namespace lld -// PDBs are matched against executables using a build id which consists of three -// components: -// 1. A 16-bit GUID -// 2. An age -// 3. A time stamp. -// -// Debuggers and symbol servers match executables against debug info by checking -// each of these components of the EXE/DLL against the corresponding value in -// the PDB and failing a match if any of the components differ. In the case of -// symbol servers, symbols are cached in a folder that is a function of the -// GUID. As a result, in order to avoid symbol cache pollution where every -// incremental build copies a new PDB to the symbol cache, we must try to re-use -// the existing GUID if one exists, but bump the age. This way the match will -// fail, so the symbol cache knows to use the new PDB, but the GUID matches, so -// it overwrites the existing item in the symbol cache rather than making a new -// one. 
-static Optional loadExistingBuildId(StringRef Path) { - // We don't need to incrementally update a previous build id if we're not - // writing codeview debug info. - if (!Config->Debug) - return None; - - auto ExpectedBinary = llvm::object::createBinary(Path); - if (!ExpectedBinary) { - consumeError(ExpectedBinary.takeError()); - return None; - } - - auto Binary = std::move(*ExpectedBinary); - if (!Binary.getBinary()->isCOFF()) - return None; - - std::error_code EC; - COFFObjectFile File(Binary.getBinary()->getMemoryBufferRef(), EC); - if (EC) - return None; - - // If the machine of the binary we're outputting doesn't match the machine - // of the existing binary, don't try to re-use the build id. - if (File.is64() != Config->is64() || File.getMachine() != Config->Machine) - return None; - - for (const auto &DebugDir : File.debug_directories()) { - if (DebugDir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) - continue; - - const codeview::DebugInfo *ExistingDI = nullptr; - StringRef PDBFileName; - if (auto EC = File.getDebugPDBInfo(ExistingDI, PDBFileName)) { - (void)EC; - return None; - } - // We only support writing PDBs in v70 format. So if this is not a build - // id that we recognize / support, ignore it. - if (ExistingDI->Signature.CVSignature != OMF::Signature::PDB70) - return None; - return *ExistingDI; - } - return None; -} - // The main function of the writer. void Writer::run() { ScopedTimer T1(CodeLayoutTimer); @@ -364,9 +302,6 @@ fatal("image size (" + Twine(FileSize) + ") " + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); - // We must do this before opening the output file, as it depends on being able - // to read the contents of the existing output file. 
- PreviousBuildId = loadExistingBuildId(Config->OutputFile); openFile(Config->OutputFile); if (Config->is64()) { writeHeader(); @@ -375,14 +310,14 @@ } writeSections(); sortExceptionTable(); - writeBuildId(); T1.stop(); if (!Config->PDBPath.empty() && Config->Debug) { assert(BuildId); - createPDB(Symtab, OutputSections, SectionTable, *BuildId->BuildId); + createPDB(Symtab, OutputSections, SectionTable, BuildId->BuildId); } + writeBuildId(); writeMapFile(OutputSections); @@ -1225,25 +1160,10 @@ // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the - // binary, however when building with debug info the hash needs to take into - // account the debug info, since it's possible to add blank lines to a file - // which causes the debug info to change but not the generated code. - // - // To handle this, we first set the Guid and Age in the debug directory (but - // only if we're doing a debug build). Then, we hash the binary (thus causing - // the hash to change if only the debug info changes, since the Age will be - // different). Finally, we write that hash into the debug directory (if - // present) as well as the COFF file header (always). + // PE contents. if (Config->Debug) { assert(BuildId && "BuildId is not set!"); - if (PreviousBuildId.hasValue()) { - *BuildId->BuildId = *PreviousBuildId; - BuildId->BuildId->PDB70.Age = BuildId->BuildId->PDB70.Age + 1; - } else { - BuildId->BuildId->Signature.CVSignature = OMF::Signature::PDB70; - BuildId->BuildId->PDB70.Age = 1; - llvm::getRandomBytes(BuildId->BuildId->PDB70.Signature, 16); - } + // BuildId->BuildId was filled in when the PDB was written. 
} // At this point the only fields in the COFF file which remain unset are the Index: lld/test/COFF/rsds.test =================================================================== --- lld/test/COFF/rsds.test +++ lld/test/COFF/rsds.test @@ -1,9 +1,9 @@ # RUN: yaml2obj %s > %t.obj # RUN: rm -f %t.dll %t.pdb -# RUN: lld-link /debug /pdbaltpath:test1.pdb /dll /out:%t.dll /entry:DllMain %t.obj +# RUN: lld-link /debug /pdbaltpath:test.pdb /dll /out:%t.dll /entry:DllMain %t.obj # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.1.txt -# RUN: lld-link /debug /pdbaltpath:test2.pdb /dll /out:%t.dll /entry:DllMain %t.obj +# RUN: lld-link /debug /pdbaltpath:test.pdb /dll /out:%t.dll /entry:DllMain %t.obj # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.2.txt # RUN: cat %t.1.txt %t.2.txt | FileCheck %s @@ -12,7 +12,7 @@ # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.3.txt # RUN: lld-link /debug /pdb:%t2.pdb /dll /out:%t.dll /entry:DllMain %t.obj # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.4.txt -# RUN: cat %t.3.txt %t.4.txt | FileCheck %s +# RUN: cat %t.3.txt %t.4.txt | FileCheck --check-prefix TWOPDBS %s # RUN: rm -f %t.dll %t.pdb # RUN: lld-link /Brepro /dll /out:%t.dll /entry:DllMain %t.obj @@ -37,7 +37,7 @@ # CHECK: PDBSignature: 0x53445352 # CHECK: PDBGUID: [[GUID:\(([A-Za-z0-9]{2} ?){16}\)]] # CHECK: PDBAge: 1 -# CHECK: PDBFileName: {{.*}}1.pdb +# CHECK: PDBFileName: {{.*}}.pdb # CHECK: } # CHECK: } # CHECK: ] @@ -55,12 +55,51 @@ # CHECK: PDBInfo { # CHECK: PDBSignature: 0x53445352 # CHECK: PDBGUID: [[GUID]] -# CHECK: PDBAge: 2 -# CHECK: PDBFileName: {{.*}}2.pdb +# CHECK: PDBAge: 1 +# CHECK: PDBFileName: {{.*}}.pdb # CHECK: } # CHECK: } # CHECK: ] +# TWOPDBS: File: [[FILE:.*]].dll +# TWOPDBS: DebugDirectory [ +# TWOPDBS: DebugEntry { +# TWOPDBS: Characteristics: 0x0 +# TWOPDBS: TimeDateStamp: +# TWOPDBS: MajorVersion: 0x0 +# TWOPDBS: MinorVersion: 0x0 +# TWOPDBS: Type: CodeView (0x2) +# TWOPDBS: SizeOfData: 0x{{[^0]}} +# TWOPDBS: AddressOfRawData: 
0x{{[^0]}} +# TWOPDBS: PointerToRawData: 0x{{[^0]}} +# TWOPDBS: PDBInfo { +# TWOPDBS: PDBSignature: 0x53445352 +# TWOPDBS: PDBGUID: [[GUID:\(([A-Za-z0-9]{2} ?){16}\)]] +# TWOPDBS: PDBAge: 1 +# TWOPDBS: PDBFileName: {{.*}}.pdb +# TWOPDBS: } +# TWOPDBS: } +# TWOPDBS: ] +# TWOPDBS: File: [[FILE]].dll +# TWOPDBS: DebugDirectory [ +# TWOPDBS: DebugEntry { +# TWOPDBS: Characteristics: 0x0 +# TWOPDBS: TimeDateStamp: +# TWOPDBS: MajorVersion: 0x0 +# TWOPDBS: MinorVersion: 0x0 +# TWOPDBS: Type: CodeView (0x2) +# TWOPDBS: SizeOfData: 0x{{[^0]}} +# TWOPDBS: AddressOfRawData: 0x{{[^0]}} +# TWOPDBS: PointerToRawData: 0x{{[^0]}} +# TWOPDBS: PDBInfo { +# TWOPDBS: PDBSignature: 0x53445352 +# TWOPDBS-NOT: PDBGUID: [[GUID]] +# TWOPDBS: PDBAge: 1 +# TWOPDBS: PDBFileName: {{.*}}.pdb +# TWOPDBS: } +# TWOPDBS: } +# TWOPDBS: ] + # REPRO: File: {{.*}}.dll # REPRO: DebugDirectory [ # REPRO: DebugEntry { Index: llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h +++ llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h @@ -35,11 +35,18 @@ InfoStreamBuilder &operator=(const InfoStreamBuilder &) = delete; void setVersion(PdbRaw_ImplVer V); + void addFeature(PdbRaw_FeatureSig Sig); + + // If this is true, the PDB contents are hashed and this hash is used as + // PDB GUID and as Signature. The age is always 1. + void setHashPDBContentsToGUID(bool B); + + // These only have an effect if hashPDBContentsToGUID() is false. 
void setSignature(uint32_t S); void setAge(uint32_t A); void setGuid(codeview::GUID G); - void addFeature(PdbRaw_FeatureSig Sig); + bool hashPDBContentsToGUID() const { return HashPDBContentsToGUID; } uint32_t getAge() const { return Age; } codeview::GUID getGuid() const { return Guid; } Optional getSignature() const { return Signature; } @@ -60,6 +67,8 @@ Optional Signature; codeview::GUID Guid; + bool HashPDBContentsToGUID = false; + NamedStreamMap &NamedStreams; }; } Index: llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -53,7 +53,9 @@ PDBStringTableBuilder &getStringTableBuilder(); GSIStreamBuilder &getGsiBuilder(); - Error commit(StringRef Filename); + // If HashPDBContentsToGUID is true on the InfoStreamBuilder, Guid is filled + // with the computed PDB GUID on return. + Error commit(StringRef Filename, codeview::GUID *Guid); Expected getNamedStreamIndex(StringRef Name) const; Error addNamedStream(StringRef Name, StringRef Data); Index: llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -310,13 +310,14 @@ PublicsStreamHeader Header; // FIXME: Fill these in. They are for incremental linking. 
+ Header.SymHash = PSH->calculateSerializedLength(); + Header.AddrMap = PSH->Records.size() * 4; Header.NumThunks = 0; Header.SizeOfThunk = 0; Header.ISectThunkTable = 0; + memset(Header.Padding, 0, sizeof(Header.Padding)); Header.OffThunkTable = 0; Header.NumSections = 0; - Header.SymHash = PSH->calculateSerializedLength(); - Header.AddrMap = PSH->Records.size() * 4; if (auto EC = Writer.writeObject(Header)) return EC; Index: llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -32,15 +32,20 @@ void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; } +void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) { + Features.push_back(Sig); +} + +void InfoStreamBuilder::setHashPDBContentsToGUID(bool B) { + HashPDBContentsToGUID = B; +} + void InfoStreamBuilder::setAge(uint32_t A) { Age = A; } void InfoStreamBuilder::setSignature(uint32_t S) { Signature = S; } void InfoStreamBuilder::setGuid(GUID G) { Guid = G; } -void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) { - Features.push_back(Sig); -} Error InfoStreamBuilder::finalizeMsfLayout() { uint32_t Length = sizeof(InfoStreamHeader) + Index: llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -25,6 +25,8 @@ #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/JamCRC.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/xxhash.h" using namespace llvm; using namespace llvm::codeview; @@ -261,13 +263,27 @@ } } -Error PDBFileBuilder::commit(StringRef Filename) { +static std::vector> split(ArrayRef Arr, + size_t ChunkSize) { + std::vector> Ret; + while (Arr.size() > ChunkSize) { + 
Ret.push_back(Arr.take_front(ChunkSize)); + Arr = Arr.drop_front(ChunkSize); + } + if (!Arr.empty()) + Ret.push_back(Arr); + return Ret; +} + + +Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) { assert(!Filename.empty()); if (auto EC = finalizeMsfLayout()) return EC; MSFLayout Layout; - auto ExpectedMsfBuffer = Msf->commit(Filename, Layout); + Expected ExpectedMsfBuffer = + Msf->commit(Filename, Layout); if (!ExpectedMsfBuffer) return ExpectedMsfBuffer.takeError(); FileBufferByteStream Buffer = std::move(*ExpectedMsfBuffer); @@ -329,9 +345,38 @@ // Set the build id at the very end, after every other byte of the PDB // has been written. + if (Info->hashPDBContentsToGUID()) { + // Compute a hash of the entire contents of the PDB file. + uint8_t *Start = Buffer.getBufferStart(); + uint8_t *End = Buffer.getBufferEnd(); + ArrayRef D{Start, End}; + + std::vector> Chunks = split(D, 1024 * 1024); + std::vector Hashes(Chunks.size() * 8); + + // Compute hash values. + for_each_n(llvm::parallel::par, (size_t)0, Chunks.size(), [&](size_t I) { + support::endian::write64le(Hashes.data() + I * 8, xxHash64(Chunks[I])); + }); + + // Combine the per-chunk hashes into a single digest. + uint64_t Digest = xxHash64(Hashes); + + H->Age = 1; + + memcpy(H->Guid.Guid, &Digest, 8); + // xxhash only gives us 8 bytes, so put some fixed data in the other half. + memcpy(H->Guid.Guid + 8, "LLD PDB.", 8); + + // Return GUID to caller. + memcpy(Guid, H->Guid.Guid, 16); + } else { + H->Age = Info->getAge(); + H->Guid = Info->getGuid(); + } + // FIXME: Use a hash of the PDB rather than time(nullptr) for the signature. - H->Age = Info->getAge(); - H->Guid = Info->getGuid(); + // XXX: change this too Optional Sig = Info->getSignature(); H->Signature = Sig.hasValue() ? 
*Sig : time(nullptr); Index: llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp =================================================================== --- llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -800,7 +800,8 @@ Builder.getStringTableBuilder().setStrings(*Strings.strings()); - ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile)); + codeview::GUID IgnoredOutGuid; + ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile, &IgnoredOutGuid)); } static PDBFile &loadPDB(StringRef Path, std::unique_ptr &Session) { @@ -1260,7 +1261,9 @@ OutFile = opts::merge::InputFilenames[0]; llvm::sys::path::replace_extension(OutFile, "merged.pdb"); } - ExitOnErr(Builder.commit(OutFile)); + + codeview::GUID IgnoredOutGuid; + ExitOnErr(Builder.commit(OutFile, &IgnoredOutGuid)); } static void explain() {