diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h b/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h --- a/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include +#include <vector> namespace llvm { class PDBFile; @@ -42,6 +43,13 @@ const FileLayout *Layout; }; +struct ModuleInfoEx { + ModuleInfoEx(ModInfo Module) : Info(Module) {} + + ModInfo Info; + std::vector<StringRef> SourceFiles; +}; + class ModInfoIterator { public: ModInfoIterator(const uint8_t *Stream); diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h --- a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h @@ -43,13 +43,23 @@ PDB_Machine getMachineType() const; - llvm::iterator_range<ModInfoIterator> modules() const; + ArrayRef<ModuleInfoEx> modules() const; private: + std::error_code readSubstream(std::vector<uint8_t> &Bytes, uint32_t Size); + std::error_code initializeFileInfo(); + PDBFile &Pdb; PDBStream Stream; + std::vector<ModuleInfoEx> ModuleInfos; + std::vector<uint8_t> ModInfoSubstream; + std::vector<uint8_t> SecContrSubstream; + std::vector<uint8_t> SecMapSubstream; + std::vector<uint8_t> FileInfoSubstream; + std::vector<uint8_t> TypeServerMapSubstream; + std::vector<uint8_t> ECSubstream; std::unique_ptr Header; }; } diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp --- a/llvm/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp @@ -48,16 +48,16 @@ little32_t VersionSignature; ulittle32_t VersionHeader; ulittle32_t Age; // Should match PDBInfoStream. - ulittle16_t GSSyms; - ulittle16_t BuildNumber; // See DbiBuildNo structure. - ulittle16_t PSSyms; + ulittle16_t GSSyms; // Number of global symbols + ulittle16_t BuildNumber; // See DbiBuildNo structure. 
+ ulittle16_t PSSyms; // Number of public symbols ulittle16_t PdbDllVersion; // version of mspdbNNN.dll ulittle16_t SymRecords; // Number of symbols ulittle16_t PdbDllRbld; // rbld number of mspdbNNN.dll little32_t ModiSubstreamSize; // Size of module info stream little32_t SecContrSubstreamSize; // Size of sec. contribution stream - little32_t SectionMapSize; - little32_t FileInfoSize; + little32_t SectionMapSize; // Size of sec. map substream + little32_t FileInfoSize; // Size of file info substream little32_t TypeServerSize; // Size of type server map ulittle32_t MFCTypeServerIndex; // Index of MFC Type Server little32_t OptionalDbgHdrSize; // Size of DbgHeader info @@ -101,12 +101,42 @@ Header->OptionalDbgHdrSize + Header->ECSubstreamSize) return std::make_error_code(std::errc::illegal_byte_sequence); + // Only certain substreams are guaranteed to be aligned. Validate + // them here. if (Header->ModiSubstreamSize % sizeof(uint32_t) != 0) return std::make_error_code(std::errc::illegal_byte_sequence); + if (Header->SecContrSubstreamSize % sizeof(uint32_t) != 0) + return std::make_error_code(std::errc::illegal_byte_sequence); + if (Header->SectionMapSize % sizeof(uint32_t) != 0) + return std::make_error_code(std::errc::illegal_byte_sequence); + if (Header->FileInfoSize % sizeof(uint32_t) != 0) + return std::make_error_code(std::errc::illegal_byte_sequence); + if (Header->TypeServerSize % sizeof(uint32_t) != 0) + return std::make_error_code(std::errc::illegal_byte_sequence); + + std::error_code EC; + if (EC = readSubstream(ModInfoSubstream, Header->ModiSubstreamSize)) + return EC; + + // Since each ModInfo in the stream has a variable length, we have to iterate over + // them to know how many there actually are. 
+ auto Range = llvm::make_range(ModInfoIterator(&ModInfoSubstream.front()), + ModInfoIterator(&ModInfoSubstream.back() + 1)); + for (auto Info : Range) + ModuleInfos.push_back(ModuleInfoEx(Info)); - ModInfoSubstream.resize(Header->ModiSubstreamSize); - if (auto EC = - Stream.readBytes(&ModInfoSubstream[0], Header->ModiSubstreamSize)) + if (EC = readSubstream(SecContrSubstream, Header->SecContrSubstreamSize)) + return EC; + if (EC = readSubstream(SecMapSubstream, Header->SectionMapSize)) + return EC; + if (EC = readSubstream(FileInfoSubstream, Header->FileInfoSize)) + return EC; + if (EC = readSubstream(TypeServerMapSubstream, Header->TypeServerSize)) + return EC; + if (EC = readSubstream(ECSubstream, Header->ECSubstreamSize)) + return EC; + + if (EC = initializeFileInfo()) return EC; return std::error_code(); @@ -150,7 +180,90 @@ return static_cast(Machine); } -llvm::iterator_range<ModInfoIterator> PDBDbiStream::modules() const { - return llvm::make_range(ModInfoIterator(&ModInfoSubstream.front()), - ModInfoIterator(&ModInfoSubstream.back() + 1)); +ArrayRef<ModuleInfoEx> PDBDbiStream::modules() const { return ModuleInfos; } + +std::error_code PDBDbiStream::readSubstream(std::vector<uint8_t> &Bytes, uint32_t Size) { + Bytes.clear(); + if (Size == 0) + return std::error_code(); + + Bytes.resize(Size); + return Stream.readBytes(&Bytes[0], Size); +} + +std::error_code PDBDbiStream::initializeFileInfo() { + struct FileInfoSubstreamHeader { + ulittle16_t NumModules; // Total # of modules, should match number of + // records in the ModuleInfo substream. + ulittle16_t NumSourceFiles; // Total # of source files. This value is not + // accurate because PDB actually supports more + // than 64k source files, so we ignore it and + // compute the value from other stream fields. 
+ }; + + // The layout of the FileInfoSubstream is like this: + // struct { + // ulittle16_t NumModules; + // ulittle16_t NumSourceFiles; + // ulittle16_t ModIndices[NumModules]; + // ulittle16_t ModFileCounts[NumModules]; + // ulittle32_t FileNameOffsets[NumSourceFiles]; + // char Names[][NumSourceFiles]; + // }; + // with the caveat that `NumSourceFiles` cannot be trusted, so + // it is computed by summing `ModFileCounts`. + // + const uint8_t *Buf = &FileInfoSubstream[0]; + auto FI = reinterpret_cast<const FileInfoSubstreamHeader *>(Buf); + Buf += sizeof(FileInfoSubstreamHeader); + // The number of modules in the stream should be the same as reported by + // the FileInfoSubstreamHeader. + if (FI->NumModules != ModuleInfos.size()) + return std::make_error_code(std::errc::illegal_byte_sequence); + + // First is an array of `NumModules` module indices. This is not used for the + // same reason that `NumSourceFiles` is not used. It's an array of uint16's, + // but it's possible there are more than 64k source files, which would imply + // more than 64k modules (e.g. object files) as well. So we ignore this + // field. + llvm::ArrayRef<ulittle16_t> ModIndexArray( + reinterpret_cast<const ulittle16_t *>(Buf), ModuleInfos.size()); + + llvm::ArrayRef<ulittle16_t> ModFileCountArray(ModIndexArray.end(), + ModuleInfos.size()); + + // Compute the real number of source files. + uint32_t NumSourceFiles = 0; + for (auto Count : ModFileCountArray) + NumSourceFiles += Count; + + // This is the array that in the reference implementation corresponds to + // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a + // pointer. Due to the mentioned problems of pointers causing difficulty + // when reading from the file on 64-bit systems, we continue to ignore that + // field in `ModInfo`, and instead build a vector of StringRefs and store + // them in `ModuleInfoEx`. The value written to and read from the file is + // not used anyway, it is only there as a way to store the offsets for the + // purposes of later accessing the names at runtime. 
+ llvm::ArrayRef<ulittle32_t> FileNameOffsets( + reinterpret_cast<const ulittle32_t *>(ModFileCountArray.end()), + NumSourceFiles); + + const char *Names = reinterpret_cast<const char *>(FileNameOffsets.end()); + + // We go through each ModuleInfo, determine the number N of source files for + // that module, and then get the next N offsets from the Offsets array, using + // them to get the corresponding N names from the Names buffer and associating + // each one with the corresponding module. + uint32_t NextFileIndex = 0; + for (size_t I = 0; I < ModuleInfos.size(); ++I) { + uint32_t NumFiles = ModFileCountArray[I]; + ModuleInfos[I].SourceFiles.resize(NumFiles); + for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { + uint32_t FileIndex = FileNameOffsets[NextFileIndex]; + ModuleInfos[I].SourceFiles[J] = StringRef(Names + FileIndex); + } + } + + return std::error_code(); } diff --git a/llvm/test/DebugInfo/PDB/pdbdump-headers.test b/llvm/test/DebugInfo/PDB/pdbdump-headers.test --- a/llvm/test/DebugInfo/PDB/pdbdump-headers.test +++ b/llvm/test/DebugInfo/PDB/pdbdump-headers.test @@ -41,6 +41,8 @@ ; CHECK-NEXT: Symbol Byte Size: 208 ; CHECK-NEXT: Type Server Index: 0 ; CHECK-NEXT: Has EC Info: 0 +; CHECK-NEXT: 1 Contributing Source Files: +; CHECK-NEXT: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp ; CHECK-NEXT: * Linker * ; CHECK-NEXT: Debug Stream Index: 14 ; CHECK-NEXT: Object File: @@ -52,3 +54,4 @@ ; CHECK-NEXT: Symbol Byte Size: 516 ; CHECK-NEXT: Type Server Index: 0 ; CHECK-NEXT: Has EC Info: 0 +; CHECK-NEXT: 0 Contributing Source Files: diff --git a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp --- a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -281,25 +281,30 @@ << Minor << "." 
<< DbiStream.getPdbDllVersion() << '\n'; outs() << "Modules: \n"; - for (auto Modi : DbiStream.modules()) { - outs() << Modi.getModuleName() << '\n'; - outs().indent(4) << "Debug Stream Index: " << Modi.getModuleStreamIndex() - << '\n'; - outs().indent(4) << "Object File: " << Modi.getObjFileName() << '\n'; - outs().indent(4) << "Num Files: " << Modi.getNumberOfFiles() << '\n'; + for (auto &Modi : DbiStream.modules()) { + outs() << Modi.Info.getModuleName() << '\n'; + outs().indent(4) << "Debug Stream Index: " + << Modi.Info.getModuleStreamIndex() << '\n'; + outs().indent(4) << "Object File: " << Modi.Info.getObjFileName() << '\n'; + outs().indent(4) << "Num Files: " << Modi.Info.getNumberOfFiles() << '\n'; outs().indent(4) << "Source File Name Idx: " - << Modi.getSourceFileNameIndex() << '\n'; - outs().indent(4) << "Pdb File Name Idx: " << Modi.getPdbFilePathNameIndex() - << '\n'; - outs().indent(4) << "Line Info Byte Size: " << Modi.getLineInfoByteSize() - << '\n'; + << Modi.Info.getSourceFileNameIndex() << '\n'; + outs().indent(4) << "Pdb File Name Idx: " + << Modi.Info.getPdbFilePathNameIndex() << '\n'; + outs().indent(4) << "Line Info Byte Size: " + << Modi.Info.getLineInfoByteSize() << '\n'; outs().indent(4) << "C13 Line Info Byte Size: " - << Modi.getC13LineInfoByteSize() << '\n'; + << Modi.Info.getC13LineInfoByteSize() << '\n'; outs().indent(4) << "Symbol Byte Size: " - << Modi.getSymbolDebugInfoByteSize() << '\n'; - outs().indent(4) << "Type Server Index: " << Modi.getTypeServerIndex() + << Modi.Info.getSymbolDebugInfoByteSize() << '\n'; + outs().indent(4) << "Type Server Index: " << Modi.Info.getTypeServerIndex() << '\n'; - outs().indent(4) << "Has EC Info: " << Modi.hasECInfo() << '\n'; + outs().indent(4) << "Has EC Info: " << Modi.Info.hasECInfo() << '\n'; + outs().indent(4) << Modi.SourceFiles.size() + << " Contributing Source Files: \n"; + for (auto File : Modi.SourceFiles) { + outs().indent(8) << File << '\n'; + } } }