diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h --- a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h @@ -20,6 +20,7 @@ class MemoryBuffer; struct PDBFileContext; +class PDBStream; class PDBFile { public: @@ -54,6 +55,8 @@ return BlockNumber * BlockSize; } + PDBStream *getPDBStream() const; + private: std::unique_ptr Context; };
diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBInfoStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBInfoStream.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBInfoStream.h
@@ -0,0 +1,69 @@
+//===- PDBInfoStream.h - PDB Info Stream (Stream 1) Access ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBINFOSTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBINFOSTREAM_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBNameMap.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBRawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBStream.h"
+
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+
+// Reader for the PDB info stream (stream 1): the version, signature, age and
+// GUID header fields, followed by the named-stream map.
+class PDBInfoStream {
+public:
+  PDBInfoStream(const PDBFile &File);
+
+  // Re-reads every field from the start of stream 1. Returns not_supported
+  // when the stored version is older than VC70.
+  std::error_code reload();
+
+  PdbRaw_ImplVer getVersion() const;
+  uint32_t getSignature() const;
+  uint32_t getAge() const;
+  PDB_UniqueId getGuid() const;
+
+  // Returns the stream number registered under Name, or 0 when no such name
+  // is present.
+  uint32_t getNamedStreamIndex(llvm::StringRef Name) const;
+
+private:
+  PDBStream Stream1;
+  const PDBFile &Pdb;
+
+  // PDB file format version. reload() rejects anything older than VC70. See
+  // the enumeration `PdbRaw_ImplVer` for the other possible values.
+  uint32_t Version;
+
+  // A 32-bit signature unique across all PDBs. This is generated with
+  // a call to time() when the PDB is written, but obviously this is not
+  // universally unique.
+  uint32_t Signature;
+
+  // The number of times the PDB has been written. Might also be used to
+  // ensure that the PDB matches the executable.
+  uint32_t Age;
+
+  // Due to the aforementioned limitations with `Signature`, this is a new
+  // signature present on VC70 and higher PDBs which is guaranteed to be
+  // universally unique.
+  PDB_UniqueId Guid;
+
+  // Name -> stream number map that follows the header fields in stream 1.
+  PDBNameMap NamedStreams;
+};
+}
+
+#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBNameMap.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBNameMap.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBNameMap.h
@@ -0,0 +1,40 @@
+//===- PDBNameMap.h - PDB Name Map ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAP_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAP_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdint>
+#include <system_error>
+
+namespace llvm {
+class PDBStream;
+
+// Maps stream names to 32-bit values (stream numbers) parsed from a PDBStream.
+class PDBNameMap {
+public:
+  PDBNameMap();
+
+  // Reads the serialized map starting at the current offset of Stream and
+  // records every present entry.
+  std::error_code load(PDBStream &Stream);
+
+  // Stores the value mapped to Name in Value and returns true; returns false
+  // and leaves Value untouched when Name is unknown.
+  bool tryGetValue(StringRef Name, uint32_t &Value) const;
+
+private:
+  StringMap<uint32_t> Mapping;
+};
+}
+
+#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBRawConstants.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBRawConstants.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBRawConstants.h
@@ -0,0 +1,31 @@
+//===- PDBRawConstants.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBRAWCONSTANTS_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBRAWCONSTANTS_H
+
+#include <cstdint>
+
+namespace llvm {
+// PDB implementation version stamps; the values look like YYYYMMDD dates.
+enum PdbRaw_ImplVer : uint32_t {
+  VC2 = 19941610, // NOTE(review): not a valid YYYYMMDD date; confirm.
+  VC4 = 19950623,
+  VC41 = 19950814,
+  VC50 = 19960307,
+  VC98 = 19970604,
+  VC70Dep = 19990604, // deprecated
+  VC70 = 20000404,
+  VC80 = 20030901,
+  VC110 = 20091201,
+  VC140 = 20140508,
+};
+}
+
+#endif
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -28,6 +28,8 @@ add_pdb_impl_folder(Raw Raw/PDBFile.cpp + Raw/PDBInfoStream.cpp + Raw/PDBNameMap.cpp Raw/PDBStream.cpp Raw/RawSession.cpp)
diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBInfoStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBInfoStream.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/PDBInfoStream.cpp
@@ -0,0 +1,54 @@
+//===- PDBInfoStream.cpp - PDB Info Stream (Stream 1) Access ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/PDBInfoStream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+// Members are initialized in declaration order (Stream1, then Pdb).
+PDBInfoStream::PDBInfoStream(const PDBFile &File)
+    : Stream1(1, File), Pdb(File) {}
+
+// Re-reads the header (version, signature, age, GUID) and the named-stream
+// map from the start of stream 1. Fails with not_supported before VC70.
+std::error_code PDBInfoStream::reload() {
+  Stream1.setOffset(0);
+  support::ulittle32_t Value;
+
+  // Every 32-bit header field is decoded as little-endian.
+  Stream1.readObject(&Value);
+  Version = Value;
+  if (Version < PdbRaw_ImplVer::VC70)
+    return std::make_error_code(std::errc::not_supported);
+  Stream1.readObject(&Value);
+  Signature = Value;
+  Stream1.readObject(&Value);
+  Age = Value;
+  Stream1.readObject(&Guid);
+  // Surface name-map failures instead of discarding them.
+  return NamedStreams.load(Stream1);
+}
+
+// Returns the stream index registered under Name, or 0 when absent.
+uint32_t PDBInfoStream::getNamedStreamIndex(llvm::StringRef Name) const {
+  uint32_t Result;
+  if (!NamedStreams.tryGetValue(Name, Result))
+    return 0;
+  return Result;
+}
+
+PdbRaw_ImplVer PDBInfoStream::getVersion() const {
+  return static_cast<PdbRaw_ImplVer>(Version);
+}
+
+uint32_t PDBInfoStream::getSignature() const { return Signature; }
+uint32_t PDBInfoStream::getAge() const { return Age; }
+PDB_UniqueId PDBInfoStream::getGuid() const { return Guid; }
diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBNameMap.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBNameMap.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/PDBNameMap.cpp
@@ -0,0 +1,108 @@
+//===- PDBNameMap.cpp - PDB Name Map ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/PDBNameMap.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBStream.h"
+
+using namespace llvm;
+
+PDBNameMap::PDBNameMap() {}
+
+std::error_code PDBNameMap::load(PDBStream &Stream) {
+  // Parse the serialized string-set/hash table at Stream's current offset
+  // into Mapping. It starts with the number of bytes in the table.
+  uint32_t NumberOfBytes;
+  Stream.readInteger(NumberOfBytes);
+
+  // Following that field is the starting offset of strings in the name table.
+  uint32_t StringsOffset = Stream.getOffset();
+  Stream.setOffset(StringsOffset + NumberOfBytes);
+
+  // This appears to be equivalent to the total number of strings *actually*
+  // in the name table.
+  uint32_t HashSize;
+  Stream.readInteger(HashSize); // Only advances the stream; unused below.
+
+  // This appears to be an upper bound on the number of strings in the name
+  // table.
+  uint32_t MaxNumberOfStrings;
+  Stream.readInteger(MaxNumberOfStrings);
+
+  // This appears to be a hash table which uses bitfields to determine whether
+  // or not a bucket is 'present'.
+  uint32_t NumPresentWords;
+  Stream.readInteger(NumPresentWords);
+
+  // Store all the 'present' bits in a vector for later processing.
+  SmallVector PresentWords; // NOTE(review): template arguments look lost.
+  for (uint32_t I = 0; I != NumPresentWords; ++I) {
+    uint32_t Word;
+    Stream.readInteger(Word);
+    PresentWords.push_back(Word);
+  }
+
+  // This appears to be a hash table which uses bitfields to determine whether
+  // or not a bucket is 'deleted'.
+  uint32_t NumDeletedWords;
+  Stream.readInteger(NumDeletedWords);
+
+  // Store all the 'deleted' bits in a vector for later processing.
+  SmallVector DeletedWords;
+  for (uint32_t I = 0; I != NumDeletedWords; ++I) {
+    uint32_t Word;
+    Stream.readInteger(Word);
+    DeletedWords.push_back(Word);
+  }
+
+  BitVector Present(MaxNumberOfStrings, false);
+  if (!PresentWords.empty())
+    Present.setBitsInMask(PresentWords.data(), PresentWords.size());
+  BitVector Deleted(MaxNumberOfStrings, false); // Never consulted below.
+  if (!DeletedWords.empty())
+    Deleted.setBitsInMask(DeletedWords.data(), DeletedWords.size());
+
+  for (uint32_t I = 0; I < MaxNumberOfStrings; ++I) {
+    if (!Present.test(I))
+      continue;
+
+    // For all present entries, dump out their mapping.
+
+    // This appears to be an offset relative to the start of the strings.
+    // It tells us where the null-terminated string begins.
+    uint32_t NameOffset;
+    Stream.readInteger(NameOffset);
+
+    // This appears to be a stream number into the stream directory.
+    uint32_t NameIndex;
+    Stream.readInteger(NameIndex);
+
+    // Compute the offset of the start of the string relative to the stream.
+    uint32_t StringOffset = StringsOffset + NameOffset;
+    uint32_t OldOffset = Stream.getOffset();
+    // Pump out our c-string from the stream.
+    std::string Str;
+    Stream.setOffset(StringOffset);
+    Stream.readZeroString(Str);
+
+    Stream.setOffset(OldOffset);
+    // Add this to a string-map from name to stream number.
+    Mapping.insert({Str, NameIndex});
+  }
+  // NOTE(review): no read result above is checked; success is always reported.
+  return std::error_code();
+}
+
+bool PDBNameMap::tryGetValue(StringRef Name, uint32_t &Value) const {
+  auto Iter = Mapping.find(Name);
+  if (Iter == Mapping.end())
+    return false; // Value is left untouched on a miss.
+  Value = Iter->second; // The value stored for Name by load().
+  return true;
+}
diff --git a/llvm/test/DebugInfo/PDB/pdbdump-headers.test b/llvm/test/DebugInfo/PDB/pdbdump-headers.test --- a/llvm/test/DebugInfo/PDB/pdbdump-headers.test +++ b/llvm/test/DebugInfo/PDB/pdbdump-headers.test @@ -14,23 +14,6 @@ ; CHECK-NEXT: Signature: 54e507e2 ; CHECK-NEXT: Age: 1 ; CHECK-NEXT: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0} -; CHECK-NEXT: NumberOfBytes: 34 -; CHECK-NEXT: HashSize: 3 -; CHECK-NEXT: MaxNumberOfStrings: 6 -; CHECK-NEXT: NumPresentWords: 1 -; CHECK-NEXT: Word: 26 -; CHECK-NEXT: NumDeletedWords: 0 -; CHECK-NEXT: NameOffset: 17 -; CHECK-NEXT: NameIndex: 9 -; CHECK-NEXT: String: /src/headerblock - -; CHECK: NameOffset: 10 -; CHECK-NEXT: NameIndex: 13 -; CHECK-NEXT: String: /names - -; CHECK: NameOffset: 0 -; CHECK-NEXT: NameIndex: 5 -; CHECK-NEXT: String: /LinkInfo ; CHECK: NameStream: 13 ; CHECK-NEXT: NameStreamSignature: effeeffe diff --git a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp --- a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -36,6 +36,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolThunk.h" #include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/DebugInfo/PDB/Raw/PDBInfoStream.h" #include "llvm/DebugInfo/PDB/Raw/PDBStream.h" #include "llvm/DebugInfo/PDB/Raw/RawSession.h" #include "llvm/Support/CommandLine.h" @@ -233,135 +234,21 @@ } } - // Stream 1 starts with the following header: - // uint32_t Version; - // uint32_t Signature; - // uint32_t Age; - // GUID Guid; - PDBStream Stream1(1, File); - uint32_t Version; - uint32_t Signature; - uint32_t Age; - PDB_UniqueId Guid; - - Stream1.readInteger(Version); - outs() << "Version: " << Version
<< '\n'; - // PDB's with versions before PDBImpvVC70 might not have the Guid field, we - // don't support them. - if (Version < 20000404) - reportError("", std::make_error_code(std::errc::not_supported)); - - // This appears to be the time the PDB was last opened by an MSVC tool? - // It is definitely a timestamp of some sort. - Stream1.readInteger(Signature); - outs() << "Signature: "; - outs().write_hex(Signature) << '\n'; - - // This appears to be a number which is used to determine that the PDB is kept - // in sync with the EXE. - Stream1.readInteger(Age); - outs() << "Age: " << Age << '\n'; - - // I'm not sure what the purpose of the GUID is. - Stream1.readObject(&Guid); - outs() << "Guid: " << Guid << '\n'; - - // This is some sort of weird string-set/hash table encoded in the stream. - // It starts with the number of bytes in the table. - uint32_t NumberOfBytes; - Stream1.readInteger(NumberOfBytes); - outs() << "NumberOfBytes: " << NumberOfBytes << '\n'; - - // Following that field is the starting offset of strings in the name table. - uint32_t StringsOffset = Stream1.getOffset(); - Stream1.setOffset(StringsOffset + NumberOfBytes); - - // This appears to be equivalent to the total number of strings *actually* - // in the name table. - uint32_t HashSize; - Stream1.readInteger(HashSize); - outs() << "HashSize: " << HashSize << '\n'; - - // This appears to be an upper bound on the number of strings in the name - // table. - uint32_t MaxNumberOfStrings; - Stream1.readInteger(MaxNumberOfStrings); - outs() << "MaxNumberOfStrings: " << MaxNumberOfStrings << '\n'; - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'present'. - uint32_t NumPresentWords; - Stream1.readInteger(NumPresentWords); - outs() << "NumPresentWords: " << NumPresentWords << '\n'; - - // Store all the 'present' bits in a vector for later processing. 
- SmallVector PresentWords; - for (uint32_t I = 0; I != NumPresentWords; ++I) { - uint32_t Word; - Stream1.readInteger(Word); - PresentWords.push_back(Word); - outs() << "Word: " << Word << '\n'; - } - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'deleted'. - uint32_t NumDeletedWords; - Stream1.readInteger(NumDeletedWords); - outs() << "NumDeletedWords: " << NumDeletedWords << '\n'; - - // Store all the 'deleted' bits in a vector for later processing. - SmallVector DeletedWords; - for (uint32_t I = 0; I != NumDeletedWords; ++I) { - uint32_t Word; - Stream1.readInteger(Word); - DeletedWords.push_back(Word); - outs() << "Word: " << Word << '\n'; - } + PDBInfoStream InfoStream(File); + if (auto EC = InfoStream.reload()) + reportError("", EC); - BitVector Present(MaxNumberOfStrings, false); - if (!PresentWords.empty()) - Present.setBitsInMask(PresentWords.data(), PresentWords.size()); - BitVector Deleted(MaxNumberOfStrings, false); - if (!DeletedWords.empty()) - Deleted.setBitsInMask(DeletedWords.data(), DeletedWords.size()); - - StringMap NamedStreams; - for (uint32_t I = 0; I < MaxNumberOfStrings; ++I) { - if (!Present.test(I)) - continue; - - // For all present entries, dump out their mapping. - - // This appears to be an offset relative to the start of the strings. - // It tells us where the null-terminated string begins. - uint32_t NameOffset; - Stream1.readInteger(NameOffset); - outs() << "NameOffset: " << NameOffset << '\n'; - - // This appears to be a stream number into the stream directory. - uint32_t NameIndex; - Stream1.readInteger(NameIndex); - outs() << "NameIndex: " << NameIndex << '\n'; - - // Compute the offset of the start of the string relative to the stream. - uint32_t StringOffset = StringsOffset + NameOffset; - uint32_t OldOffset = Stream1.getOffset(); - // Pump out our c-string from the stream. 
- std::string Str; - Stream1.setOffset(StringOffset); - Stream1.readZeroString(Str); - outs() << "String: " << Str << "\n\n"; - - Stream1.setOffset(OldOffset); - // Add this to a string-map from name to stream number. - NamedStreams.insert({Str, NameIndex}); - } + outs() << "Version: " << InfoStream.getVersion() << '\n'; + outs() << "Signature: "; + outs().write_hex(InfoStream.getSignature()) << '\n'; + outs() << "Age: " << InfoStream.getAge() << '\n'; + outs() << "Guid: " << InfoStream.getGuid() << '\n'; // Let's try to dump out the named stream "/names". - auto NameI = NamedStreams.find("/names"); - if (NameI != NamedStreams.end()) { - PDBStream NameStream(NameI->second, File); - outs() << "NameStream: " << NameI->second << '\n'; + uint32_t NameStreamIndex = InfoStream.getNamedStreamIndex("/names"); + if (NameStreamIndex != 0) { + PDBStream NameStream(NameStreamIndex, File); + outs() << "NameStream: " << NameStreamIndex << '\n'; // The name stream appears to start with a signature and version. uint32_t NameStreamSignature;