Index: include/llvm/DebugInfo/PDB/PDBTypes.h =================================================================== --- include/llvm/DebugInfo/PDB/PDBTypes.h +++ include/llvm/DebugInfo/PDB/PDBTypes.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_PDBTYPES_H #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Endian.h" #include #include @@ -500,6 +501,35 @@ bool operator!=(const Variant &Other) const { return !(*this == Other); } }; +namespace PDB { +static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f', + 't', ' ', 'C', '/', 'C', '+', '+', ' ', + 'M', 'S', 'F', ' ', '7', '.', '0', '0', + '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'}; + +// The superblock is overlaid at the beginning of the file (offset 0). +// It starts with a magic header and is followed by information which describes +// the layout of the file system. +struct SuperBlock { + char MagicBytes[sizeof(Magic)]; + // The file system is split into a variable number of fixed size elements. + // These elements are referred to as blocks. The size of a block may vary + // from system to system. + support::ulittle32_t BlockSize; + // This field's purpose is not yet known. + support::ulittle32_t Unknown0; + // This contains the number of blocks resident in the file system. In + // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file. + support::ulittle32_t NumBlocks; + // This contains the number of bytes which make up the directory. + support::ulittle32_t NumDirectoryBytes; + // This field's purpose is not yet known. + support::ulittle32_t Unknown1; + // This contains the block # of the block map. 
+ support::ulittle32_t BlockMapAddr; +}; +} + } // namespace llvm namespace std { @@ -513,4 +543,5 @@ }; } + #endif Index: include/llvm/Support/MemoryBuffer.h =================================================================== --- include/llvm/Support/MemoryBuffer.h +++ include/llvm/Support/MemoryBuffer.h @@ -122,7 +122,8 @@ /// Open the specified file as a MemoryBuffer, or open stdin if the Filename /// is "-". static ErrorOr> - getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1); + getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1, + bool RequiresNullTerminator = true); /// Map a subrange of the specified file as a MemoryBuffer. static ErrorOr> Index: lib/Support/MemoryBuffer.cpp =================================================================== --- lib/Support/MemoryBuffer.cpp +++ lib/Support/MemoryBuffer.cpp @@ -162,13 +162,14 @@ } ErrorOr> -MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize) { +MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, + bool RequiresNullTerminator) { SmallString<256> NameBuf; StringRef NameRef = Filename.toStringRef(NameBuf); if (NameRef == "-") return getSTDIN(); - return getFile(Filename, FileSize); + return getFile(Filename, FileSize, RequiresNullTerminator); } ErrorOr> Index: test/DebugInfo/PDB/DIA/pdbdump-flags.test =================================================================== --- test/DebugInfo/PDB/DIA/pdbdump-flags.test +++ test/DebugInfo/PDB/DIA/pdbdump-flags.test @@ -1,7 +1,7 @@ -; RUN: llvm-pdbdump %p/Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS -; RUN: llvm-pdbdump -types %p/Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES -; RUN: llvm-pdbdump -compilands %p/Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS -; RUN: llvm-pdbdump -types -compilands %p/Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE +; RUN: llvm-pdbdump %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS +; RUN: llvm-pdbdump -types %p/../Inputs/empty.pdb | 
FileCheck %s -check-prefix=TYPES +; RUN: llvm-pdbdump -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS +; RUN: llvm-pdbdump -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE ; Check that neither symbols nor compilands are dumped when neither argument specified. ; NO_ARGS: empty.pdb Index: test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test =================================================================== --- test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test +++ test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test @@ -1,6 +1,6 @@ -; RUN: llvm-pdbdump -symbols %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s -; RUN: llvm-pdbdump -types %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s -; RUN: llvm-pdbdump -globals %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s +; RUN: llvm-pdbdump -symbols %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s +; RUN: llvm-pdbdump -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s +; RUN: llvm-pdbdump -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s ; The format is func [0x+ - 0x-] ; SYM_FORMAT: ---SYMBOLS--- Index: test/DebugInfo/PDB/lit.local.cfg =================================================================== --- /dev/null +++ test/DebugInfo/PDB/lit.local.cfg @@ -1 +0,0 @@ -config.unsupported = not config.have_dia_sdk Index: test/DebugInfo/PDB/pdbdump-headers.test =================================================================== --- /dev/null +++ test/DebugInfo/PDB/pdbdump-headers.test @@ -0,0 +1,12 @@ +; RUN: llvm-pdbdump --dump-headers %p/Inputs/empty.pdb | FileCheck %s + +; CHECK: BlockSize: 4096 +; CHECK-NEXT: Unknown0: 2 +; CHECK-NEXT: NumBlocks: 25 +; CHECK-NEXT: NumDirectoryBytes: 136 +; CHECK-NEXT: Unknown1: 0 +; CHECK-NEXT: BlockMapAddr: 24 +; CHECK-NEXT: NumDirectoryBlocks: 1 +; CHECK-NEXT: BlockMapOffset: 98304 +; CHECK-NEXT: 
DirectoryBlocks: [23] +; CHECK-NEXT: NumFiles: 17 Index: tools/llvm-pdbdump/llvm-pdbdump.cpp =================================================================== --- tools/llvm-pdbdump/llvm-pdbdump.cpp +++ tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -22,6 +22,8 @@ #include "VariableDumper.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" @@ -38,6 +40,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" @@ -79,6 +82,15 @@ cl::desc("Assume the module is loaded at the specified address"), cl::cat(OtherOptions)); +cl::opt DumpHeaders("dump-headers", cl::desc("dump PDB headers"), + cl::cat(OtherOptions)); +cl::opt DumpFileSizes("dump-file-sizes", cl::desc("dump PDB file sizes"), + cl::cat(OtherOptions)); +cl::opt DumpFileBlocks("dump-file-blocks", cl::desc("dump PDB file blocks"), + cl::cat(OtherOptions)); +cl::opt DumpFileData("dump-file", cl::desc("dump file data"), + cl::cat(OtherOptions)); + cl::list ExcludeTypes("exclude-types", cl::desc("Exclude types by regular expression"), @@ -121,10 +133,264 @@ cl::cat(FilterCategory)); } + +static void reportError(StringRef Input, StringRef Message) { + if (Input == "-") + Input = ""; + errs() << Input << ": " << Message << "\n"; + errs().flush(); + exit(1); +} + +static void reportError(StringRef Input, std::error_code EC) { + reportError(Input, EC.message()); +} + +static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr, + const uint64_t Size) { + if (Addr + Size < Addr || Addr + Size < Size || + Addr + Size > uintptr_t(M.getBufferEnd()) || + Addr < uintptr_t(M.getBufferStart())) { + return std::make_error_code(std::errc::bad_address); + } + 
return std::error_code(); +} + +template +static std::error_code checkOffset(MemoryBufferRef M, ArrayRef AR) { + return checkOffset(M, uintptr_t(AR.data()), (uint64_t)AR.size() * sizeof(T)); +} + +static std::error_code checkOffset(MemoryBufferRef M, StringRef SR) { + return checkOffset(M, uintptr_t(SR.data()), SR.size()); +} + +// Sets Obj unless any bytes in [Ptr, Ptr + Size) fall outside of M. +// Returns the error code produced by checkOffset in that case. +template +static std::error_code getObject(const T *&Obj, MemoryBufferRef M, + const void *Ptr, + const uint64_t Size = sizeof(T)) { + uintptr_t Addr = uintptr_t(Ptr); + if (std::error_code EC = checkOffset(M, Addr, Size)) + return EC; + Obj = reinterpret_cast(Addr); + return std::error_code(); +} + +static uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize) { + return RoundUpToAlignment(NumBytes, BlockSize) / BlockSize; +} + +static uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize) { + return BlockNumber * BlockSize; +} + +static void dumpStructure(MemoryBufferRef M) { + const PDB::SuperBlock *SB; + if (auto EC = getObject(SB, M, M.getBufferStart())) + reportError(M.getBufferIdentifier(), EC); + + if (opts::DumpHeaders) { + outs() << "BlockSize: " << SB->BlockSize << '\n'; + outs() << "Unknown0: " << SB->Unknown0 << '\n'; + outs() << "NumBlocks: " << SB->NumBlocks << '\n'; + outs() << "NumDirectoryBytes: " << SB->NumDirectoryBytes << '\n'; + outs() << "Unknown1: " << SB->Unknown1 << '\n'; + outs() << "BlockMapAddr: " << SB->BlockMapAddr << '\n'; + } + + // We don't support blocksizes which aren't a multiple of four bytes. + if (SB->BlockSize % sizeof(support::ulittle32_t) != 0) + reportError(M.getBufferIdentifier(), + std::make_error_code(std::errc::illegal_byte_sequence)); + + // We don't support directories whose sizes aren't a multiple of four bytes. 
+ if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0) + reportError(M.getBufferIdentifier(), + std::make_error_code(std::errc::illegal_byte_sequence)); + + // The number of blocks which comprise the directory is a simple function of + // the number of bytes it contains. + uint64_t NumDirectoryBlocks = + bytesToBlocks(SB->NumDirectoryBytes, SB->BlockSize); + if (opts::DumpHeaders) + outs() << "NumDirectoryBlocks: " << NumDirectoryBlocks << '\n'; + + // The block map, as we understand it, is a block which consists of a list of + // block numbers. + // It is unclear what would happen if the number of blocks couldn't fit on a + // single block. + if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t)) + reportError(M.getBufferIdentifier(), + std::make_error_code(std::errc::illegal_byte_sequence)); + + + uint64_t BlockMapOffset = (uint64_t)SB->BlockMapAddr * SB->BlockSize; + if (opts::DumpHeaders) + outs() << "BlockMapOffset: " << BlockMapOffset << '\n'; + + // The directory is not contiguous. Instead, the block map contains a + // contiguous list of block numbers whose contents, when concatenated in + // order, make up the directory. 
+ auto DirectoryBlocks = + makeArrayRef(reinterpret_cast( + M.getBufferStart() + BlockMapOffset), + NumDirectoryBlocks); + if (auto EC = checkOffset(M, DirectoryBlocks)) + reportError(M.getBufferIdentifier(), EC); + + if (opts::DumpHeaders) { + outs() << "DirectoryBlocks: ["; + for (const support::ulittle32_t &DirectoryBlockAddr : DirectoryBlocks) { + if (&DirectoryBlockAddr != &DirectoryBlocks.front()) + outs() << ", "; + outs() << DirectoryBlockAddr; + } + outs() << "]\n"; + } + + bool SeenNumFiles = false; + uint32_t NumFiles = 0; + std::vector FileSizes; + DenseMap> FileMap; + uint32_t FileIdx = 0; + uint64_t DirectoryBytesRead = 0; + // The structure of the directory is as follows: + // struct PDBDirectory { + // uint32_t NumFiles; + // uint32_t FileSizes[NumFiles]; + // uint32_t FileMap[NumFiles][]; + // }; + // + // Empty files don't consume entries in the FileMap. + for (uint32_t DirectoryBlockAddr : DirectoryBlocks) { + uint64_t DirectoryBlockOffset = + blockToOffset(DirectoryBlockAddr, SB->BlockSize); + auto DirectoryBlock = + makeArrayRef(reinterpret_cast( + M.getBufferStart() + DirectoryBlockOffset), + SB->BlockSize / sizeof(support::ulittle32_t)); + if (auto EC = checkOffset(M, DirectoryBlock)) + reportError(M.getBufferIdentifier(), EC); + + // We read data out of the directory four bytes at a time. Depending on + // where we are in the directory, the contents may be: the number of files + // in the directory, a file's size, or a block in the file map. + for (uint32_t Data : DirectoryBlock) { + // Don't read beyond the end of the directory. + if (DirectoryBytesRead == SB->NumDirectoryBytes) + break; + + DirectoryBytesRead += sizeof(Data); + + // This data must be the number of files if we haven't seen it yet. + if (!SeenNumFiles) { + NumFiles = Data; + SeenNumFiles = true; + continue; + } + // This data must be a file size if we have not seen them all yet. 
+ if (FileSizes.size() < NumFiles) { + // It seems like some files have their size set to -1 when their contents + // are not present. Treat them like empty files for now. + if (Data == UINT32_MAX) + FileSizes.push_back(0); + else + FileSizes.push_back(Data); + continue; + } + + // This data must be a file block number if we have seen all of the file + // sizes. + std::vector *FileBlocks = nullptr; + // Figure out which file this block number belongs to. + while (FileIdx < NumFiles) { + uint64_t NumExpectedFileBlocks = + bytesToBlocks(FileSizes[FileIdx], SB->BlockSize); + FileBlocks = &FileMap[FileIdx]; + if (NumExpectedFileBlocks > FileBlocks->size()) + break; + ++FileIdx; + } + // It seems this block doesn't belong to any file? The file is either + // corrupt or something more mysterious is going on. + if (FileIdx == NumFiles) + reportError(M.getBufferIdentifier(), + std::make_error_code(std::errc::illegal_byte_sequence)); + + FileBlocks->push_back(Data); + } + } + + // We should have read exactly SB->NumDirectoryBytes bytes. 
+ assert(DirectoryBytesRead == SB->NumDirectoryBytes); + + if (opts::DumpHeaders) + outs() << "NumFiles: " << NumFiles << '\n'; + if (opts::DumpFileSizes) + for (uint32_t FileIdx = 0; FileIdx < NumFiles; ++FileIdx) + outs() << "FileSizes[" << FileIdx << "]: " << FileSizes[FileIdx] + << '\n'; + + if (opts::DumpFileBlocks) { + for (uint32_t FileIdx = 0; FileIdx < NumFiles; ++FileIdx) { + outs() << "FileBlocks[" << FileIdx << "]: ["; + std::vector &FileBlocks = FileMap[FileIdx]; + for (uint32_t &FileBlock : FileBlocks) { + if (&FileBlock != &FileBlocks.front()) + outs() << ", "; + outs() << FileBlock; + } + outs() << "]\n"; + } + } + + StringRef DumpFileStr = opts::DumpFileData; + uint32_t DumpFileNum; + if (!DumpFileStr.getAsInteger(/*Radix=*/0U, DumpFileNum) && + DumpFileNum < NumFiles) { + uint32_t FileBytesRead = 0; + uint32_t FileSize = FileSizes[DumpFileNum]; + std::vector &FileBlocks = FileMap[DumpFileNum]; + for (uint32_t &FileBlockAddr : FileBlocks) { + uint64_t FileBlockOffset = blockToOffset(FileBlockAddr, SB->BlockSize); + uint32_t BytesLeftToReadInFile = FileSize - FileBytesRead; + if (BytesLeftToReadInFile == 0) + break; + + uint32_t BytesToReadInBlock = + std::min(BytesLeftToReadInFile, static_cast(SB->BlockSize)); + auto FileBlockData = + StringRef(M.getBufferStart() + FileBlockOffset, BytesToReadInBlock); + if (auto EC = checkOffset(M, FileBlockData)) + reportError(M.getBufferIdentifier(), EC); + + outs() << FileBlockData; + FileBytesRead += FileBlockData.size(); + } + } +} + static void dumpInput(StringRef Path) { + if (opts::DumpHeaders || !opts::DumpFileData.empty()) { + ErrorOr> ErrorOrBuffer = + MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + + if (std::error_code EC = ErrorOrBuffer.getError()) + reportError(Path, EC); + + std::unique_ptr &Buffer = ErrorOrBuffer.get(); + + dumpStructure(Buffer->getMemBufferRef()); + + outs().flush(); + return; + } + std::unique_ptr Session; - PDB_ErrorCode Error = - 
llvm::loadDataForPDB(PDB_ReaderType::DIA, Path, Session); + PDB_ErrorCode Error = loadDataForPDB(PDB_ReaderType::DIA, Path, Session); switch (Error) { case PDB_ErrorCode::Success: break; @@ -159,7 +425,7 @@ Printer.NewLine(); WithColor(Printer, PDB_ColorItem::Identifier).get() << "Size"; - if (!llvm::sys::fs::file_size(FileName, FileSize)) { + if (!sys::fs::file_size(FileName, FileSize)) { Printer << ": " << FileSize << " bytes"; } else { Printer << ": (Unable to obtain file size)"; @@ -256,11 +522,11 @@ PrettyStackTraceProgram X(argc_, argv_); SmallVector argv; - llvm::SpecificBumpPtrAllocator ArgAllocator; - std::error_code EC = llvm::sys::Process::GetArgumentVector( - argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator); + SpecificBumpPtrAllocator ArgAllocator; + std::error_code EC = sys::Process::GetArgumentVector( + argv, makeArrayRef(argv_, argc_), ArgAllocator); if (EC) { - llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n'; + errs() << "error: couldn't get arguments: " << EC.message() << '\n'; return 1; }