Index: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h +++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h @@ -67,6 +67,8 @@ Error setBlockData(uint32_t BlockIndex, uint32_t Offset, ArrayRef Data) const override; + ArrayRef getFpmPages() const { return FpmPages; } + ArrayRef getStreamSizes() const { return ContainerLayout.StreamSizes; } @@ -95,6 +97,7 @@ std::unique_ptr Buffer; + std::vector FpmPages; msf::MSFLayout ContainerLayout; std::unique_ptr Info; Index: llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp +++ llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp @@ -121,14 +121,41 @@ ContainerLayout.SB = SB; // Initialize Free Page Map. - ContainerLayout.FreePageMap.resize(getBlockSize() * 8); - uint64_t FPMOffset = SB->FreeBlockMapBlock * getBlockSize(); - ArrayRef FPMBlock; - if (auto EC = Buffer->readBytes(FPMOffset, getBlockSize(), FPMBlock)) - return EC; - for (uint32_t I = 0, E = getBlockSize() * 8; I != E; ++I) - if (FPMBlock[I / 8] & (1 << (I % 8))) - ContainerLayout.FreePageMap[I] = true; + ContainerLayout.FreePageMap.resize(SB->NumBlocks); + ArrayRef FpmBytes; + // The Fpm exists either at block 1 or block 2 of the MSF. However, this + // allows for a maximum of getBlockSize() * 8 bits in the Fpm, and + // thus an equal number of total blocks in the file. For a block size + // of 4KiB (very common), this would yield 32Ki total blocks in the file, for a + // maximum file size of 32Ki * 4KiB = 128MiB. Obviously this won't do, so + // the Fpm is split across the file at `getBlockSize()` intervals. As a + // result, every block whose index is of the form |{1,2} + getBlockSize() * k| + // for any non-negative integer k is an Fpm block. 
In theory, we only really + // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but + // current versions of the MSF format already expect the Fpm to be arranged + // at getBlockSize() intervals, so we have to be compatible. + // See the function fpmPn() for more information: + // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 + + uint32_t BlocksPerSection = getBlockSize(); + uint64_t FpmBlockOffset = SB->FreeBlockMapBlock; + uint32_t BlocksRemaining = getBlockCount(); + for (uint32_t SI = 0; BlocksRemaining > 0; ++SI) { + uint32_t FpmFileOffset = FpmBlockOffset * getBlockSize(); + + if (auto EC = Buffer->readBytes(FpmFileOffset, getBlockSize(), FpmBytes)) + return EC; + + uint32_t BlocksThisSection = std::min(BlocksRemaining, BlocksPerSection); + for (uint32_t I = 0; I < BlocksThisSection; ++I) { + uint32_t BI = I + BlocksPerSection * SI; + + if (FpmBytes[I / 8] & (1 << (I % 8))) + ContainerLayout.FreePageMap[BI] = true; + } + BlocksRemaining -= BlocksThisSection; + FpmBlockOffset += BlocksPerSection; + } Reader.setOffset(getBlockMapOffset()); if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, Index: llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test =================================================================== --- llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test +++ llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test @@ -2,7 +2,7 @@ ; RUN: -sym-record-bytes -publics -module-files -stream-name=/names \ ; RUN: -stream-summary -stream-blocks -ipi-records -ipi-record-bytes \ ; RUN: -section-contribs -section-map -section-headers -line-info \ -; RUN: -tpi-hash -fpo -fpm %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s +; RUN: -tpi-hash -fpo -page-stats %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s ; RUN: llvm-pdbdump raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s ; RUN: llvm-pdbdump raw -headers -stream-name=/names -modules -module-files \ ; RUN: %p/Inputs/big-read.pdb | 
FileCheck -check-prefix=BIG %s @@ -38,7 +38,10 @@ ; EMPTY-NEXT: Stream 15: [TPI Hash] (308 bytes) ; EMPTY-NEXT: Stream 16: [IPI Hash] (68 bytes) ; EMPTY-NEXT: ] -; EMPTY-NEXT: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] +; EMPTY-NEXT: Msf Free Pages: [3, 4, 5, 8, 9] +; EMPTY-NEXT: Orphaned Pages: [] +; EMPTY-NEXT: Multiply Used Pages: [] +; EMPTY-NEXT: Use After Free Pages: [8] ; EMPTY-NEXT: StreamBlocks [ ; EMPTY-NEXT: Stream 0: [8] ; EMPTY-NEXT: Stream 1: [19] @@ -974,7 +977,10 @@ ; ALL: Stream 15: [TPI Hash] (308 bytes) ; ALL: Stream 16: [IPI Hash] (68 bytes) ; ALL: ] -; ALL: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] +; ALL: Msf Free Pages: [3, 4, 5, 8, 9] +; ALL: Orphaned Pages: [] +; ALL: Multiply Used Pages: [] +; ALL: Use After Free Pages: [8] ; ALL: StreamBlocks [ ; ALL: Stream 0: [8] ; ALL: Stream 1: [19] Index: llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h =================================================================== --- llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h +++ llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h @@ -16,6 +16,7 @@ #include "llvm/Support/ScopedPrinter.h" namespace llvm { +class BitVector; namespace pdb { class LLVMOutputStyle : public OutputStyle { public: @@ -39,6 +40,8 @@ Error dumpSectionHeaders(); Error dumpFpoStream(); + void dumpBitVector(StringRef Name, const BitVector &V); + void flush(); PDBFile &File; Index: llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp =================================================================== --- llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp +++ llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp @@ -35,6 +35,48 @@ using namespace llvm::msf; using namespace llvm::pdb; +namespace { +struct PageStats { + explicit PageStats(const BitVector &FreePages) + : Upm(FreePages), ActualUsedPages(FreePages.size()), + MultiUsePages(FreePages.size()), UseAfterFreePages(FreePages.size()) { + 
const_cast(Upm).flip(); + // To calculate orphaned pages, we start with the set of pages that the + // MSF thinks are used. Each time we find one that actually *is* used, + // we unset it. Whichever bits remain set at the end are orphaned. + OrphanedPages = Upm; + } + + // The inverse of the MSF File's copy of the Fpm. The basis for which we + // determine the allocation status of each page. + const BitVector Upm; + + // Pages which are marked as used in the FPM and are used at least once. + BitVector ActualUsedPages; + + // Pages which are marked as used in the FPM but are used more than once. + BitVector MultiUsePages; + + // Pages which are marked as used in the FPM but are not used at all. + BitVector OrphanedPages; + + // Pages which are marked free in the FPM but are used. + BitVector UseAfterFreePages; +}; +} + +static void recordKnownUsedPage(PageStats &Stats, uint32_t UsedIndex) { + if (Stats.Upm.test(UsedIndex)) { + if (Stats.ActualUsedPages.test(UsedIndex)) + Stats.MultiUsePages.set(UsedIndex); + Stats.ActualUsedPages.set(UsedIndex); + Stats.OrphanedPages.reset(UsedIndex); + } else { + // The MSF doesn't think this page is used, but it is. + Stats.UseAfterFreePages.set(UsedIndex); + } +} + static void printSectionOffset(llvm::raw_ostream &OS, const SectionOffset &Off) { OS << Off.Off << ", " << Off.Isect; @@ -238,21 +280,53 @@ } Error LLVMOutputStyle::dumpFreePageMap() { - if (!opts::raw::DumpFreePageMap) + if (!opts::raw::DumpPageStats) return Error::success(); - const BitVector &FPM = File.getMsfLayout().FreePageMap; - - std::vector Vec; - for (uint32_t I = 0, E = FPM.size(); I != E; ++I) - if (!FPM[I]) - Vec.push_back(I); - // Prints out used pages instead of free pages because + // Start with used pages instead of free pages because // the number of free pages is far larger than used pages. 
- P.printList("Used Page Map", Vec); + BitVector FPM = File.getMsfLayout().FreePageMap; + + PageStats PS(FPM); + + recordKnownUsedPage(PS, 0); // MSF Super Block + + uint32_t BlocksPerSection = File.getBlockSize(); + uint32_t NumSections = + llvm::alignTo(File.getBlockCount(), BlocksPerSection) / BlocksPerSection; + for (uint32_t I = 0; I < NumSections; ++I) { + uint32_t Fpm0 = 1 + BlocksPerSection * I; + // 2 Fpm blocks spaced at `getBlockSize()` block intervals + recordKnownUsedPage(PS, Fpm0); + recordKnownUsedPage(PS, Fpm0 + 1); + } + + recordKnownUsedPage(PS, File.getBlockMapIndex()); // Stream Table + + for (auto DB : File.getDirectoryBlockArray()) { + recordKnownUsedPage(PS, DB); + } + for (auto &SE : File.getStreamMap()) { + for (auto &S : SE) { + recordKnownUsedPage(PS, S); + } + } + + dumpBitVector("Msf Free Pages", FPM); + dumpBitVector("Orphaned Pages", PS.OrphanedPages); + dumpBitVector("Multiply Used Pages", PS.MultiUsePages); + dumpBitVector("Use After Free Pages", PS.UseAfterFreePages); return Error::success(); } +void LLVMOutputStyle::dumpBitVector(StringRef Name, const BitVector &V) { + std::vector Vec; + for (uint32_t I = 0, E = V.size(); I != E; ++I) + if (V[I]) + Vec.push_back(I); + P.printList(Name, Vec); +} + Error LLVMOutputStyle::dumpStreamBlocks() { if (!opts::raw::DumpStreamBlocks) return Error::success(); Index: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h =================================================================== --- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h +++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h @@ -37,7 +37,7 @@ extern llvm::cl::opt DumpHeaders; extern llvm::cl::opt DumpStreamBlocks; extern llvm::cl::opt DumpStreamSummary; -extern llvm::cl::opt DumpFreePageMap; +extern llvm::cl::opt DumpPageStats; extern llvm::cl::opt DumpTpiHash; extern llvm::cl::opt DumpTpiRecordBytes; extern llvm::cl::opt DumpTpiRecords; Index: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp 
=================================================================== --- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -167,8 +167,10 @@ cl::opt DumpStreamSummary("stream-summary", cl::desc("dump summary of the PDB streams"), cl::cat(MsfOptions), cl::sub(RawSubcommand)); -cl::opt DumpFreePageMap("fpm", cl::desc("dump free page bitmap"), - cl::cat(MsfOptions), cl::sub(RawSubcommand)); +cl::opt DumpPageStats( + "page-stats", + cl::desc("dump allocation stats of the pages in the MSF file"), + cl::cat(MsfOptions), cl::sub(RawSubcommand)); // TYPE OPTIONS cl::opt @@ -544,7 +546,7 @@ opts::raw::DumpPublics = true; opts::raw::DumpSectionHeaders = true; opts::raw::DumpStreamSummary = true; - opts::raw::DumpFreePageMap = true; + opts::raw::DumpPageStats = true; opts::raw::DumpStreamBlocks = true; opts::raw::DumpTpiRecords = true; opts::raw::DumpTpiHash = true;