Index: llvm/include/llvm/DebugInfo/PDB/Raw/LinearIntegerMap.h =================================================================== --- /dev/null +++ llvm/include/llvm/DebugInfo/PDB/Raw/LinearIntegerMap.h @@ -0,0 +1,98 @@ +//===- LinearIntegerMap.h - PDB Linear Map ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_LINEARINTEGERMAP_H +#define LLVM_DEBUGINFO_PDB_RAW_LINEARINTEGERMAP_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" + +#include +#include + +namespace llvm { +namespace pdb { + +class LinearIntegerMap { + friend class LinearIntegerMapIterator; + struct Header { + support::ulittle32_t Size; + support::ulittle32_t Capacity; + }; + +public: + typedef std::pair Mapping; + LinearIntegerMap(); + + Error load(msf::StreamReader &Stream); + + uint32_t calculateSerializedLength() const; + Error commit(msf::StreamWriter &Writer) const; + + uint32_t capacity() const; + uint32_t size() const; + + LinearIntegerMapIterator begin() const; + LinearIntegerMapIterator end() const; + LinearIntegerMapIterator find(uint32_t K); + + void insertOrUpdate(uint32_t K, uint32_t V); + void remove(uint32_t K); + uint32_t get(uint32_t K); + +private: + std::pair findInternal(uint32_t K); + static uint32_t maxSize(uint32_t capacity); + void growIfAtCapacity(); + static Error readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V); + static Error writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec); + + mutable SparseBitVector<> Present; + mutable SparseBitVector<> Deleted; + + std::vector Keys; + std::vector Values; +}; + +class LinearIntegerMapIterator + : public iterator_facade_base> { +public: + LinearIntegerMapIterator(const LinearIntegerMap &Map); + LinearIntegerMapIterator() {} + + LinearIntegerMapIterator &operator=(const LinearIntegerMapIterator &R); + bool operator==(const LinearIntegerMapIterator &R) const; + const std::pair &operator*() const; + LinearIntegerMapIterator &operator++(); + +private: + void update(); + bool isEnd() const; + + std::pair Item; + const LinearIntegerMap *Map = nullptr; + llvm::SparseBitVector<>::iterator Iter; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_LINEARMAP_H Index: llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h +++ llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h @@ -10,11 +10,12 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H #define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H -#include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/PDB/Raw/LinearIntegerMap.h" #include "llvm/Support/Error.h" #include #include +#include namespace llvm { namespace msf { @@ -29,14 +30,14 @@ void addMapping(StringRef Name, uint32_t Mapping); - Expected> build(); Error commit(msf::StreamWriter &Writer) const; uint32_t calculateSerializedLength() const; private: - StringMap Map; - uint32_t StringDataBytes = 0; + std::vector Strings; + LinearIntegerMap Map; + uint32_t Offset = 0; }; } // end namespace pdb Index: llvm/lib/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -36,6 +36,7 @@ Raw/Hash.cpp Raw/InfoStream.cpp Raw/InfoStreamBuilder.cpp + Raw/LinearIntegerMap.cpp Raw/ModInfo.cpp Raw/ModStream.cpp Raw/NameHashTable.cpp Index: llvm/lib/DebugInfo/PDB/Raw/LinearIntegerMap.cpp =================================================================== --- /dev/null +++ llvm/lib/DebugInfo/PDB/Raw/LinearIntegerMap.cpp @@ -0,0 +1,286 @@ +//===- LinearIntegerMap.cpp - PDB Linear Map --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/LinearIntegerMap.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb; + +LinearIntegerMap::LinearIntegerMap() { + Keys.resize(1); + Values.resize(1); +} + +Error LinearIntegerMap::load(msf::StreamReader &Stream) { + const Header *HAH; + if (auto EC = Stream.readObject(HAH)) + return EC; + if (HAH->Capacity == 0) + return make_error(raw_error_code::corrupt_file, + "Invalid Linear Map Capacity"); + if (HAH->Size > maxSize(HAH->Capacity)) + return make_error(raw_error_code::corrupt_file, + "Invalid Linear Map Size"); + + Keys.resize(HAH->Capacity); + Values.resize(HAH->Capacity); + + if (auto EC = readSparseBitVector(Stream, Present)) + return EC; + if (Present.count() != HAH->Size) + return make_error(raw_error_code::corrupt_file, + "Present bit vector does not match size!"); + + if (auto EC = readSparseBitVector(Stream, Deleted)) + return EC; + if (Present.intersects(Deleted)) + return make_error(raw_error_code::corrupt_file, + "Present bit vector interesects deleted!"); + + for (uint32_t I = 0; I < HAH->Capacity; ++I) { + if (!Present.test(I)) + continue; + + if (auto EC = Stream.readInteger(Keys[I])) + return EC; + if (auto EC = Stream.readInteger(Values[I])) + return EC; + } + return Error::success(); +} + +uint32_t LinearIntegerMap::calculateSerializedLength() const { + uint32_t Size = sizeof(Header); + + // Figure out the number of 32-bit words required to hold all of the bits in + // the present and deleted sets. The only way to do this is to find the + // largest value in each set, which is O(n) in the number of set bits. + uint32_t LargestP = 0; + uint32_t LargestD = 0; + for (uint32_t P : Present) + LargestP = P; + for (uint32_t D : Deleted) + LargestD = D; + uint32_t PresentWords = + alignTo(LargestP, sizeof(uint32_t)) / sizeof(uint32_t); + uint32_t DeletedWords = + alignTo(LargestD, sizeof(uint32_t)) / sizeof(uint32_t); + + // Present bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += PresentWords * sizeof(uint32_t); + + // Deleted bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += DeletedWords * sizeof(uint32_t); + + // One (Key, Value) pair for each entry Present. + Size += 2 * sizeof(uint32_t) * Present.count(); + + return Size; +} + +Error LinearIntegerMap::commit(msf::StreamWriter &Writer) const { + Header H; + H.Size = size(); + H.Capacity = capacity(); + if (auto EC = Writer.writeObject(H)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Present)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Deleted)) + return EC; + + for (const auto &Entry : *this) { + if (auto EC = Writer.writeInteger(Entry.first)) + return EC; + if (auto EC = Writer.writeInteger(Entry.second)) + return EC; + } + return Error::success(); +} + +uint32_t LinearIntegerMap::capacity() const { return Keys.size(); } +uint32_t LinearIntegerMap::size() const { return Present.count(); } + +LinearIntegerMapIterator LinearIntegerMap::begin() const { + return LinearIntegerMapIterator(*this); +} +LinearIntegerMapIterator LinearIntegerMap::end() const { + return LinearIntegerMapIterator(); +} + +LinearIntegerMapIterator LinearIntegerMap::find(uint32_t K) { + return findInternal(K).second; +} + +void LinearIntegerMap::insertOrUpdate(uint32_t K, uint32_t V) { + auto Entry = findInternal(K); + if (Entry.second != end()) { + assert(Present.test(Entry.first)); + assert(!Deleted.test(Entry.first)); + // We're updating, no need to do anything special. + Values[Entry.first] = V; + return; + } + + // We're inserting, try to put it in the first deleted location. + int D = Deleted.find_first(); + if (D != -1) { + Keys[D] = K; + Values[D] = V; + Present.set(D); + Deleted.reset(D); + return; + } + + // No deleted locations exist, append it to the end, making sure to grow if + // necessary. + uint32_t S = size(); + Keys[S] = K; + Values[S] = V; + Present.set(S); + growIfAtCapacity(); + + return; +} + +void LinearIntegerMap::remove(uint32_t K) { + auto Entry = findInternal(K); + // It wasn't here to begin with, just exit. + if (Entry.second == end()) { + assert(!Present.test(Entry.first)); + return; + } + + assert(Present.test(Entry.first) && !Deleted.test(Entry.first)); + Present.reset(Entry.first); + Deleted.set(Entry.first); +} + +uint32_t LinearIntegerMap::get(uint32_t K) { + auto I = find(K); + assert(I != end()); + return (*I).second; +} + +std::pair +LinearIntegerMap::findInternal(uint32_t K) { + uint32_t Idx = 0; + for (auto IT = begin(); IT != end(); ++IT, ++Idx) { + if ((*IT).first == K) + return std::make_pair(Idx, IT); + } + + return std::make_pair(size(), end()); +} + +uint32_t LinearIntegerMap::maxSize(uint32_t capacity) { + return capacity * 2 / 3 + 1; +} + +void LinearIntegerMap::growIfAtCapacity() { + if (size() < maxSize(capacity())) + return; + uint32_t NewCapacity = capacity() * 3 / 2 + 1; + Keys.resize(NewCapacity); + Values.resize(NewCapacity); +} + +Error LinearIntegerMap::readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V) { + uint32_t NumWords; + if (auto EC = Stream.readInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error(raw_error_code::corrupt_file, + "Expected Linear map number of words")); + + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Expected Linear map word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + V.set((I * 32) + Idx); + } + return Error::success(); +} + +Error LinearIntegerMap::writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec) { + uint32_t Largest = 0; + for (uint32_t V : Vec) + Largest = V; + uint32_t NumWords = alignTo(Largest, sizeof(uint32_t)) / sizeof(uint32_t); + if (auto EC = Writer.writeInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error(raw_error_code::corrupt_file, + "Could not write linear map number of words")); + + uint32_t Idx = 0; + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word = 0; + for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { + if (Vec.test(Idx)) + Word |= (1 << WordIdx); + } + if (auto EC = Writer.writeInteger(Word)) + return joinErrors(std::move(EC), make_error( + raw_error_code::corrupt_file, + "Could not write linear map word")); + } + return Error::success(); +} + +LinearIntegerMapIterator::LinearIntegerMapIterator(const LinearIntegerMap &Map) + : Map(&Map), Iter(Map.Present.begin()) { + update(); +} + +LinearIntegerMapIterator &LinearIntegerMapIterator:: +operator=(const LinearIntegerMapIterator &R) { + Map = R.Map; + return *this; +} + +bool LinearIntegerMapIterator:: +operator==(const LinearIntegerMapIterator &R) const { + if (isEnd() && R.isEnd()) + return true; + + return (Map == R.Map) && (Iter == R.Iter); +} + +const std::pair &LinearIntegerMapIterator:: +operator*() const { + return Item; +} + +LinearIntegerMapIterator &LinearIntegerMapIterator::operator++() { + ++Iter; + update(); + return *this; +} + +void LinearIntegerMapIterator::update() { + if (isEnd()) + return; + Item.first = Map->Keys[*Iter]; + Item.second = Map->Values[*Iter]; +} +bool LinearIntegerMapIterator::isEnd() const { + return !Map || Iter == Map->Present.end(); +} Index: llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp +++ llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/PDB/Raw/NameMap.h" + #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/MSF/StreamReader.h" -#include "llvm/DebugInfo/PDB/Raw/NameMap.h" +#include "llvm/DebugInfo/PDB/Raw/LinearIntegerMap.h" #include "llvm/DebugInfo/PDB/Raw/RawError.h" #include "llvm/Support/Error.h" #include @@ -25,123 +27,35 @@ NameMap::NameMap() = default; Error NameMap::load(StreamReader &Stream) { - // This is some sort of weird string-set/hash table encoded in the stream. - // It starts with the number of bytes in the table. - uint32_t NumberOfBytes; - if (auto EC = Stream.readInteger(NumberOfBytes)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map length")); - if (Stream.bytesRemaining() < NumberOfBytes) - return make_error(raw_error_code::corrupt_file, - "Invalid name map length"); - - // Following that field is the starting offset of strings in the name table. - uint32_t StringsOffset = Stream.getOffset(); - Stream.setOffset(StringsOffset + NumberOfBytes); - - // This appears to be equivalent to the total number of strings *actually* - // in the name table. - uint32_t HashSize; - if (auto EC = Stream.readInteger(HashSize)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map hash size")); - - // This appears to be an upper bound on the number of strings in the name - // table. - uint32_t MaxNumberOfStrings; - if (auto EC = Stream.readInteger(MaxNumberOfStrings)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map max strings")); - - if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t))) - return make_error(raw_error_code::corrupt_file, - "Implausible number of strings"); - - const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8); - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'present'. - uint32_t NumPresentWords; - if (auto EC = Stream.readInteger(NumPresentWords)) + uint32_t StringBufferSize; + if (auto EC = Stream.readInteger(StringBufferSize)) return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, - "Expected name map num words")); + "Expected string buffer size")); - if (NumPresentWords > MaxNumberOfWords) - return make_error(raw_error_code::corrupt_file, - "Number of present words is too large"); + msf::ReadableStreamRef StringsBuffer; + if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize)) + return EC; - SparseBitVector<> Present; - for (uint32_t I = 0; I != NumPresentWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Present.set((I * 32) + Idx); - } - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'deleted'. - uint32_t NumDeletedWords; - if (auto EC = Stream.readInteger(NumDeletedWords)) - return joinErrors( - std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map num deleted words")); + LinearIntegerMap OffsetIndexMap; + if (auto EC = OffsetIndexMap.load(Stream)) + return EC; - if (NumDeletedWords > MaxNumberOfWords) - return make_error(raw_error_code::corrupt_file, - "Number of deleted words is too large"); - - SparseBitVector<> Deleted; - for (uint32_t I = 0; I != NumDeletedWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Deleted.set((I * 32) + Idx); - } - - for (unsigned I : Present) { - // For all present entries, dump out their mapping. - (void)I; - - // This appears to be an offset relative to the start of the strings. - // It tells us where the null-terminated string begins. - uint32_t NameOffset; - if (auto EC = Stream.readInteger(NameOffset)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map name offset")); - - // This appears to be a stream number into the stream directory. - uint32_t NameIndex; - if (auto EC = Stream.readInteger(NameIndex)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map name index")); + uint32_t NameOffset; + uint32_t NameIndex; + for (const auto &Entry : OffsetIndexMap) { + std::tie(NameOffset, NameIndex) = Entry; // Compute the offset of the start of the string relative to the stream. - uint32_t StringOffset = StringsOffset + NameOffset; - uint32_t OldOffset = Stream.getOffset(); + msf::StreamReader NameReader(StringsBuffer); + NameReader.setOffset(NameOffset); // Pump out our c-string from the stream. StringRef Str; - Stream.setOffset(StringOffset); - if (auto EC = Stream.readZeroString(Str)) + if (auto EC = NameReader.readZeroString(Str)) return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Expected name map name")); - Stream.setOffset(OldOffset); // Add this to a string-map from name to stream number. Mapping.insert({Str, NameIndex}); } Index: llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp @@ -22,87 +22,39 @@ NameMapBuilder::NameMapBuilder() = default; void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) { - StringDataBytes += Name.size() + 1; - Map.insert({Name, Mapping}); -} - -Expected> NameMapBuilder::build() { - auto Result = llvm::make_unique(); - Result->Mapping = Map; - return std::move(Result); + Strings.push_back(Name); + Map.insertOrUpdate(Offset, Mapping); + Offset += Name.size() + 1; } uint32_t NameMapBuilder::calculateSerializedLength() const { uint32_t TotalLength = 0; - TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value - TotalLength += StringDataBytes; // actual string data - - TotalLength += sizeof(support::ulittle32_t); // Hash Size - TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings - TotalLength += sizeof(support::ulittle32_t); // Num Present Words - // One bitmask word for each present entry - TotalLength += Map.size() * sizeof(support::ulittle32_t); - TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words - - // For each present word, which we are treating as equivalent to the number of - // entries in the table, we have a pair of integers. An offset into the - // string data, and a corresponding stream number. - TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t); + // Number of bytes of string data. + TotalLength += sizeof(support::ulittle32_t); + // Followed by that many actual bytes of string data. + TotalLength += Offset; + // Followed by the mapping from Name to Index. + TotalLength += Map.calculateSerializedLength(); return TotalLength; } Error NameMapBuilder::commit(msf::StreamWriter &Writer) const { - // The first field is the number of bytes of string data. So add - // up the length of all strings plus a null terminator for each - // one. - uint32_t NumBytes = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - NumBytes += B->getKeyLength() + 1; - } - - if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data - return EC; - // Now all of the string data itself. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeZeroString(B->getKey())) - return EC; - } - - if (auto EC = Writer.writeInteger(Map.size())) // Hash Size + // The first field is the number of bytes of string data. We've already been + // keeping a running total of this in `Offset`. + if (auto EC = Writer.writeInteger(Offset)) // Number of bytes of string data return EC; - if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings - return EC; - - if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words - return EC; - - // For each entry in the mapping, write a bit mask which represents a bucket - // to store it in. We don't use this, so the value we write isn't important - // to us, it just has to be there. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeInteger(1U)) + // Now all of the string data itself. + for (auto S : Strings) { + if (auto EC = Writer.writeZeroString(S)) return EC; } - if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words + // And finally the Linear Map. + if (auto EC = Map.commit(Writer)) return EC; - // Mappings of each word. - uint32_t OffsetSoFar = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - // This is a list of key value pairs where the key is the offset into the - // strings buffer, and the value is a stream number. Write each pair. - if (auto EC = Writer.writeInteger(OffsetSoFar)) - return EC; - - if (auto EC = Writer.writeInteger(B->second)) - return EC; - - OffsetSoFar += B->getKeyLength() + 1; - } - return Error::success(); }