Index: clang-tools-extra/clangd/index/Ref.h =================================================================== --- clang-tools-extra/clangd/index/Ref.h +++ clang-tools-extra/clangd/index/Ref.h @@ -12,10 +12,13 @@ #include "SymbolID.h" #include "SymbolLocation.h" #include "clang/Index/IndexSymbol.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include @@ -68,26 +71,22 @@ /// Filenames are deduplicated. class RefSlab { public: - // Refs are stored in order. using value_type = std::pair>; - using const_iterator = std::vector::const_iterator; + class const_iterator; using iterator = const_iterator; RefSlab() = default; RefSlab(RefSlab &&Slab) = default; RefSlab &operator=(RefSlab &&RHS) = default; - const_iterator begin() const { return Refs.begin(); } - const_iterator end() const { return Refs.end(); } + const_iterator begin() const; + const_iterator end() const; /// Gets the number of symbols. - size_t size() const { return Refs.size(); } - size_t numRefs() const { return NumRefs; } - bool empty() const { return Refs.empty(); } + size_t size() const { return Symbols.size(); } + size_t numRefs() const { return Refs.size(); } + bool empty() const { return Symbols.empty(); } - size_t bytes() const { - return sizeof(*this) + Arena.getTotalMemory() + - sizeof(value_type) * Refs.capacity(); - } + size_t bytes() const { return sizeof(*this) + Arena.getTotalMemory(); } /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab. class Builder { @@ -101,20 +100,70 @@ private: llvm::BumpPtrAllocator Arena; llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. - llvm::DenseMap> Refs; + // There are many refs, so insert() is on the hot path when loading + // indexes from disk. We use a flat structure to make it cheap. + std::vector> Refs; }; private: - RefSlab(std::vector Refs, llvm::BumpPtrAllocator Arena, - size_t NumRefs) - : Arena(std::move(Arena)), Refs(std::move(Refs)), NumRefs(NumRefs) {} + struct StoredSymbol { + SymbolID ID; + unsigned NumRefs; + }; + RefSlab(llvm::ArrayRef Symbols, llvm::ArrayRef Refs, + llvm::BumpPtrAllocator Arena) + : Arena(std::move(Arena)), Symbols(Symbols), Refs(std::move(Refs)) {} + // StoredSymbols are (SymbolID, #Refs) + // Refs are stored in the same order. llvm::BumpPtrAllocator Arena; - std::vector Refs; - /// Number of all references. - size_t NumRefs = 0; + llvm::ArrayRef Symbols; + llvm::ArrayRef Refs; }; +// Forward iterator for RefSlab: iterates over pair>. +class RefSlab::const_iterator + : public llvm::iterator_facade_base< + const_iterator, std::forward_iterator_tag, RefSlab::value_type> { + // We traverse the parallel arrays of StoredSymbol and Refs in parallel. + // When advancing, the StoredSymbol tells us how many Refs to skip over. + const RefSlab::StoredSymbol *SymIt; + const Ref *RefPtr; + // This is storage so dereferencing can yield value_type&. Populated by *. + RefSlab::value_type Value; + + const_iterator(const RefSlab::StoredSymbol *SymIt, const Ref *RefPtr) + : SymIt(SymIt), RefPtr(RefPtr) {} + friend RefSlab; + +public: + const_iterator &operator=(const const_iterator &R) = default; + bool operator==(const const_iterator &other) const { + return SymIt == other.SymIt; + } + // iterator_facade_base requires a non-const deference operator, even though + // mutation isn't meaningful here. + RefSlab::value_type &operator*() { + Value = {SymIt->ID, llvm::makeArrayRef(RefPtr, SymIt->NumRefs)}; + return Value; + } + const RefSlab::value_type &operator*() const { + return const_cast(this)->operator*(); + } + const const_iterator &operator++() { + RefPtr += SymIt->NumRefs; + ++SymIt; + return *this; + } +}; + +inline RefSlab::const_iterator RefSlab::begin() const { + return const_iterator(Symbols.begin(), Refs.data()); +} +inline RefSlab::const_iterator RefSlab::end() const { + return const_iterator(Symbols.end(), nullptr); +} + } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/Ref.cpp =================================================================== --- clang-tools-extra/clangd/index/Ref.cpp +++ clang-tools-extra/clangd/index/Ref.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Ref.h" +#include "llvm/ADT/STLExtras.h" namespace clang { namespace clangd { @@ -32,27 +33,65 @@ } void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) { - auto &M = Refs[ID]; - if (M.count(S)) - return; - Ref R = S; - R.Location.FileURI = - UniqueStrings.save(R.Location.FileURI).data(); - M.insert(std::move(R)); + Refs.emplace_back(ID, S); + Refs.back().second.Location.FileURI = + UniqueStrings.save(Refs.back().second.Location.FileURI).data(); +} + +// Allows comparisons that use pointer semantics for the (interned) file URIs. +// This is considerably faster than operator<, and ref slabs are a hot path. +static auto Key(const std::pair &L) + -> decltype(std::tie(L.first, L.second.Location.Start, + L.second.Location.End, L.second.Location.FileURI, + L.second.Kind)) { + return std::tie(L.first, L.second.Location.Start, L.second.Location.End, + L.second.Location.FileURI, L.second.Kind); } RefSlab RefSlab::Builder::build() && { - // We can reuse the arena, as it only has unique strings and we need them all. - // Reallocate refs on the arena to reduce waste and indirections when reading. - std::vector>> Result; - Result.reserve(Refs.size()); - size_t NumRefs = 0; - for (auto &Sym : Refs) { - std::vector SymRefs(Sym.second.begin(), Sym.second.end()); - NumRefs += SymRefs.size(); - Result.emplace_back(Sym.first, llvm::ArrayRef(SymRefs).copy(Arena)); + if (Refs.empty()) + return RefSlab(); + // Sort entries to group equal symbols, and refs within them. + llvm::sort(Refs, + [](const std::pair &L, + const std::pair &R) { return Key(L) < Key(R); }); + + // Count distinct symbols and refs so we can size arrays correctly. + std::vector NewSymbol(Refs.size(), false); + std::vector NewRef(Refs.size(), false); + NewSymbol.front() = true; + NewRef.front() = true; + unsigned NSymbols = 1, NRefs = 1; + for (unsigned I = 1, E = Refs.size(); I < E; ++I) { + if (Key(Refs[I - 1]) != Key(Refs[I])) { + ++NRefs; + NewRef[I] = true; + if (!(Refs[I - 1].first == Refs[I].first)) { + ++NSymbols; + NewSymbol[I] = true; + } + } } - return RefSlab(std::move(Result), std::move(Arena), NumRefs); + assert(NSymbols == llvm::count(NewSymbol, true)); + assert(NRefs == llvm::count(NewRef, true)); + + // Allocate storage for the results. + StoredSymbol *SymbolsBegin = Arena.Allocate(NSymbols), + *SymbolsEnd = SymbolsBegin; + Ref *RefsBegin = Arena.Allocate(NRefs), *RefsEnd = RefsBegin; + + // Copy symbols and their distinct refs to the output, counting as we go. + for (unsigned I = 0, E = Refs.size(); I < E; ++I) { + if (!NewRef[I]) + continue; + new (RefsEnd++) Ref(Refs[I].second); + if (NewSymbol[I]) + new (SymbolsEnd++) StoredSymbol{Refs[I].first, 0}; + ++(SymbolsEnd - 1)->NumRefs; + } + + return RefSlab({SymbolsBegin, SymbolsEnd}, {RefsBegin, RefsEnd}, + std::move(Arena)); } } // namespace clangd