Index: clangd/index/FileIndex.cpp =================================================================== --- clangd/index/FileIndex.cpp +++ clangd/index/FileIndex.cpp @@ -54,7 +54,7 @@ for (const auto &FileAndSlab : FileToSlabs) { Snap->KeepAlive.push_back(FileAndSlab.second); for (const auto &Iter : *FileAndSlab.second) - Snap->Pointers.push_back(&Iter.second); + Snap->Pointers.push_back(&Iter); } } auto *Pointers = &Snap->Pointers; Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -13,9 +13,9 @@ #include "../Context.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" #include #include @@ -49,6 +49,9 @@ bool operator==(const SymbolID &Sym) const { return HashValue == Sym.HashValue; } + bool operator<(const SymbolID &Sym) const { + return HashValue < Sym.HashValue; + } private: friend llvm::hash_code hash_value(const SymbolID &ID) { @@ -70,6 +73,31 @@ // "<<" operator. void operator>>(llvm::StringRef HexStr, SymbolID &ID); +} // namespace clangd +} // namespace clang +namespace llvm { +// Support SymbolIDs as DenseMap keys. +template <> struct DenseMapInfo { + static inline clang::clangd::SymbolID getEmptyKey() { + static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); + return EmptyKey; + } + static inline clang::clangd::SymbolID getTombstoneKey() { + static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); + return TombstoneKey; + } + static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { + return hash_value(Sym); + } + static bool isEqual(const clang::clangd::SymbolID &LHS, + const clang::clangd::SymbolID &RHS) { + return LHS == RHS; + } +}; +} // namespace llvm +namespace clang { +namespace clangd { + // The class presents a C++ symbol, e.g. class, function. // // WARNING: Symbols do not own much of their underlying data - typically strings @@ -102,11 +130,11 @@ // FIXME: add code completion information. }; -// A symbol container that stores a set of symbols. The container will maintain -// the lifetime of the symbols. +// An immutable symbol container that stores a set of symbols. +// The container will maintain the lifetime of the symbols. class SymbolSlab { public: - using const_iterator = llvm::DenseMap::const_iterator; + using const_iterator = std::vector::const_iterator; SymbolSlab() = default; @@ -114,26 +142,44 @@ const_iterator end() const; const_iterator find(const SymbolID &SymID) const; - // Once called, no more symbols would be added to the SymbolSlab. This - // operation is irreversible. - void freeze(); + size_t size() const { return Symbols.size(); } + // Estimates the total memory usage. + size_t bytes() const { + return sizeof(*this) + Arena.getTotalMemory() + + Symbols.capacity() * sizeof(Symbol); + } - // Adds the symbol to this slab. - // This is a deep copy: underlying strings will be owned by the slab. - void insert(const Symbol& S); + // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab. + // The frozen SymbolSlab will use less memory. + class Builder { + public: + // Adds a symbol, overwriting any existing one with the same ID. + // This is a deep copy: underlying strings will be owned by the slab. + void insert(const Symbol& S); + + // Returns the symbol with an ID, if it exists. Valid until next insert(). + const Symbol* find(const SymbolID &ID) { + auto I = SymbolIndex.find(ID); + return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; + } + + // Consumes the builder to finalize the slab. + SymbolSlab build() &&; + + private: + llvm::BumpPtrAllocator Arena; + // Intern table for strings. Contents are on the arena. + llvm::DenseSet Strings; + std::vector Symbols; + llvm::DenseMap SymbolIndex; + }; private: - // Replaces S with a reference to the same string, owned by this slab. - void intern(llvm::StringRef &S) { - S = S.empty() ? llvm::StringRef() : Strings.insert(S).first->getKey(); - } - - bool Frozen = false; + SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) + : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} - // Intern table for strings. Not StringPool as we don't refcount, just insert. - // We use BumpPtrAllocator to avoid lots of tiny allocations for nodes. - llvm::StringSet Strings; - llvm::DenseMap Symbols; + llvm::BumpPtrAllocator Arena; + std::vector Symbols; }; struct FuzzyFindRequest { @@ -174,27 +220,4 @@ } // namespace clangd } // namespace clang - -namespace llvm { - -template <> struct DenseMapInfo { - static inline clang::clangd::SymbolID getEmptyKey() { - static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); - return EmptyKey; - } - static inline clang::clangd::SymbolID getTombstoneKey() { - static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); - return TombstoneKey; - } - static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { - return hash_value(Sym); - } - static bool isEqual(const clang::clangd::SymbolID &LHS, - const clang::clangd::SymbolID &RHS) { - return LHS == RHS; - } -}; - -} // namespace llvm - #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -9,20 +9,22 @@ #include "Index.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/SHA1.h" namespace clang { namespace clangd { +using namespace llvm; -SymbolID::SymbolID(llvm::StringRef USR) - : HashValue(llvm::SHA1::hash(arrayRefFromStringRef(USR))) {} +SymbolID::SymbolID(StringRef USR) + : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {} -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID) { - OS << toHex(llvm::toStringRef(ID.HashValue)); +raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { + OS << toHex(toStringRef(ID.HashValue)); return OS; } -void operator>>(llvm::StringRef Str, SymbolID &ID) { +void operator>>(StringRef Str, SymbolID &ID) { std::string HexString = fromHex(Str); assert(HexString.size() == 20); std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin()); @@ -32,23 +34,57 @@ SymbolSlab::const_iterator SymbolSlab::end() const { return Symbols.end(); } -SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &SymID) const { - return Symbols.find(SymID); +SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const { + auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID, + [](const Symbol &S, const SymbolID &I) { + return S.ID == I; + }); + if (It != Symbols.end() || It->ID == ID) + return It; + return Symbols.end(); } -void SymbolSlab::freeze() { Frozen = true; } +// Replace S with a reference to the same string owned by the arena. +static void intern(StringRef &S, DenseSet &StringTable, + BumpPtrAllocator &Arena) { + auto R = StringTable.insert(S); + if (R.second) { // New entry added to the table, we need to copy the string. + char *Data = Arena.Allocate(S.size()); + memcpy(Data, S.data(), S.size()); + *R.first = StringRef(Data, S.size()); + } + S = *R.first; +} + +// Copy the underlying data of the symbol into a strings table. +void own(Symbol &S, DenseSet &Strings, BumpPtrAllocator &Arena) { + auto Intern = [&](StringRef &S) { intern(S, Strings, Arena); }; -void SymbolSlab::insert(const Symbol &S) { - assert(!Frozen && "Can't insert a symbol after the slab has been frozen!"); - auto ItInserted = Symbols.try_emplace(S.ID, S); - if (!ItInserted.second) - return; - auto &Sym = ItInserted.first->second; + Intern(S.Name); + Intern(S.Scope); + Intern(S.CanonicalDeclaration.FilePath); +} + +void SymbolSlab::Builder::insert(const Symbol &S) { + auto R = SymbolIndex.try_emplace(S.ID, Symbols.size()); + if (R.second) { + Symbols.push_back(S); + own(Symbols.back(), Strings, Arena); + } else + own(Symbols[R.first->second] = S, Strings, Arena); +} - // We inserted a new symbol, so copy the underlying data. - intern(Sym.Name); - intern(Sym.Scope); - intern(Sym.CanonicalDeclaration.FilePath); +SymbolSlab SymbolSlab::Builder::build() && { + Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. + // Sort symbols so the slab can binary search over them. + std::sort(Symbols.begin(), Symbols.end(), + [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; }); + // We may have unused strings, so copy the used stuff over to another arena. + BumpPtrAllocator Arena; + DenseSet Strings; + for (auto &S : Symbols) + own(S, Strings, Arena); + return SymbolSlab(std::move(Arena), std::move(Symbols)); } } // namespace clangd Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -29,13 +29,11 @@ unsigned Offset, index::IndexDataConsumer::ASTNodeInfo ASTNode) override; - void finish() override; - - SymbolSlab takeSymbols() { return std::move(Symbols); } + SymbolSlab takeSymbols() { return std::move(Symbols).build(); } private: // All Symbols collected from the AST. - SymbolSlab Symbols; + SymbolSlab::Builder Symbols; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -91,7 +91,7 @@ std::string USR(Buff.data(), Buff.size()); auto ID = SymbolID(USR); - if (Symbols.find(ID) != Symbols.end()) + if (Symbols.find(ID) != nullptr) return true; auto &SM = ND->getASTContext().getSourceManager(); @@ -107,7 +107,5 @@ return true; } -void SymbolCollector::finish() { Symbols.freeze(); } - } // namespace clangd } // namespace clang Index: clangd/index/SymbolYAML.cpp =================================================================== --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -127,20 +127,18 @@ std::vector S; llvm::yaml::Input Yin(YAMLContent); Yin >> S; - SymbolSlab Syms; + SymbolSlab::Builder Syms; for (auto& Sym : S) - Syms.insert(std::move(Sym)); - return Syms; + Syms.insert(Sym); + return std::move(Syms).build(); } std::string SymbolToYAML(const SymbolSlab& Symbols) { std::string Str; llvm::raw_string_ostream OS(Str); llvm::yaml::Output Yout(OS); - for (auto &Pair : Symbols) { - Symbol MutableSymbol = Pair.second; - Yout<< MutableSymbol; - } + for (Symbol S : Symbols) // copy: Yout<< requires mutability. + Yout<< S; return OS.str(); } Index: unittests/clangd/CodeCompleteTests.cpp =================================================================== --- unittests/clangd/CodeCompleteTests.cpp +++ unittests/clangd/CodeCompleteTests.cpp @@ -449,6 +449,7 @@ std::vector Pointers; }; auto Snap = std::make_shared(); + SymbolSlab::Builder Slab; for (const auto &Pair : Symbols) { Symbol Sym; Sym.ID = SymbolID(Pair.first); @@ -462,10 +463,11 @@ Sym.Scope = QName.substr(0, Pos); } Sym.SymInfo.Kind = Pair.second; - Snap->Slab.insert(std::move(Sym)); + Slab.insert(Sym); } + Snap->Slab = std::move(Slab).build(); for (auto &Iter : Snap->Slab) - Snap->Pointers.push_back(&Iter.second); + Snap->Pointers.push_back(&Iter); auto S = std::shared_ptr>(std::move(Snap), &Snap->Pointers); I->build(std::move(S)); Index: unittests/clangd/FileIndexTests.cpp =================================================================== --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -28,9 +28,11 @@ return Sym; } -void addNumSymbolsToSlab(int Begin, int End, SymbolSlab *Slab) { +std::unique_ptr numSlab(int Begin, int End) { + SymbolSlab::Builder Slab; for (int i = Begin; i <= End; i++) - Slab->insert(symbol(std::to_string(i))); + Slab.insert(symbol(std::to_string(i))); + return llvm::make_unique(std::move(Slab).build()); } std::vector @@ -45,28 +47,15 @@ FileSymbols FS; EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre()); - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); - + FS.update("f1", numSlab(1, 3)); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre("1", "2", "3")); } TEST(FileSymbolsTest, Overlap) { FileSymbols FS; - - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); - - Slab = llvm::make_unique(); - addNumSymbolsToSlab(3, 5, Slab.get()); - - FS.update("f2", std::move(Slab)); - + FS.update("f1", numSlab(1, 3)); + FS.update("f2", numSlab(3, 5)); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre("1", "2", "3", "3", "4", "5")); } @@ -74,17 +63,13 @@ TEST(FileSymbolsTest, SnapshotAliveAfterRemove) { FileSymbols FS; - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); + FS.update("f1", numSlab(1, 3)); auto Symbols = FS.allSymbols(); EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3")); FS.update("f1", nullptr); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre()); - EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3")); } Index: unittests/clangd/IndexTests.cpp =================================================================== --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -45,18 +45,18 @@ std::shared_ptr> generateSymbols(std::vector QualifiedNames, std::weak_ptr *WeakSymbols = nullptr) { - auto Slab = std::make_shared(); - if (WeakSymbols) - *WeakSymbols = Slab; - + SymbolSlab::Builder Slab; for (llvm::StringRef QName : QualifiedNames) - Slab->Slab.insert(symbol(QName)); + Slab.insert(symbol(QName)); - for (const auto &Sym : Slab->Slab) - Slab->Pointers.push_back(&Sym.second); - - auto *Pointers = &Slab->Pointers; - return {std::move(Slab), Pointers}; + auto Storage = std::make_shared(); + Storage->Slab = std::move(Slab).build(); + for (const auto &Sym : Storage->Slab) + Storage->Pointers.push_back(&Sym); + if (WeakSymbols) + *WeakSymbols = Storage; + auto *Pointers = &Storage->Pointers; + return {std::move(Storage), Pointers}; } // Create a slab of symbols with IDs and names [Begin, End], otherwise identical Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -32,8 +32,7 @@ // GMock helpers for matching Symbol. MATCHER_P(QName, Name, "") { - return (arg.second.Scope + (arg.second.Scope.empty() ? "" : "::") + - arg.second.Name).str() == Name; + return (arg.Scope + (arg.Scope.empty() ? "" : "::") + arg.Name).str() == Name; } namespace clang {