Index: clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -80,16 +80,16 @@ // Found compilation database, we iterate all TUs from database to get all // symbols, and then merge them into a single SymbolSlab. - SymbolSlab GlobalSymbols; + SymbolSlab::Builder GlobalSymbols; std::mutex SymbolMutex; auto AddSymbols = [&](const SymbolSlab& NewSymbols) { // Synchronize set accesses. std::unique_lock LockGuard(SymbolMutex); - for (auto It : NewSymbols) { + for (auto Sym : NewSymbols) { // FIXME: Better handling the overlap symbols, currently we overwrite it // with the latest one, but we always want to good declarations (class // definitions, instead of forward declarations). - GlobalSymbols.insert(It.second); + GlobalSymbols.insert(Sym); } }; @@ -105,6 +105,6 @@ } } - llvm::outs() << SymbolToYAML(GlobalSymbols); + llvm::outs() << SymbolToYAML(std::move(GlobalSymbols).build()); return 0; } Index: clang-tools-extra/trunk/clangd/index/FileIndex.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/FileIndex.cpp +++ clang-tools-extra/trunk/clangd/index/FileIndex.cpp @@ -54,7 +54,7 @@ for (const auto &FileAndSlab : FileToSlabs) { Snap->KeepAlive.push_back(FileAndSlab.second); for (const auto &Iter : *FileAndSlab.second) - Snap->Pointers.push_back(&Iter.second); + Snap->Pointers.push_back(&Iter); } } auto *Pointers = &Snap->Pointers; Index: clang-tools-extra/trunk/clangd/index/Index.h =================================================================== --- clang-tools-extra/trunk/clangd/index/Index.h +++ clang-tools-extra/trunk/clangd/index/Index.h @@ -13,9 +13,9 @@ #include "../Context.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" #include #include @@ -49,6 +49,9 @@ bool operator==(const SymbolID &Sym) const { return HashValue == Sym.HashValue; } + bool operator<(const SymbolID &Sym) const { + return HashValue < Sym.HashValue; + } private: friend llvm::hash_code hash_value(const SymbolID &ID) { @@ -72,13 +75,38 @@ // "<<" operator. void operator>>(llvm::StringRef HexStr, SymbolID &ID); +} // namespace clangd +} // namespace clang +namespace llvm { +// Support SymbolIDs as DenseMap keys. +template <> struct DenseMapInfo { + static inline clang::clangd::SymbolID getEmptyKey() { + static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); + return EmptyKey; + } + static inline clang::clangd::SymbolID getTombstoneKey() { + static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); + return TombstoneKey; + } + static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { + return hash_value(Sym); + } + static bool isEqual(const clang::clangd::SymbolID &LHS, + const clang::clangd::SymbolID &RHS) { + return LHS == RHS; + } +}; +} // namespace llvm +namespace clang { +namespace clangd { + // The class presents a C++ symbol, e.g. class, function. // // WARNING: Symbols do not own much of their underlying data - typically strings // are owned by a SymbolSlab. They should be treated as non-owning references. // Copies are shallow. // When adding new unowned data fields to Symbol, remember to update -// SymbolSlab::insert to copy them to the slab's storage. +// SymbolSlab::Builder in Index.cpp to copy them to the slab's storage. struct Symbol { // The ID of the symbol. SymbolID ID; @@ -104,11 +132,11 @@ // FIXME: add code completion information. }; -// A symbol container that stores a set of symbols. The container will maintain -// the lifetime of the symbols. +// An immutable symbol container that stores a set of symbols. +// The container will maintain the lifetime of the symbols. class SymbolSlab { public: - using const_iterator = llvm::DenseMap::const_iterator; + using const_iterator = std::vector::const_iterator; SymbolSlab() = default; @@ -116,26 +144,45 @@ const_iterator end() const; const_iterator find(const SymbolID &SymID) const; - // Once called, no more symbols would be added to the SymbolSlab. This - // operation is irreversible. - void freeze(); - - // Adds the symbol to this slab. - // This is a deep copy: underlying strings will be owned by the slab. - void insert(const Symbol& S); + size_t size() const { return Symbols.size(); } + // Estimates the total memory usage. + size_t bytes() const { + return sizeof(*this) + Arena.getTotalMemory() + + Symbols.capacity() * sizeof(Symbol); + } + + // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab. + // The frozen SymbolSlab will use less memory. + class Builder { + public: + // Adds a symbol, overwriting any existing one with the same ID. + // This is a deep copy: underlying strings will be owned by the slab. + void insert(const Symbol& S); + + // Returns the symbol with an ID, if it exists. Valid until next insert(). + const Symbol* find(const SymbolID &ID) { + auto I = SymbolIndex.find(ID); + return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; + } + + // Consumes the builder to finalize the slab. + SymbolSlab build() &&; + + private: + llvm::BumpPtrAllocator Arena; + // Intern table for strings. Contents are on the arena. + llvm::DenseSet Strings; + std::vector Symbols; + // Values are indices into Symbols vector. + llvm::DenseMap SymbolIndex; + }; private: - // Replaces S with a reference to the same string, owned by this slab. - void intern(llvm::StringRef &S) { - S = S.empty() ? llvm::StringRef() : Strings.insert(S).first->getKey(); - } - - bool Frozen = false; + SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) + : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} - // Intern table for strings. Not StringPool as we don't refcount, just insert. - // We use BumpPtrAllocator to avoid lots of tiny allocations for nodes. - llvm::StringSet Strings; - llvm::DenseMap Symbols; + llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. + std::vector Symbols; // Sorted by SymbolID to allow lookup. }; struct FuzzyFindRequest { @@ -176,27 +223,4 @@ } // namespace clangd } // namespace clang - -namespace llvm { - -template <> struct DenseMapInfo { - static inline clang::clangd::SymbolID getEmptyKey() { - static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); - return EmptyKey; - } - static inline clang::clangd::SymbolID getTombstoneKey() { - static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); - return TombstoneKey; - } - static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { - return hash_value(Sym); - } - static bool isEqual(const clang::clangd::SymbolID &LHS, - const clang::clangd::SymbolID &RHS) { - return LHS == RHS; - } -}; - -} // namespace llvm - #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H Index: clang-tools-extra/trunk/clangd/index/Index.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/Index.cpp +++ clang-tools-extra/trunk/clangd/index/Index.cpp @@ -13,16 +13,17 @@ namespace clang { namespace clangd { +using namespace llvm; -SymbolID::SymbolID(llvm::StringRef USR) - : HashValue(llvm::SHA1::hash(arrayRefFromStringRef(USR))) {} +SymbolID::SymbolID(StringRef USR) + : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {} -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID) { - OS << toHex(llvm::toStringRef(ID.HashValue)); +raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { + OS << toHex(toStringRef(ID.HashValue)); return OS; } -void operator>>(llvm::StringRef Str, SymbolID &ID) { +void operator>>(StringRef Str, SymbolID &ID) { std::string HexString = fromHex(Str); assert(HexString.size() == ID.HashValue.size()); std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin()); @@ -32,23 +33,58 @@ SymbolSlab::const_iterator SymbolSlab::end() const { return Symbols.end(); } -SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &SymID) const { - return Symbols.find(SymID); +SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const { + auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID, + [](const Symbol &S, const SymbolID &I) { + return S.ID < I; + }); + if (It != Symbols.end() && It->ID == ID) + return It; + return Symbols.end(); } -void SymbolSlab::freeze() { Frozen = true; } +// Copy the underlying data of the symbol into the owned arena. +static void own(Symbol &S, DenseSet &Strings, + BumpPtrAllocator &Arena) { + // Intern replaces V with a reference to the same string owned by the arena. + auto Intern = [&](StringRef &V) { + auto R = Strings.insert(V); + if (R.second) { // New entry added to the table, copy the string. + char *Data = Arena.Allocate(V.size()); + memcpy(Data, V.data(), V.size()); + *R.first = StringRef(Data, V.size()); + } + V = *R.first; + }; + + // We need to copy every StringRef field onto the arena. + Intern(S.Name); + Intern(S.Scope); + Intern(S.CanonicalDeclaration.FilePath); +} + +void SymbolSlab::Builder::insert(const Symbol &S) { + auto R = SymbolIndex.try_emplace(S.ID, Symbols.size()); + if (R.second) { + Symbols.push_back(S); + own(Symbols.back(), Strings, Arena); + } else { + auto &Copy = Symbols[R.first->second] = S; + own(Copy, Strings, Arena); + } +} -void SymbolSlab::insert(const Symbol &S) { - assert(!Frozen && "Can't insert a symbol after the slab has been frozen!"); - auto ItInserted = Symbols.try_emplace(S.ID, S); - if (!ItInserted.second) - return; - auto &Sym = ItInserted.first->second; - - // We inserted a new symbol, so copy the underlying data. - intern(Sym.Name); - intern(Sym.Scope); - intern(Sym.CanonicalDeclaration.FilePath); +SymbolSlab SymbolSlab::Builder::build() && { + Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. + // Sort symbols so the slab can binary search over them. + std::sort(Symbols.begin(), Symbols.end(), + [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; }); + // We may have unused strings from overwritten symbols. Build a new arena. + BumpPtrAllocator NewArena; + DenseSet Strings; + for (auto &S : Symbols) + own(S, Strings, NewArena); + return SymbolSlab(std::move(NewArena), std::move(Symbols)); } } // namespace clangd Index: clang-tools-extra/trunk/clangd/index/SymbolCollector.h =================================================================== --- clang-tools-extra/trunk/clangd/index/SymbolCollector.h +++ clang-tools-extra/trunk/clangd/index/SymbolCollector.h @@ -29,13 +29,11 @@ unsigned Offset, index::IndexDataConsumer::ASTNodeInfo ASTNode) override; - void finish() override; - - SymbolSlab takeSymbols() { return std::move(Symbols); } + SymbolSlab takeSymbols() { return std::move(Symbols).build(); } private: // All Symbols collected from the AST. - SymbolSlab Symbols; + SymbolSlab::Builder Symbols; }; } // namespace clangd Index: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp +++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp @@ -91,7 +91,7 @@ std::string USR(Buff.data(), Buff.size()); auto ID = SymbolID(USR); - if (Symbols.find(ID) != Symbols.end()) + if (Symbols.find(ID) != nullptr) return true; auto &SM = ND->getASTContext().getSourceManager(); @@ -114,7 +114,5 @@ return true; } -void SymbolCollector::finish() { Symbols.freeze(); } - } // namespace clangd } // namespace clang Index: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp @@ -127,20 +127,18 @@ std::vector S; llvm::yaml::Input Yin(YAMLContent); Yin >> S; - SymbolSlab Syms; + SymbolSlab::Builder Syms; for (auto& Sym : S) - Syms.insert(std::move(Sym)); - return Syms; + Syms.insert(Sym); + return std::move(Syms).build(); } std::string SymbolToYAML(const SymbolSlab& Symbols) { std::string Str; llvm::raw_string_ostream OS(Str); llvm::yaml::Output Yout(OS); - for (auto &Pair : Symbols) { - Symbol MutableSymbol = Pair.second; - Yout<< MutableSymbol; - } + for (Symbol S : Symbols) // copy: Yout<< requires mutability. + Yout<< S; return OS.str(); } Index: clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp @@ -449,6 +449,7 @@ std::vector Pointers; }; auto Snap = std::make_shared(); + SymbolSlab::Builder Slab; for (const auto &Pair : Symbols) { Symbol Sym; Sym.ID = SymbolID(Pair.first); @@ -462,10 +463,11 @@ Sym.Scope = QName.substr(0, Pos); } Sym.SymInfo.Kind = Pair.second; - Snap->Slab.insert(std::move(Sym)); + Slab.insert(Sym); } + Snap->Slab = std::move(Slab).build(); for (auto &Iter : Snap->Slab) - Snap->Pointers.push_back(&Iter.second); + Snap->Pointers.push_back(&Iter); auto S = std::shared_ptr>(std::move(Snap), &Snap->Pointers); I->build(std::move(S)); Index: clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp @@ -28,9 +28,11 @@ return Sym; } -void addNumSymbolsToSlab(int Begin, int End, SymbolSlab *Slab) { +std::unique_ptr numSlab(int Begin, int End) { + SymbolSlab::Builder Slab; for (int i = Begin; i <= End; i++) - Slab->insert(symbol(std::to_string(i))); + Slab.insert(symbol(std::to_string(i))); + return llvm::make_unique(std::move(Slab).build()); } std::vector @@ -45,28 +47,15 @@ FileSymbols FS; EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre()); - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); - + FS.update("f1", numSlab(1, 3)); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre("1", "2", "3")); } TEST(FileSymbolsTest, Overlap) { FileSymbols FS; - - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); - - Slab = llvm::make_unique(); - addNumSymbolsToSlab(3, 5, Slab.get()); - - FS.update("f2", std::move(Slab)); - + FS.update("f1", numSlab(1, 3)); + FS.update("f2", numSlab(3, 5)); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre("1", "2", "3", "3", "4", "5")); } @@ -74,17 +63,13 @@ TEST(FileSymbolsTest, SnapshotAliveAfterRemove) { FileSymbols FS; - auto Slab = llvm::make_unique(); - addNumSymbolsToSlab(1, 3, Slab.get()); - - FS.update("f1", std::move(Slab)); + FS.update("f1", numSlab(1, 3)); auto Symbols = FS.allSymbols(); EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3")); FS.update("f1", nullptr); EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre()); - EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3")); } Index: clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp @@ -13,10 +13,10 @@ #include "gtest/gtest.h" using testing::UnorderedElementsAre; +using testing::Pointee; namespace clang { namespace clangd { - namespace { Symbol symbol(llvm::StringRef QName) { @@ -33,6 +33,24 @@ return Sym; } +MATCHER_P(Named, N, "") { return arg.Name == N; } + +TEST(SymbolSlab, FindAndIterate) { + SymbolSlab::Builder B; + B.insert(symbol("Z")); + B.insert(symbol("Y")); + B.insert(symbol("X")); + EXPECT_EQ(nullptr, B.find(SymbolID("W"))); + for (const char *Sym : {"X", "Y", "Z"}) + EXPECT_THAT(B.find(SymbolID(Sym)), Pointee(Named(Sym))); + + SymbolSlab S = std::move(B).build(); + EXPECT_THAT(S, UnorderedElementsAre(Named("X"), Named("Y"), Named("Z"))); + EXPECT_EQ(S.end(), S.find(SymbolID("W"))); + for (const char *Sym : {"X", "Y", "Z"}) + EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym)); +} + struct SlabAndPointers { SymbolSlab Slab; std::vector Pointers; @@ -45,18 +63,18 @@ std::shared_ptr> generateSymbols(std::vector QualifiedNames, std::weak_ptr *WeakSymbols = nullptr) { - auto Slab = std::make_shared(); - if (WeakSymbols) - *WeakSymbols = Slab; - + SymbolSlab::Builder Slab; for (llvm::StringRef QName : QualifiedNames) - Slab->Slab.insert(symbol(QName)); + Slab.insert(symbol(QName)); - for (const auto &Sym : Slab->Slab) - Slab->Pointers.push_back(&Sym.second); - - auto *Pointers = &Slab->Pointers; - return {std::move(Slab), Pointers}; + auto Storage = std::make_shared(); + Storage->Slab = std::move(Slab).build(); + for (const auto &Sym : Storage->Slab) + Storage->Pointers.push_back(&Sym); + if (WeakSymbols) + *WeakSymbols = Storage; + auto *Pointers = &Storage->Pointers; + return {std::move(Storage), Pointers}; } // Create a slab of symbols with IDs and names [Begin, End], otherwise identical Index: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp @@ -32,8 +32,7 @@ // GMock helpers for matching Symbol. MATCHER_P(QName, Name, "") { - return (arg.second.Scope + (arg.second.Scope.empty() ? "" : "::") + - arg.second.Name).str() == Name; + return (arg.Scope + (arg.Scope.empty() ? "" : "::") + arg.Name).str() == Name; } namespace clang {