Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -19,6 +19,7 @@ Protocol.cpp ProtocolHandlers.cpp Trace.cpp + index/MemIndex.cpp index/Index.cpp index/SymbolCollector.cpp Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H +#include "../Context.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" @@ -110,6 +111,33 @@ llvm::DenseMap Symbols; }; +struct FuzzyFindRequest { + /// \brief A query string for the fuzzy find. This is matched against symbols' + /// qualfified names. + std::string Query; + /// \brief The maxinum number of candidates to return. + size_t MaxCandidateCount = UINT_MAX; +}; + +/// \brief Interface for symbol indexes that can be used for searching or +/// matching symbols among a set of symbols based on names or unique IDs. +class SymbolIndex { +public: + virtual ~SymbolIndex() = default; + + /// \brief Matches symbols in the index fuzzily and applies \p Callback on + /// each matched symbol before returning. + /// + /// Returns true if all candidates are matched. + virtual bool + fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req, + std::function Callback) const = 0; + + // FIXME: add interfaces for more index use cases: + // - Symbol getSymbolInfo(SymbolID); + // - getAllOccurrences(SymbolID); +}; + } // namespace clangd } // namespace clang Index: clangd/index/MemIndex.h =================================================================== --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index for a (relatively small) set of symbols that +/// can be easily managed in memory. +class MemIndex : public SymbolIndex { +public: + /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain + /// accessible as long as `Symbols` is kept alive. + void build(std::shared_ptr> Symbols); + + bool fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req, + std::function Callback) const override; + +private: + std::shared_ptr> Symbols; + // Index is a set of symbols that are deduplicated by symbol IDs. + // FIXME: build smarter index structure. + llvm::DenseMap Index; + mutable std::mutex Mutex; +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H Index: clangd/index/MemIndex.cpp =================================================================== --- /dev/null +++ clangd/index/MemIndex.cpp @@ -0,0 +1,52 @@ +//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#include "MemIndex.h" + +namespace clang { +namespace clangd { + +void MemIndex::build(std::shared_ptr> Syms) { + llvm::DenseMap TempIndex; + for (const Symbol *Sym : *Syms) + TempIndex[Sym->ID] = Sym; + + // Swap out the old symbols and index. + { + std::lock_guard Lock(Mutex); + Index = std::move(TempIndex); + Symbols = std::move(Syms); // Relase old symbols. + } +} + +bool MemIndex::fuzzyFind(Context & /*Ctx*/, const FuzzyFindRequest &Req, + std::function Callback) const { + std::string LoweredQuery = llvm::StringRef(Req.Query).lower(); + unsigned Matched = 0; + { + std::lock_guard Lock(Mutex); + for (const auto Pair : Index) { + const Symbol *Sym = Pair.second; + // Find all symbols that contain the query, igoring cases. + // FIXME: consider matching chunks in qualified names instead the whole + // string. + // FIXME: use better matching algorithm, e.g. fuzzy matcher. + if (StringRef(StringRef(Sym->QualifiedName).lower()) + .contains(LoweredQuery)) { + if (++Matched > Req.MaxCandidateCount) + return false; + Callback(*Sym); + } + } + } + return true; +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: unittests/clangd/IndexTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,116 @@ +//===-- IndexTests.cpp -------------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct SlabAndPointers { + SymbolSlab Slab; + std::vector Pointers; +}; + +// Create a slab of symbols with IDs and names [Begin, End]. The life time of +// the slab is managed by the returned shared pointer. If \p WeakSymbols is +// provided, it will be pointed to the managed object in the returned shared +// pointer. +std::shared_ptr> +generateNumSymbols(int Begin, int End, + std::weak_ptr *WeakSymbols = nullptr) { + auto Slab = std::make_shared(); + if (WeakSymbols) + *WeakSymbols = Slab; + + for (int i = Begin; i <= End; i++) + Slab->Slab.insert(symbol(std::to_string(i))); + + for (const auto &Sym : Slab->Slab) + Slab->Pointers.push_back(&Sym.second); + + auto Symbols = std::shared_ptr>(std::move(Slab), + &Slab->Pointers); + + return Symbols; +} + +std::vector match(const SymbolIndex &I, + const FuzzyFindRequest &Req) { + std::vector Matches; + auto Ctx = Context::empty(); + I.fuzzyFind(Ctx, Req, + [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); + return Matches; +} + +TEST(MemIndexTest, MemIndexSymbolsRecycled) { + MemIndex I; + std::weak_ptr Symbols; + I.build(generateNumSymbols(0, 10, &Symbols)); + FuzzyFindRequest Req; + Req.Query = "7"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("7")); + + // Release old symbols. + I.build(generateNumSymbols(0, 0)); + EXPECT_TRUE(Symbols.expired()); +} + +TEST(MemIndexTest, MemIndexMatchSubstring) { + MemIndex I; + I.build(generateNumSymbols(5, 25)); + FuzzyFindRequest Req; + Req.Query = "5"; + EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25")); +} + +TEST(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + MemIndex I; + I.build(std::move(Symbols)); + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST(MemIndexTest, MemIndexLimitedNumMatches) { + MemIndex I; + I.build(generateNumSymbols(0, 100)); + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(I, Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang