Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -19,6 +19,7 @@ Protocol.cpp ProtocolHandlers.cpp Trace.cpp + index/MemIndex.cpp index/Index.cpp index/SymbolCollector.cpp Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -110,6 +110,33 @@ llvm::DenseMap Symbols; }; +struct FuzzyFindRequest { + /// \brief A query string for the fuzzy find. This is matched against symbols' + /// qualfified names. + std::string Query; + /// \brief The maxinum number of candidates to return. + size_t MaxCandidateCount = UINT_MAX; +}; + +/// \brief Interface for symbol indexes that can be used for searching or +/// matching symbols among a set of symbols based on names or unique IDs. +class SymbolIndex { +public: + virtual ~SymbolIndex() = default; + + /// \brief Matches symbols in the index fuzzily and applies \p Callback on + /// each matched symbol. + /// + /// Returns true if all candidates are matched. + virtual bool + fuzzyFind(const FuzzyFindRequest &Req, + std::function Callback) const = 0; + + // FIXME: add interfaces for more index use cases: + // - Symbol getSymbolInfo(llvm::StringRef USR); + // - getAllOccurrences(llvm::StringRef USR); +}; + } // namespace clangd } // namespace clang Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" - +#include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/Support/SHA1.h" namespace clang { @@ -20,6 +20,7 @@ } } // namespace + SymbolID::SymbolID(llvm::StringRef USR) : HashValue(llvm::SHA1::hash(toArrayRef(USR))) {} Index: clangd/index/MemIndex.h =================================================================== --- /dev/null +++ clangd/index/MemIndex.h @@ -0,0 +1,41 @@ +//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H + +#include "Index.h" +#include + +namespace clang { +namespace clangd { + +/// \brief This implements an index for a (relatively small) set of symbols that +/// can be easily managed in memory. +class MemIndex : public SymbolIndex { +public: + /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain + /// accessible as long as `Symbols` is kept alive. + void build(std::shared_ptr> Symbols); + + bool fuzzyFind(const FuzzyFindRequest &Req, + std::function Callback) const override; + +private: + std::shared_ptr> Symbols; + // Index is a set of symbols that are deduplicated by symbol IDs. + // FIXME: build smarter index structure. + llvm::DenseMap Index; + mutable std::mutex Mutex; +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H Index: clangd/index/MemIndex.cpp =================================================================== --- /dev/null +++ clangd/index/MemIndex.cpp @@ -0,0 +1,51 @@ +//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// + +#include "MemIndex.h" + +namespace clang { +namespace clangd { + +void MemIndex::build(std::shared_ptr> Syms) { + + llvm::DenseMap TempIndex; + for (const Symbol *Sym : *Syms) + TempIndex[Sym->ID] = Sym; + + // Swap out the old symbols and index. + { + std::lock_guard Lock(Mutex); + Index = std::move(TempIndex); + Symbols = std::move(Syms); // Relase old symbols. + } +} + +bool MemIndex::fuzzyFind(const FuzzyFindRequest &Req, + std::function Callback) const { + std::string LoweredQuery = llvm::StringRef(Req.Query).lower(); + unsigned Matched = 0; + { + std::lock_guard Lock(Mutex); + for (const auto Pair : Index) { + const Symbol *Sym = Pair.second; + // Find all symbols that contain the query, igoring cases. + // FIXME: use better matching algorithm, e.g. fuzzy matcher. + if (StringRef(StringRef(Sym->QualifiedName).lower()) + .contains(LoweredQuery)) { + if (++Matched > Req.MaxCandidateCount) + return false; + Callback(*Sym); + } + } + } + return true; +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -13,6 +13,7 @@ CodeCompleteTests.cpp ContextTests.cpp FuzzyMatchTests.cpp + IndexTests.cpp JSONExprTests.cpp TestFS.cpp TraceTests.cpp Index: unittests/clangd/IndexTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/IndexTests.cpp @@ -0,0 +1,118 @@ +//===-- IndexTests.cpp -------------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "index/Index.h" +#include "index/MemIndex.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; + +namespace clang { +namespace clangd { + +namespace { + +Symbol symbol(llvm::StringRef ID) { + Symbol Sym; + Sym.ID = SymbolID(ID); + Sym.QualifiedName = ID; + return Sym; +} + +struct CountedSymbolSlab { + CountedSymbolSlab() = delete; + + explicit CountedSymbolSlab(int &Counter) : Counter(Counter) { ++Counter; } + + ~CountedSymbolSlab() { Counter--; } + + SymbolSlab Slab; + // A counter for the number of living slabs. + int &Counter; +}; + +class MemIndexTest : public ::testing::Test { +protected: + std::vector + match(std::shared_ptr> Symbols, + const FuzzyFindRequest &Req) { + Index.build(std::move(Symbols)); + std::vector Matches; + Index.fuzzyFind( + Req, [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); }); + return Matches; + } + + // Build a CountedSymbolSlab containing symbols with IDs and names [Begin, + // End]. The life time of the slab is managed by the returned shared_ptr, + // which points to a vector of pointers pointing to all symbols in the slab. + std::shared_ptr> + generateNumSymbols(int Begin, int End) { + auto Slab = std::make_shared(SlabCounter); + + for (int i = Begin; i <= End; i++) + Slab->Slab.insert(symbol(std::to_string(i))); + auto *Symbols = new std::vector(); + + for (const auto &Sym : Slab->Slab) + Symbols->push_back(&Sym.second); + return std::shared_ptr>(std::move(Slab), + Symbols); + } + + int SlabCounter = 0; + MemIndex Index; +}; + +TEST_F(MemIndexTest, MemIndexSymbolsRecycled) { + FuzzyFindRequest Req; + Req.Query = "7"; + auto Matches = match(generateNumSymbols(0, 10), Req); + EXPECT_THAT(Matches, UnorderedElementsAre("7")); + + EXPECT_EQ(SlabCounter, 1); + // Release old symbols. + Index.build(std::make_shared>()); + EXPECT_EQ(SlabCounter, 0); +} + +TEST_F(MemIndexTest, MemIndexMatchSubstring) { + FuzzyFindRequest Req; + Req.Query = "5"; + auto Matches = match(generateNumSymbols(5, 25), Req); + EXPECT_THAT(Matches, UnorderedElementsAre("5", "15", "25")); +} + +TEST_F(MemIndexTest, MemIndexDeduplicate) { + auto Symbols = generateNumSymbols(0, 10); + + // Inject some duplicates and make sure we only match the same symbol once. + auto Sym = symbol("7"); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + Symbols->push_back(&Sym); + + FuzzyFindRequest Req; + Req.Query = "7"; + auto Matches = match(std::move(Symbols), Req); + EXPECT_EQ(Matches.size(), 1u); +} + +TEST_F(MemIndexTest, MemIndexLimitedNumMatches) { + FuzzyFindRequest Req; + Req.Query = "5"; + Req.MaxCandidateCount = 3; + auto Matches = match(generateNumSymbols(0, 100), Req); + EXPECT_EQ(Matches.size(), Req.MaxCandidateCount); +} + +} // namespace +} // namespace clangd +} // namespace clang