Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -34,6 +34,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + index/CanonicalIncludes.cpp index/FileIndex.cpp index/Index.cpp @@ -42,6 +43,8 @@ index/SymbolCollector.cpp index/SymbolYAML.cpp + index/dex/Iterator.cpp + LINK_LIBS clangAST clangASTMatchers Index: clang-tools-extra/clangd/index/dex/Iterator.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Iterator.h @@ -0,0 +1,113 @@ +//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interface for Query Tree Nodes - Iterators, which process +// posting lists and yield the result of contracted search query. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H + +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace clangd { +namespace dex { + +/// Symbol position in the list of all index symbols sorted by a pre-computed +/// metric such as the number of references. +using DocID = uint32_t; +/// Contains sorted sequence of DocIDs all of which belong to symbols matching +/// certain criteria, i.e. containing a Search Token. PostingLists are keys for +/// the inverted index. +using PostingList = std::vector; +/// Immutable reference to PostingList object. +using PostingListRef = llvm::ArrayRef; + +// FIXME(kbobyrev): Provide callback for matched documents. +// FIXME(kbobyrev): Implement new types of iterators: Label, Boost, Limit. +// FIXME(kbobyrev): Document this one properly. +class Iterator { +public: + /// Returns true if all valid DocIDs were processed and hence advance() and + /// advanceTo() call are invalid, false otherwise. + virtual bool reachedEnd() const = 0; + /// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted + /// and proceeds to the END. + /// + /// Note: calls to advance() are invalid if reachedEnd(). + virtual void advance() = 0; + /// Moves to the first valid DocID which is equal or higher than given ID. If + /// it doesn't exist, the iterator is exhausted and proceeds to the END. + /// + /// Note: calls to advanceTo() are invalid if reachedEnd(). + virtual void advanceTo(DocID ID) = 0; + /// Returns current symbol under cursor. + virtual DocID peek() const = 0; + + virtual ~Iterator() {} + + /// Prints a convenient human-readable iterator representation by recursively + /// dumping iterators in the following format: + /// + /// (Type Child1 Child2 ...) + /// + /// Where Type is the iterator type representation: "&" for And, "|" for Or, + /// ChildN is N-th iterator child. Raw iterators over PostingList are + /// represented as "[ID1, ID2, ...]" where IDN is N-th PostingList entry. + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const Iterator &Iterator) { + return Iterator.dump(OS); + } + +private: + virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; +}; + +/// Exhausts given iterator and returns all processed DocIDs. +std::vector consume(Iterator &It); + +/// Returns an iterator over given PostingList. +std::unique_ptr create(PostingListRef Documents); + +// FIXME(kbobyrev): Brief documentation of AndIterator here. +std::unique_ptr +createAnd(std::vector> Children); + +// FIXME(kbobyrev): Brief documentation of OrIterator here. +std::unique_ptr +createOr(std::vector> Children); + +template +std::unique_ptr +createAnd(std::unique_ptr(&&Children)[Size]) { + std::vector> Elements; + std::move(begin(Children), end(Children), std::back_inserter(Elements)); + return createAnd(move(Elements)); +} + +template +std::unique_ptr +createOr(std::unique_ptr(&&Children)[Size]) { + std::vector> Elements; + std::move(begin(Children), end(Children), std::back_inserter(Elements)); + return createOr(move(Elements)); +} + +} // namespace dex +} // namespace clangd +} // namespace clang + +#endif Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -0,0 +1,228 @@ +//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Iterator.h" + +#include +#include +#include + +namespace clang { +namespace clangd { +namespace dex { + +/// Implements Iterator over a PostingList. +// FIXME(kbobyrev): Document this one properly. +class DocumentIterator : public Iterator { +public: + DocumentIterator(PostingListRef Documents) + : Documents(Documents), Index(std::begin(Documents)) {} + + bool reachedEnd() const override { return Index == std::end(Documents); } + + /// Increments PostingList Index. + void advance() override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + ++Index; + } + + // FIXME(kbobyrev): Properly document this one. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + Index = std::lower_bound(Index, std::end(Documents), ID); + } + + DocID peek() const override { + assert(!reachedEnd() && "DocumentIterator can't call peek() at the end."); + return *Index; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << '['; + auto Separator = ""; + for (const auto &ID : Documents) { + OS << Separator << ID; + Separator = ", "; + } + OS << ']'; + return OS; + } + +private: + PostingListRef Documents; + PostingListRef::const_iterator Index; +}; + +// FIXME(kbobyrev): Document this one properly. +class AndIterator : public Iterator { +public: + AndIterator(std::vector> AllChildren) + : Children(std::move(AllChildren)), ReachedEnd(false) { + assert(!Children.empty() && "AndIterator should have at least one child."); + for (const auto &Child : Children) { + // Check whether any child iterator points to the end. In this case, + // advance() would be invalid. + if (Child->reachedEnd()) { + ReachedEnd = true; + return; + } + } + sync(); + } + + bool reachedEnd() const override { return ReachedEnd; } + + // FIXME(kbobyrev): Properly document this one. + void advance() override { + assert(!reachedEnd() && "AndIterator can't call advance() at the end."); + Children.front()->advance(); + ReachedEnd = Children.front()->reachedEnd(); + if (!ReachedEnd) + sync(); + } + + // FIXME(kbobyrev): Properly document this one. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end."); + Children.front()->advanceTo(ID); + ReachedEnd = Children.front()->reachedEnd(); + if (!ReachedEnd) + sync(); + } + + DocID peek() const override { return Children.front()->peek(); } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(& "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + // FIXME(kbobyrev): Properly document this one. + // FIXME(kbobyrev): This is not a very optimized version. + void sync() { + assert(!reachedEnd() && "AndIterator can't call sync() at the end."); + auto ID = Children.front()->peek(); + bool NeedsAdvance = false; + do { + NeedsAdvance = false; + for (auto &Child : Children) { + Child->advanceTo(ID); + ReachedEnd = Child->reachedEnd(); + // If any child reaches end And iterator can not match any other items. + // In this case, just terminate the process. + if (ReachedEnd) + break; + // If any child goes beyond given ID (i.e. ID is not the common item), + // all children should be advanced to the next common item. + if (Child->peek() > ID) { + ID = Child->peek(); + NeedsAdvance = true; + } + } + } while (!ReachedEnd && NeedsAdvance); + } + + std::vector> Children; + // Store and update local state, otherwise each reachedEnd() call would + // require the whole subtree traversal. + bool ReachedEnd; +}; + +// FIXME(kbobyrev): Document this one properly. +// FIXME(kbobyrev): Would storing Children in min-heap be faster? +class OrIterator : public Iterator { +public: + OrIterator(std::vector> AllChildren) + : Children(std::move(AllChildren)) { + assert(Children.size() > 0 && "Or Iterator must have at least one child."); + } + + bool reachedEnd() const override { + for (const auto &Child : Children) + if (!Child->reachedEnd()) + return false; + return true; + } + + void advance() override { + assert(!reachedEnd() && + "OrIterator must have at least one child to advance()."); + const auto HighestID = peek(); + for (const auto &Child : Children) { + if (!Child->reachedEnd() && Child->peek() == HighestID) { + Child->advance(); + } + } + } + + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "Can't advance iterator after it reached the end."); + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Child->advanceTo(ID); + } + + DocID peek() const override { + assert(!reachedEnd() && + "OrIterator must have at least one child to peek()."); + DocID Result = std::numeric_limits::max(); + + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Result = std::min(Result, Child->peek()); + + return Result; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(| "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + std::vector> Children; +}; + +std::vector consume(Iterator &It) { + std::vector Result; + for (; !It.reachedEnd(); It.advance()) + Result.push_back(It.peek()); + return Result; +} + +std::unique_ptr create(PostingListRef Documents) { + return llvm::make_unique(Documents); +} + +std::unique_ptr +createAnd(std::vector> Children) { + return llvm::make_unique(move(Children)); +} + +std::unique_ptr +createOr(std::vector> Children) { + return llvm::make_unique(move(Children)); +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/unittests/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/unittests/clangd/CMakeLists.txt +++ clang-tools-extra/unittests/clangd/CMakeLists.txt @@ -15,9 +15,10 @@ CodeCompleteTests.cpp CodeCompletionStringsTests.cpp ContextTests.cpp + DexIndexTests.cpp DraftStoreTests.cpp - FileIndexTests.cpp FileDistanceTests.cpp + FileIndexTests.cpp FindSymbolsTests.cpp FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp @@ -27,11 +28,11 @@ SourceCodeTests.cpp SymbolCollectorTests.cpp SyncAPI.cpp + TUSchedulerTests.cpp TestFS.cpp TestTU.cpp ThreadingTests.cpp TraceTests.cpp - TUSchedulerTests.cpp URITests.cpp XRefsTests.cpp ) Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- /dev/null +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -0,0 +1,191 @@ +//===--- DexIndexTests.cpp ----------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "index/dex/Iterator.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +using ::testing::ElementsAre; + +TEST(DexIndexIterators, DocumentIterator) { + auto DocIterator = create({4, 7, 8, 20, 42, 100}); + + EXPECT_EQ(DocIterator->peek(), 4U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advance(); + EXPECT_EQ(DocIterator->peek(), 7U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(20); + EXPECT_EQ(DocIterator->peek(), 20U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(65); + EXPECT_EQ(DocIterator->peek(), 100U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(420); + EXPECT_EQ(DocIterator->reachedEnd(), true); + + EXPECT_EQ(llvm::to_string(*DocIterator), "[4, 7, 8, 20, 42, 100]"); +} + +TEST(DexIndexIterators, AndIterator) { + const PostingList EmptyList; + const PostingList FirstList = {0, 5, 7, 10, 42, 320, 9000}; + const PostingList SecondList = {0, 4, 7, 10, 30, 60, 320, 9000}; + const PostingList ThirdList = {1, 4, 7, 11, 30, 60, 320, 9000}; + + auto And = createAnd({create(EmptyList)}); + + EXPECT_EQ(llvm::to_string(*And), "(& [])"); + + And = createAnd({create(EmptyList), create(FirstList)}); + + EXPECT_EQ(llvm::to_string(*And), "(& [] [0, 5, 7, 10, 42, 320, 9000])"); + + And = createAnd({create(FirstList), create(SecondList)}); + + EXPECT_EQ(And->reachedEnd(), false); + EXPECT_THAT(consume(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U)); + + EXPECT_EQ( + llvm::to_string(*And), + "(& [0, 5, 7, 10, 42, 320, 9000] [0, 4, 7, 10, 30, 60, 320, 9000])"); + + And = createAnd({create(SecondList), create(FirstList)}); + + And->advanceTo(0); + EXPECT_EQ(And->peek(), 0U); + And->advanceTo(5); + EXPECT_EQ(And->peek(), 7U); + And->advanceTo(10); + EXPECT_EQ(And->peek(), 10U); + And->advanceTo(42); + EXPECT_EQ(And->peek(), 320U); + And->advanceTo(8999); + EXPECT_EQ(And->peek(), 9000U); + And->advanceTo(9001); + + And = createAnd({create(FirstList), create(SecondList), create(ThirdList)}); + EXPECT_EQ(And->peek(), 7U); + And->advanceTo(300); + EXPECT_EQ(And->peek(), 320U); + And->advanceTo(100000); + + EXPECT_EQ(And->reachedEnd(), true); +} + +TEST(DexIndexIterators, OrIterator) { + const PostingList EmptyList; + const PostingList FirstList = {0, 5, 7, 10, 42, 320, 9000}; + const PostingList SecondList = {0, 4, 7, 10, 30, 60, 320, 9000}; + const PostingList ThirdList = {1, 4, 7, 11, 30, 60, 320, 9000}; + + auto Or = createOr({create(EmptyList)}); + + EXPECT_EQ(Or->reachedEnd(), true); + + Or = createOr({create(FirstList), create(EmptyList)}); + + EXPECT_EQ(Or->reachedEnd(), false); + EXPECT_EQ(Or->peek(), 0U); + Or->advance(); + EXPECT_EQ(Or->peek(), 5U); + Or->advance(); + EXPECT_EQ(Or->peek(), 7U); + Or->advance(); + EXPECT_EQ(Or->peek(), 10U); + Or->advance(); + EXPECT_EQ(Or->peek(), 42U); + Or->advanceTo(42); + EXPECT_EQ(Or->peek(), 42U); + Or->advanceTo(300); + EXPECT_EQ(Or->peek(), 320U); + Or->advanceTo(9000); + EXPECT_EQ(Or->peek(), 9000U); + Or->advanceTo(9001); + + Or = createOr({create(FirstList), create(SecondList), create(ThirdList)}); + + EXPECT_EQ(Or->reachedEnd(), false); + EXPECT_EQ(Or->peek(), 0U); + + Or->advance(); + EXPECT_EQ(Or->peek(), 1U); + + Or->advance(); + EXPECT_EQ(Or->peek(), 4U); + + Or->advanceTo(7); + + Or->advanceTo(59); + EXPECT_EQ(Or->peek(), 60U); + + Or->advanceTo(9001); +} + +// FIXME(kbobyrev): The testcase below is similar to what is expected in real +// queries. It should be updated once new iterators (such as boosting, limiting, +// etc iterators) appear. However, it is not exhaustive and it would be +// beneficial to implement automatic generation of query trees for more +// comprehensive testing. +TEST(DexIndexIterators, QueryTree) { + // An example of more complicated query + // + // +-----------------+ + // |And Iterator:1, 5| + // +--------+--------+ + // | + // | + // +------------------------------------+ + // | | + // | | + // +----------v----------+ +----------v---------+ + // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 5| + // +----------+----------+ +----------+---------+ + // | | + // +------+-----+ +---------+-----------+ + // | | | | | + // +-------v-----+ +----v-----+ +--v--+ +-V--+ +---v---+ + // |0, 3, 5, 8, 9| |0, 5, 7, 9| |Empty| |0, 5| |0, 1, 5| + // +-------------+ +----------+ +-----+ +----+ +-------+ + + // Root of the query tree: [1, 5] + auto Root = createAnd({ + // Lower And Iterator: [1, 5, 9] + createAnd({create({1, 3, 5, 8, 9}), create({1, 5, 7, 9})}), + // Lower Or Iterator: [0, 1, 5] + createOr({create({0, 5}), create({0, 1, 5}), create({})}), + }); + + EXPECT_EQ(Root->reachedEnd(), false); + EXPECT_EQ(Root->peek(), 1U); + Root->advanceTo(0); + // Advance multiple times. Shouldn't do anything. + Root->advanceTo(1); + Root->advanceTo(0); + EXPECT_EQ(Root->peek(), 1U); + Root->advance(); + EXPECT_EQ(Root->peek(), 5U); + Root->advanceTo(5); + Root->advanceTo(9000); +} + +} // namespace dex +} // namespace clangd +} // namespace clang