Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -34,6 +34,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + index/CanonicalIncludes.cpp index/FileIndex.cpp index/Index.cpp @@ -42,6 +43,8 @@ index/SymbolCollector.cpp index/SymbolYAML.cpp + index/dex/Iterator.cpp + LINK_LIBS clangAST clangASTMatchers Index: clang-tools-extra/clangd/index/dex/Iterator.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Iterator.h @@ -0,0 +1,118 @@ +//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interface for Query Tree Nodes - Iterators, which process +// posting lists and yield the result of contracted search query. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H + +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace clangd { +namespace dex { + +/// Symbol position in the list of all index symbols sorted by a pre-computed +/// metric such as the number of references. +using DocID = uint32_t; +/// Contains sorted sequence of DocIDs all of which belong to symbols matching +/// certain criteria, i.e. containing a Search Token. PostingLists are keys for +/// the inverted index. +using PostingList = std::vector; +/// Immutable reference to PostingList object. +using PostingListRef = llvm::ArrayRef; + +// FIXME(kbobyrev): Provide callback for matched documents. +// FIXME(kbobyrev): Implement new types of iterators: Label, Boost (with +// scoring), Limit. +// FIXME(kbobyrev): Implement iterator cost, an estimate of advance() calls +// before iterator exhaustion. +class Iterator { +public: + /// Returns true if all valid DocIDs were processed and hence advance() and + /// advanceTo() call are invalid, false otherwise. + virtual bool reachedEnd() const = 0; + /// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted + /// and proceeds to the END. + /// + /// Note: calls to advance() are invalid if reachedEnd(). + virtual void advance() = 0; + /// Moves to the first valid DocID which is equal or higher than given ID. If + /// it doesn't exist, the iterator is exhausted and proceeds to the END. + /// + /// Note: calls to advanceTo() are invalid if reachedEnd(). + virtual void advanceTo(DocID ID) = 0; + /// Returns current symbol under cursor. + virtual DocID peek() const = 0; + + virtual ~Iterator() {} + + /// Prints a convenient human-readable iterator representation by recursively + /// dumping iterators in the following format: + /// + /// (Type Child1 Child2 ...) + /// + /// Where Type is the iterator type representation: "&" for And, "|" for Or, + /// ChildN is N-th iterator child. Raw iterators over PostingList are + /// represented as "[ID1, ID2, ...]" where IDN is N-th PostingList entry. + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const Iterator &Iterator) { + return Iterator.dump(OS); + } + +private: + virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; +}; + +/// Exhausts given iterator and returns all processed DocIDs. The result +/// contains sorted DocumentIDs. +std::vector consume(Iterator &It); + +/// Returns an iterator over given PostingList. +std::unique_ptr create(PostingListRef Documents); + +/// Returns AndIterator, an iterator which performs the intersection of the +/// PostingLists of its children. +std::unique_ptr +createAnd(std::vector> Children); + +/// Returns OrIterator, an iterator which performs the union of the PostingLists +/// of its children. +std::unique_ptr +createOr(std::vector> Children); + +template +std::unique_ptr +createAnd(std::unique_ptr(&&Children)[Size]) { + std::vector> Elements; + std::move(begin(Children), end(Children), std::back_inserter(Elements)); + return createAnd(move(Elements)); +} + +template +std::unique_ptr +createOr(std::unique_ptr(&&Children)[Size]) { + std::vector> Elements; + std::move(begin(Children), end(Children), std::back_inserter(Elements)); + return createOr(move(Elements)); +} + +} // namespace dex +} // namespace clangd +} // namespace clang + +#endif Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -0,0 +1,245 @@ +//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Iterator.h" + +#include +#include +#include + +namespace clang { +namespace clangd { +namespace dex { + +/// Implements Iterator over a PostingList. +class DocumentIterator : public Iterator { +public: + DocumentIterator(PostingListRef Documents) + : Documents(Documents), Index(std::begin(Documents)) {} + + bool reachedEnd() const override { return Index == std::end(Documents); } + + /// Advances cursor to the next item. + /// + /// Complexity: O(1). + void advance() override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + ++Index; + } + + /// Applies binary search to advance cursor to the next item with DocID equal + /// or higher than the given one. + /// + /// Complexity: O(log(N)), N is the size of underlying PostingList. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "DocumentIterator can't advance at the end."); + Index = std::lower_bound(Index, std::end(Documents), ID); + } + + DocID peek() const override { + assert(!reachedEnd() && "DocumentIterator can't call peek() at the end."); + return *Index; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << '['; + auto Separator = ""; + for (const auto &ID : Documents) { + OS << Separator << ID; + Separator = ", "; + } + OS << ']'; + return OS; + } + +private: + PostingListRef Documents; + PostingListRef::const_iterator Index; +}; + +/// Manages intersection of the children. +/// +/// AndIterator advances through items which can be pointed to by each child +/// iterator. It becomes exhausted as soon as any child becomes exhausted. After +/// each mutation, the iterator restores the invariant: all children must point +/// to the same item. +class AndIterator : public Iterator { +public: + AndIterator(std::vector> AllChildren) + : Children(std::move(AllChildren)), ReachedEnd(false) { + assert(!Children.empty() && "AndIterator should have at least one child."); + // Establish invariants. + sync(); + } + + bool reachedEnd() const override { return ReachedEnd; } + + /// Advances all children to the next common item. + void advance() override { + assert(!reachedEnd() && "AndIterator can't call advance() at the end."); + Children.front()->advance(); + sync(); + } + + /// Advances all children to the next common item with DocumentID >= ID. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end."); + Children.front()->advanceTo(ID); + sync(); + } + + DocID peek() const override { return Children.front()->peek(); } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(& "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + /// Restores class invariants: each child should point to the same element, + /// ReachedEnd indicates whether any child is exhausted. + void sync() { + ReachedEnd = Children.front()->reachedEnd(); + if (ReachedEnd) + return; + auto ID = Children.front()->peek(); + bool NeedsAdvance = false; + do { + NeedsAdvance = false; + for (auto &Child : Children) { + Child->advanceTo(ID); + ReachedEnd = Child->reachedEnd(); + // If any child reaches end And iterator can not match any other items. + // In this case, just terminate the process. + if (ReachedEnd) + return; + // If any child goes beyond given ID (i.e. ID is not the common item), + // all children should be advanced to the next common item. + // FIXME(kbobyrev): This is not a very optimized version; after costs + // are introduced, cycle should break whenever ID exceeds current one + // and cheapest children should be advanced over again. + if (Child->peek() > ID) { + ID = Child->peek(); + NeedsAdvance = true; + } + } + } while (NeedsAdvance); + } + + /// AndIterator owns its children and ensures that all of them point to the + /// same element. As soon as one child gets exhausted, AndIterator can no + /// longer advance and has reached its end. + std::vector> Children; + /// Local state, which indicates whether any child is exhausted. It is cheaper + /// to maintain and update a local state, rather than traversing the whole + /// subtree in each reachedEnd() call. + bool ReachedEnd; +}; + +/// Manages union of its children. +/// +/// OrIterator iterates through all items which can be pointed to by at least +/// one child. To preserve the sorted order, this iterator always advances the +/// child with smallest Child->peek() value. OrIterator becomes exhausted as +/// soon as all of its children are exhausted. +class OrIterator : public Iterator { +public: + OrIterator(std::vector> AllChildren) + : Children(std::move(AllChildren)) { + assert(Children.size() > 0 && "Or Iterator must have at least one child."); + } + + /// Returns false if any child is not exhausted, false otherwise. + bool reachedEnd() const override { + for (const auto &Child : Children) + if (!Child->reachedEnd()) + return false; + return true; + } + + /// Moves each child pointing to the smallest DocID to the next item. + void advance() override { + assert(!reachedEnd() && + "OrIterator must have at least one child to advance()."); + const auto HighestID = peek(); + for (const auto &Child : Children) { + if (!Child->reachedEnd() && Child->peek() == HighestID) { + Child->advance(); + } + } + } + + /// Advances each child to the next existing element with DocumentID >= ID. + void advanceTo(DocID ID) override { + assert(!reachedEnd() && "Can't advance iterator after it reached the end."); + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Child->advanceTo(ID); + } + + /// Returns the element under cursor of the child with smallest Child->peek() + /// value. + DocID peek() const override { + assert(!reachedEnd() && + "OrIterator must have at least one child to peek()."); + DocID Result = std::numeric_limits::max(); + + for (const auto &Child : Children) + if (!Child->reachedEnd()) + Result = std::min(Result, Child->peek()); + + return Result; + } + + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(| "; + auto Separator = ""; + for (const auto &Child : Children) { + OS << Separator << *Child; + Separator = " "; + } + OS << ')'; + return OS; + } + +private: + // FIXME(kbobyrev): Would storing Children in min-heap be faster? + std::vector> Children; +}; + +std::vector consume(Iterator &It) { + std::vector Result; + for (; !It.reachedEnd(); It.advance()) + Result.push_back(It.peek()); + return Result; +} + +std::unique_ptr create(PostingListRef Documents) { + return llvm::make_unique(Documents); +} + +std::unique_ptr +createAnd(std::vector> Children) { + return llvm::make_unique(move(Children)); +} + +std::unique_ptr +createOr(std::vector> Children) { + return llvm::make_unique(move(Children)); +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/unittests/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/unittests/clangd/CMakeLists.txt +++ clang-tools-extra/unittests/clangd/CMakeLists.txt @@ -15,9 +15,10 @@ CodeCompleteTests.cpp CodeCompletionStringsTests.cpp ContextTests.cpp + DexIndexTests.cpp DraftStoreTests.cpp - FileIndexTests.cpp FileDistanceTests.cpp + FileIndexTests.cpp FindSymbolsTests.cpp FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp @@ -27,11 +28,11 @@ SourceCodeTests.cpp SymbolCollectorTests.cpp SyncAPI.cpp + TUSchedulerTests.cpp TestFS.cpp TestTU.cpp ThreadingTests.cpp TraceTests.cpp - TUSchedulerTests.cpp URITests.cpp XRefsTests.cpp ) Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- /dev/null +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -0,0 +1,191 @@ +//===--- DexIndexTests.cpp ----------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "index/dex/Iterator.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +using ::testing::ElementsAre; + +TEST(DexIndexIterators, DocumentIterator) { + auto DocIterator = create({4, 7, 8, 20, 42, 100}); + + EXPECT_EQ(DocIterator->peek(), 4U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advance(); + EXPECT_EQ(DocIterator->peek(), 7U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(20); + EXPECT_EQ(DocIterator->peek(), 20U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(65); + EXPECT_EQ(DocIterator->peek(), 100U); + EXPECT_EQ(DocIterator->reachedEnd(), false); + + DocIterator->advanceTo(420); + EXPECT_EQ(DocIterator->reachedEnd(), true); + + EXPECT_EQ(llvm::to_string(*DocIterator), "[4, 7, 8, 20, 42, 100]"); +} + +TEST(DexIndexIterators, AndIterator) { + const PostingList EmptyList; + const PostingList FirstList = {0, 5, 7, 10, 42, 320, 9000}; + const PostingList SecondList = {0, 4, 7, 10, 30, 60, 320, 9000}; + const PostingList ThirdList = {1, 4, 7, 11, 30, 60, 320, 9000}; + + auto And = createAnd({create(EmptyList)}); + + EXPECT_EQ(llvm::to_string(*And), "(& [])"); + + And = createAnd({create(EmptyList), create(FirstList)}); + + EXPECT_EQ(llvm::to_string(*And), "(& [] [0, 5, 7, 10, 42, 320, 9000])"); + + And = createAnd({create(FirstList), create(SecondList)}); + + EXPECT_EQ(And->reachedEnd(), false); + EXPECT_THAT(consume(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U)); + + EXPECT_EQ( + llvm::to_string(*And), + "(& [0, 5, 7, 10, 42, 320, 9000] [0, 4, 7, 10, 30, 60, 320, 9000])"); + + And = createAnd({create(SecondList), create(FirstList)}); + + And->advanceTo(0); + EXPECT_EQ(And->peek(), 0U); + And->advanceTo(5); + EXPECT_EQ(And->peek(), 7U); + And->advanceTo(10); + EXPECT_EQ(And->peek(), 10U); + And->advanceTo(42); + EXPECT_EQ(And->peek(), 320U); + And->advanceTo(8999); + EXPECT_EQ(And->peek(), 9000U); + And->advanceTo(9001); + + And = createAnd({create(FirstList), create(SecondList), create(ThirdList)}); + EXPECT_EQ(And->peek(), 7U); + And->advanceTo(300); + EXPECT_EQ(And->peek(), 320U); + And->advanceTo(100000); + + EXPECT_EQ(And->reachedEnd(), true); +} + +TEST(DexIndexIterators, OrIterator) { + const PostingList EmptyList; + const PostingList FirstList = {0, 5, 7, 10, 42, 320, 9000}; + const PostingList SecondList = {0, 4, 7, 10, 30, 60, 320, 9000}; + const PostingList ThirdList = {1, 4, 7, 11, 30, 60, 320, 9000}; + + auto Or = createOr({create(EmptyList)}); + + EXPECT_EQ(Or->reachedEnd(), true); + + Or = createOr({create(FirstList), create(EmptyList)}); + + EXPECT_EQ(Or->reachedEnd(), false); + EXPECT_EQ(Or->peek(), 0U); + Or->advance(); + EXPECT_EQ(Or->peek(), 5U); + Or->advance(); + EXPECT_EQ(Or->peek(), 7U); + Or->advance(); + EXPECT_EQ(Or->peek(), 10U); + Or->advance(); + EXPECT_EQ(Or->peek(), 42U); + Or->advanceTo(42); + EXPECT_EQ(Or->peek(), 42U); + Or->advanceTo(300); + EXPECT_EQ(Or->peek(), 320U); + Or->advanceTo(9000); + EXPECT_EQ(Or->peek(), 9000U); + Or->advanceTo(9001); + + Or = createOr({create(FirstList), create(SecondList), create(ThirdList)}); + + EXPECT_EQ(Or->reachedEnd(), false); + EXPECT_EQ(Or->peek(), 0U); + + Or->advance(); + EXPECT_EQ(Or->peek(), 1U); + + Or->advance(); + EXPECT_EQ(Or->peek(), 4U); + + Or->advanceTo(7); + + Or->advanceTo(59); + EXPECT_EQ(Or->peek(), 60U); + + Or->advanceTo(9001); +} + +// FIXME(kbobyrev): The testcase below is similar to what is expected in real +// queries. It should be updated once new iterators (such as boosting, limiting, +// etc iterators) appear. However, it is not exhaustive and it would be +// beneficial to implement automatic generation of query trees for more +// comprehensive testing. +TEST(DexIndexIterators, QueryTree) { + // An example of more complicated query + // + // +-----------------+ + // |And Iterator:1, 5| + // +--------+--------+ + // | + // | + // +------------------------------------+ + // | | + // | | + // +----------v----------+ +----------v---------+ + // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 5| + // +----------+----------+ +----------+---------+ + // | | + // +------+-----+ +---------+-----------+ + // | | | | | + // +-------v-----+ +----v-----+ +--v--+ +-V--+ +---v---+ + // |0, 3, 5, 8, 9| |0, 5, 7, 9| |Empty| |0, 5| |0, 1, 5| + // +-------------+ +----------+ +-----+ +----+ +-------+ + + // Root of the query tree: [1, 5] + auto Root = createAnd({ + // Lower And Iterator: [1, 5, 9] + createAnd({create({1, 3, 5, 8, 9}), create({1, 5, 7, 9})}), + // Lower Or Iterator: [0, 1, 5] + createOr({create({0, 5}), create({0, 1, 5}), create({})}), + }); + + EXPECT_EQ(Root->reachedEnd(), false); + EXPECT_EQ(Root->peek(), 1U); + Root->advanceTo(0); + // Advance multiple times. Shouldn't do anything. + Root->advanceTo(1); + Root->advanceTo(0); + EXPECT_EQ(Root->peek(), 1U); + Root->advance(); + EXPECT_EQ(Root->peek(), 5U); + Root->advanceTo(5); + Root->advanceTo(9000); +} + +} // namespace dex +} // namespace clangd +} // namespace clang