Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.cpp +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -125,34 +125,51 @@ bool ReachedEnd = false; }; +/// Comparator treats END as the largest DocID and ensures that peek() call is +/// valid. +bool Greater(const std::unique_ptr &LHS, + const std::unique_ptr &RHS) { + if (LHS->reachedEnd()) + return true; + if (RHS->reachedEnd()) + return false; + return LHS->peek() > RHS->peek(); +}; + /// Implements Iterator over the union of other iterators. /// /// OrIterator iterates through all items which can be pointed to by at least /// one child. To preserve the sorted order, this iterator always advances the /// child with smallest Child->peek() value. OrIterator becomes exhausted as /// soon as all of its children are exhausted. +/// +/// Invariant: Children are always stored in a min-heap which puts iterators +/// with the smallest DocID to the front. class OrIterator : public Iterator { public: explicit OrIterator(std::vector> AllChildren) : Children(std::move(AllChildren)) { assert(!Children.empty() && "OR iterator should have at least one child."); + // Initialize invariant. + std::make_heap(begin(Children), end(Children), Greater); } - /// Returns true if all children are exhausted. - bool reachedEnd() const override { - return std::all_of(begin(Children), end(Children), - [](const std::unique_ptr &Child) { - return Child->reachedEnd(); - }); - } + /// Returns true if all children are exhausted. Due to the invariant, if the + /// first child has reached the end, all of them have reached the end. + bool reachedEnd() const override { return Children.front()->reachedEnd(); } /// Moves each child pointing to the smallest DocID to the next item. void advance() override { assert(!reachedEnd() && "OR iterator can't advance() at the end."); const auto SmallestID = peek(); - for (const auto &Child : Children) - if (!Child->reachedEnd() && Child->peek() == SmallestID) - Child->advance(); + // The first element always contains Child with the smallest DocID. + while (!Children.front()->reachedEnd() && + Children.front()->peek() == SmallestID) { + Children.front()->advance(); + // Restore invariant. + std::pop_heap(begin(Children), end(Children), Greater); + std::push_heap(begin(Children), end(Children), Greater); + } } /// Advances each child to the next existing element with DocumentID >= ID. @@ -161,19 +178,15 @@ for (const auto &Child : Children) if (!Child->reachedEnd()) Child->advanceTo(ID); + // Restore invariant. + std::make_heap(begin(Children), end(Children), Greater); } /// Returns the element under cursor of the child with smallest Child->peek() /// value. DocID peek() const override { assert(!reachedEnd() && "OR iterator can't peek() at the end."); - DocID Result = std::numeric_limits::max(); - - for (const auto &Child : Children) - if (!Child->reachedEnd()) - Result = std::min(Result, Child->peek()); - - return Result; + return Children.front()->peek(); } // Returns the maximum boosting score among all Children when iterator is not @@ -181,13 +194,19 @@ float consume() override { assert(!reachedEnd() && "OR iterator can't consume() at the end."); const DocID ID = peek(); - return std::accumulate( - begin(Children), end(Children), DEFAULT_BOOST_SCORE, - [&](float Boost, const std::unique_ptr &Child) { - return (!Child->reachedEnd() && Child->peek() == ID) - ? std::max(Boost, Child->consume()) - : Boost; - }); + float Boost = DEFAULT_BOOST_SCORE; + // Call consume() on all children pointing to ID and temporally pop them out + // of heap to proceed to the next valid item. + size_t Popped = 0; + while (!Children.front()->reachedEnd() && Children.front()->peek() == ID && + Popped < Children.size()) { + Boost = std::max(Children.front()->consume(), Boost); + std::pop_heap(begin(Children), end(Children) - Popped++, Greater); + } + // Restore invariant. + while (Popped > 0) + std::push_heap(begin(Children), end(Children) - --Popped, Greater); + return Boost; } size_t estimateSize() const override { @@ -210,7 +229,6 @@ return OS; } - // FIXME(kbobyrev): Would storing Children in min-heap be faster? std::vector> Children; }; Index: clang-tools-extra/unittests/clangd/DexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexTests.cpp +++ clang-tools-extra/unittests/clangd/DexTests.cpp @@ -255,20 +255,20 @@ } TEST(DexIterators, StringRepresentation) { - const PostingList L0({4, 7, 8, 20, 42, 100}); + const PostingList L0({0, 1, 8, 20, 42, 100}); const PostingList L1({1, 3, 5, 8, 9}); const PostingList L2({1, 5, 7, 9}); - const PostingList L3({0, 5}); + const PostingList L3({0, 1}); const PostingList L4({0, 1, 5}); const PostingList L5({}); - EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4]"); + EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[0]"); - auto Nested = - createAnd(createAnd(L1.iterator(), L2.iterator()), - createOr(L3.iterator(), L4.iterator(), L5.iterator())); + auto Nested = createAnd( + createAnd(L1.iterator(), L2.iterator(), L4.iterator(), L0.iterator()), + createOr(L3.iterator(), L5.iterator())); - EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))"); + EXPECT_EQ(llvm::to_string(*Nested), "(& (| [1] [END]) (& [1] [1] [1] [1]))"); } TEST(DexIterators, Limit) {