Index: clangd/index/dex/Iterator.h =================================================================== --- clangd/index/dex/Iterator.h +++ clangd/index/dex/Iterator.h @@ -95,6 +95,8 @@ /// consume() must *not* be called on children that don't contain the current /// doc. virtual float consume() = 0; + /// Returns an estimate of advance() calls before the iterator is exhausted. + virtual size_t estimateSize() const = 0; virtual ~Iterator() {} Index: clangd/index/dex/Iterator.cpp =================================================================== --- clangd/index/dex/Iterator.cpp +++ clangd/index/dex/Iterator.cpp @@ -48,6 +48,8 @@ float consume() override { return DEFAULT_BOOST_SCORE; } + size_t estimateSize() const override { return Documents.size(); } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << '['; @@ -85,6 +87,18 @@ assert(!Children.empty() && "AndIterator should have at least one child."); // Establish invariants. sync(); + // When children are sorted by the estimateSize(), sync() calls are more + // effective. Each sync() starts with the first child and makes sure all + // children point to the same element. If any child is "above" the previous + // ones, the algorithm resets and and advances the children to the next + // highest element starting from the front. When child iterators in the + // beginning have smaller estimated size, the sync() will have less restarts + // and become more effective. + std::sort(begin(Children), end(Children), + [](const std::unique_ptr &LHS, + const std::unique_ptr &RHS) { + return LHS->estimateSize() < RHS->estimateSize(); + }); } bool reachedEnd() const override { return ReachedEnd; } @@ -114,6 +128,10 @@ }); } + size_t estimateSize() const override { + return Children.front()->estimateSize(); + } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(& "; @@ -146,9 +164,6 @@ return; // If any child goes beyond given ID (i.e. ID is not the common item), // all children should be advanced to the next common item. - // FIXME(kbobyrev): This is not a very optimized version; after costs - // are introduced, cycle should break whenever ID exceeds current one - // and cheapest children should be advanced over again. if (Child->peek() > SyncID) { SyncID = Child->peek(); NeedsAdvance = true; @@ -178,6 +193,7 @@ OrIterator(std::vector> AllChildren) : Children(std::move(AllChildren)) { assert(Children.size() > 0 && "Or Iterator must have at least one child."); + std::sort(begin(Children), end(Children)); } /// Returns true if all children are exhausted. @@ -235,6 +251,14 @@ }); } + size_t estimateSize() const override { + return std::accumulate( + begin(Children), end(Children), Children.front()->estimateSize(), + [&](size_t Current, const std::unique_ptr &Child) { + return std::max(Current, Child->estimateSize()); + }); + } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(| "; @@ -277,6 +301,8 @@ float consume() override { return DEFAULT_BOOST_SCORE; } + size_t estimateSize() const override { return Size; } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(TRUE {" << Index << "} out of " << Size << ")"; @@ -305,6 +331,8 @@ float consume() override { return Child->consume() * Factor; } + size_t estimateSize() const override { return Child->estimateSize(); } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(BOOST " << Factor << ' ' << *Child << ')'; @@ -342,6 +370,10 @@ return Child->consume(); } + size_t estimateSize() const override { + return std::min(Child->estimateSize(), Limit); + } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')'; Index: unittests/clangd/DexIndexTests.cpp =================================================================== --- unittests/clangd/DexIndexTests.cpp +++ unittests/clangd/DexIndexTests.cpp @@ -259,8 +259,8 @@ createOr(create(L3), create(L4), create(L5))); EXPECT_EQ(llvm::to_string(*Nested), - "(& (& [{1}, 3, 5, 8, 9, END] [{1}, 5, 7, 9, END]) (| [0, {5}, " - "END] [0, {1}, 5, END] [{END}]))"); + "(& (| [{END}] [0, {5}, END] [0, {1}, 5, END]) (& [{1}, 5, 7, 9, " + "END] [{1}, 3, 5, 8, 9, END]))"); } TEST(DexIndexIterators, Limit) {