Index: clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp +++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp @@ -128,10 +128,10 @@ // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); // FIXME(kbobyrev): Add boosting to the query and utilize retrieved // boosting scores. - std::vector> SymbolDocIDs = - consume(*QueryIterator, ItemsToRetrieve); + std::vector> SymbolDocIDs = consume(*Root); // Retrieve top Req.MaxCandidateCount items. std::priority_queue> Top; Index: clang-tools-extra/trunk/clangd/index/dex/Iterator.h =================================================================== --- clang-tools-extra/trunk/clangd/index/dex/Iterator.h +++ clang-tools-extra/trunk/clangd/index/dex/Iterator.h @@ -87,13 +87,14 @@ /// /// Note: reachedEnd() must be false. virtual DocID peek() const = 0; - /// Retrieves boosting score. Query tree root should pass Root->peek() to this - /// function, the parameter is needed to propagate through the tree. Given ID - /// should be compared against BOOST iterator peek()s: some of the iterators - /// would not point to the item which was propagated to the top of the query - /// tree (e.g. if these iterators are branches of OR iterator) and hence - /// shouldn't apply any boosting to the consumed item. - virtual float consume(DocID ID) = 0; + /// Informs the iterator that the current document was consumed, and returns + /// its boost. + /// + /// Note: If this iterator has any child iterators that contain the document, + /// consume() should be called on those and their boosts incorporated. + /// consume() must *not* be called on children that don't contain the current + /// doc. + virtual float consume() = 0; virtual ~Iterator() {} @@ -118,17 +119,15 @@ virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; }; -/// Advances the iterator until it is either exhausted or the number of -/// requested items is reached. Returns pairs of document IDs with the -/// corresponding boosting score. +/// Advances the iterator until it is exhausted. Returns pairs of document IDs +/// with the corresponding boosting score. /// /// Boosting can be seen as a compromise between retrieving too many items and /// calculating finals score for each of them (which might be very expensive) /// and not retrieving enough items so that items with very high final score /// would not be processed. Boosting score is a computationally efficient way /// to acquire preliminary scores of requested items. -std::vector> -consume(Iterator &It, size_t Limit = std::numeric_limits::max()); +std::vector> consume(Iterator &It); /// Returns a document iterator over given PostingList. /// @@ -165,6 +164,13 @@ std::unique_ptr createBoost(std::unique_ptr Child, float Factor); +/// Returns LIMIT iterator, which yields up to N elements of its child iterator. +/// Elements only count towards the limit if they are part of the final result +/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2), +/// not (). +std::unique_ptr createLimit(std::unique_ptr Child, + size_t Limit); + /// This allows createAnd(create(...), create(...)) syntax. template std::unique_ptr createAnd(Args... args) { std::vector> Children; Index: clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp +++ clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp @@ -46,7 +46,7 @@ return *Index; } - float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; } + float consume() override { return DEFAULT_BOOST_SCORE; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -105,16 +105,12 @@ DocID peek() const override { return Children.front()->peek(); } - // If not exhausted and points to the given item, consume() returns the - // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is - // returned. - float consume(DocID ID) override { - if (reachedEnd() || peek() != ID) - return DEFAULT_BOOST_SCORE; + float consume() override { + assert(!reachedEnd() && "AndIterator can't consume() at the end."); return std::accumulate( begin(Children), end(Children), DEFAULT_BOOST_SCORE, [&](float Current, const std::unique_ptr &Child) { - return Current * Child->consume(ID); + return Current * Child->consume(); }); } @@ -226,15 +222,16 @@ // Returns the maximum boosting score among all Children when iterator is not // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise. - float consume(DocID ID) override { - if (reachedEnd() || peek() != ID) - return DEFAULT_BOOST_SCORE; + float consume() override { + assert(!reachedEnd() && + "OrIterator can't consume() after it reached the end."); + const DocID ID = peek(); return std::accumulate( begin(Children), end(Children), DEFAULT_BOOST_SCORE, - [&](float Current, const std::unique_ptr &Child) { + [&](float Boost, const std::unique_ptr &Child) { return (!Child->reachedEnd() && Child->peek() == ID) - ? std::max(Current, Child->consume(ID)) - : Current; + ? std::max(Boost, Child->consume()) + : Boost; }); } @@ -278,7 +275,7 @@ return Index; } - float consume(DocID) override { return DEFAULT_BOOST_SCORE; } + float consume() override { return DEFAULT_BOOST_SCORE; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -306,7 +303,7 @@ DocID peek() const override { return Child->peek(); } - float consume(DocID ID) override { return Child->consume(ID) * Factor; } + float consume() override { return Child->consume() * Factor; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -318,15 +315,50 @@ float Factor; }; +/// This iterator limits the number of items retrieved from the child iterator +/// on top of the query tree. To ensure that query tree with LIMIT iterators +/// inside works correctly, users have to call Root->consume(Root->peek()) each +/// time item is retrieved at the root of query tree. +class LimitIterator : public Iterator { +public: + LimitIterator(std::unique_ptr Child, size_t Limit) + : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {} + + bool reachedEnd() const override { + return ItemsLeft == 0 || Child->reachedEnd(); + } + + void advance() override { Child->advance(); } + + void advanceTo(DocID ID) override { Child->advanceTo(ID); } + + DocID peek() const override { return Child->peek(); } + + /// Decreases the limit in case the element consumed at top of the query tree + /// comes from the underlying iterator. + float consume() override { + assert(!reachedEnd() && "LimitIterator can't consume at the end."); + --ItemsLeft; + return Child->consume(); + } + +private: + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')'; + return OS; + } + + std::unique_ptr Child; + size_t Limit; + size_t ItemsLeft; +}; + } // end namespace -std::vector> consume(Iterator &It, size_t Limit) { +std::vector> consume(Iterator &It) { std::vector> Result; - for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit; - It.advance(), ++Retrieved) { - DocID Document = It.peek(); - Result.push_back(std::make_pair(Document, It.consume(Document))); - } + for (; !It.reachedEnd(); It.advance()) + Result.push_back(std::make_pair(It.peek(), It.consume())); return Result; } @@ -353,6 +385,11 @@ return llvm::make_unique(move(Child), Factor); } +std::unique_ptr createLimit(std::unique_ptr Child, + size_t Size) { + return llvm::make_unique(move(Child), Size); +} + } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp @@ -29,9 +29,8 @@ namespace dex { namespace { -std::vector -consumeIDs(Iterator &It, size_t Limit = std::numeric_limits::max()) { - auto IDAndScore = consume(It, Limit); +std::vector consumeIDs(Iterator &It) { + auto IDAndScore = consume(It); std::vector IDs(IDAndScore.size()); for (size_t I = 0; I < IDAndScore.size(); ++I) IDs[I] = IDAndScore[I].first; @@ -234,13 +233,13 @@ Root->advanceTo(1); Root->advanceTo(0); EXPECT_EQ(Root->peek(), 1U); - auto ElementBoost = Root->consume(Root->peek()); + auto ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 6); Root->advance(); EXPECT_EQ(Root->peek(), 5U); Root->advanceTo(5); EXPECT_EQ(Root->peek(), 5U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 8); Root->advanceTo(9000); EXPECT_TRUE(Root->reachedEnd()); @@ -265,24 +264,23 @@ } TEST(DexIndexIterators, Limit) { - const PostingList L0 = {4, 7, 8, 20, 42, 100}; - const PostingList L1 = {1, 3, 5, 8, 9}; - const PostingList L2 = {1, 5, 7, 9}; - const PostingList L3 = {0, 5}; - const PostingList L4 = {0, 1, 5}; - const PostingList L5; - - auto DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100)); - - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100)); - - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8)); - - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre()); + const PostingList L0 = {3, 6, 7, 20, 42, 100}; + const PostingList L1 = {1, 3, 5, 6, 7, 30, 100}; + const PostingList L2 = {0, 3, 5, 7, 8, 100}; + + auto DocIterator = createLimit(create(L0), 42); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100)); + + DocIterator = createLimit(create(L0), 3); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7)); + + DocIterator = createLimit(create(L0), 0); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre()); + + auto AndIterator = + createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2), + createLimit(create(L1), 3), createLimit(create(L2), 42)); + EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7)); } TEST(DexIndexIterators, True) { @@ -301,7 +299,7 @@ TEST(DexIndexIterators, Boost) { auto BoostIterator = createBoost(createTrue(5U), 42U); EXPECT_FALSE(BoostIterator->reachedEnd()); - auto ElementBoost = BoostIterator->consume(BoostIterator->peek()); + auto ElementBoost = BoostIterator->consume(); EXPECT_THAT(ElementBoost, 42U); const PostingList L0 = {2, 4}; @@ -309,20 +307,20 @@ auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U), createBoost(create(L1), 3U)); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE); Root->advance(); EXPECT_THAT(Root->peek(), 1U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 3); Root->advance(); EXPECT_THAT(Root->peek(), 2U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 2); Root->advanceTo(4); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 3); }