Index: clangd/index/dex/Dex.cpp =================================================================== --- clangd/index/dex/Dex.cpp +++ clangd/index/dex/Dex.cpp @@ -16,6 +16,7 @@ #include "index/Index.h" #include "index/dex/Iterator.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/ScopedPrinter.h" #include #include @@ -85,13 +86,13 @@ // ProximityPaths. Boosting factor should depend on the distance to the // Proximity Path: the closer processed path is, the higher boosting factor. for (const auto &ParentURI : ParentURIs.keys()) { - const auto It = - InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI)); + Token Tok(Token::Kind::ProximityURI, ParentURI); + const auto It = InvertedIndex.find(Tok); if (It != InvertedIndex.end()) { // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. PathProximitySignals.SymbolURI = ParentURI; - BoostingIterators.push_back( - createBoost(It->second.iterator(), PathProximitySignals.evaluate())); + BoostingIterators.push_back(createBoost(It->second.iterator(&It->first), + PathProximitySignals.evaluate())); } } return BoostingIterators; @@ -142,7 +143,6 @@ llvm::function_ref Callback) const { assert(!StringRef(Req.Query).contains("::") && "There must be no :: in query."); - // FIXME: attach the query tree to the trace span. trace::Span Tracer("Dex fuzzyFind"); FuzzyMatcher Filter(Req.Query); bool More = false; @@ -156,7 +156,7 @@ for (const auto &Trigram : TrigramTokens) { const auto It = InvertedIndex.find(Trigram); if (It != InvertedIndex.end()) - TrigramIterators.push_back(It->second.iterator()); + TrigramIterators.push_back(It->second.iterator(&It->first)); } if (!TrigramIterators.empty()) TopLevelChildren.push_back(createAnd(move(TrigramIterators))); @@ -164,9 +164,10 @@ // Generate scope tokens for search query. std::vector> ScopeIterators; for (const auto &Scope : Req.Scopes) { - const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope)); + Token Tok(Token::Kind::Scope, Scope); + const auto It = InvertedIndex.find(Tok); if (It != InvertedIndex.end()) - ScopeIterators.push_back(It->second.iterator()); + ScopeIterators.push_back(It->second.iterator(&It->first)); } if (Req.AnyScope) ScopeIterators.push_back(createBoost(createTrue(Symbols.size()), @@ -189,7 +190,8 @@ if (Req.RestrictForCodeCompletion) TopLevelChildren.push_back( - InvertedIndex.find(RestrictedForCodeCompletion)->second.iterator()); + InvertedIndex.find(RestrictedForCodeCompletion) + ->second.iterator(&RestrictedForCodeCompletion)); // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. @@ -203,6 +205,8 @@ // when the requested number of items is small. auto Root = Req.Limit ? createLimit(move(QueryIterator), *Req.Limit * 100) : move(QueryIterator); + SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root)); + vlog("Dex query tree: {0}", *Root); using IDAndScore = std::pair; std::vector IDAndScores = consume(*Root); Index: clangd/index/dex/Iterator.cpp =================================================================== --- clangd/index/dex/Iterator.cpp +++ clangd/index/dex/Iterator.cpp @@ -243,8 +243,7 @@ private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << "(TRUE {" << Index << "} out of " << Size << ")"; - return OS; + return OS << "true"; } DocID Index = 0; @@ -273,8 +272,7 @@ private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << "(BOOST " << Factor << ' ' << *Child << ')'; - return OS; + return OS << "(* " << Factor << ' ' << *Child << ')'; } std::unique_ptr Child; @@ -314,8 +312,7 @@ private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')'; - return OS; + return OS << "(LIMIT " << Limit << " " << *Child << ')'; } std::unique_ptr Child; Index: clangd/index/dex/PostingList.h =================================================================== --- clangd/index/dex/PostingList.h +++ clangd/index/dex/PostingList.h @@ -33,8 +33,7 @@ namespace clang { namespace clangd { namespace dex { - -class Iterator; +struct Token; /// NOTE: This is an implementation detail. /// @@ -64,7 +63,8 @@ /// Constructs DocumentIterator over given posting list. DocumentIterator will /// go through the chunks and decompress them on-the-fly when necessary. - std::unique_ptr iterator() const; + /// If given, Tok is only used for the string representation. + std::unique_ptr iterator(const Token *Tok = nullptr) const; /// Returns in-memory size of external storage. size_t bytes() const { return Chunks.capacity() * sizeof(Chunk); } Index: clangd/index/dex/PostingList.cpp =================================================================== --- clangd/index/dex/PostingList.cpp +++ clangd/index/dex/PostingList.cpp @@ -9,6 +9,7 @@ #include "PostingList.h" #include "Iterator.h" +#include "Token.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" @@ -23,8 +24,8 @@ /// them on-the-fly when the contents of chunk are to be seen. class ChunkIterator : public Iterator { public: - explicit ChunkIterator(llvm::ArrayRef Chunks) - : Chunks(Chunks), CurrentChunk(Chunks.begin()) { + explicit ChunkIterator(const Token *Tok, llvm::ArrayRef Chunks) + : Tok(Tok), Chunks(Chunks), CurrentChunk(Chunks.begin()) { if (!Chunks.empty()) { DecompressedChunk = CurrentChunk->decompress(); CurrentID = DecompressedChunk.begin(); @@ -71,15 +72,16 @@ private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + if (Tok != nullptr) + return OS << *Tok; OS << '['; - if (CurrentChunk != Chunks.begin() || - (CurrentID != DecompressedChunk.begin() && !DecompressedChunk.empty())) - OS << "... "; - OS << (reachedEnd() ? "END" : std::to_string(*CurrentID)); - if (!reachedEnd() && CurrentID < DecompressedChunk.end() - 1) - OS << " ..."; - OS << ']'; - return OS; + const char *Sep = ""; + for (const Chunk &C : Chunks) + for (const DocID Doc : C.decompress()) { + OS << Sep << Doc; + Sep = " "; + } + return OS << ']'; } /// If the cursor is at the end of a chunk, place it at the start of the next @@ -110,6 +112,7 @@ } } + const Token *Tok; llvm::ArrayRef Chunks; /// Iterator over chunks. /// If CurrentChunk is valid, then DecompressedChunk is @@ -217,8 +220,8 @@ PostingList::PostingList(llvm::ArrayRef Documents) : Chunks(encodeStream(Documents)) {} -std::unique_ptr PostingList::iterator() const { - return llvm::make_unique(Chunks); +std::unique_ptr PostingList::iterator(const Token *Tok) const { + return llvm::make_unique(Tok, Chunks); } } // namespace dex Index: clangd/index/dex/Token.h =================================================================== --- clangd/index/dex/Token.h +++ clangd/index/dex/Token.h @@ -82,6 +82,20 @@ Kind TokenKind; friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { + switch (T.TokenKind) { + case Kind::Trigram: + OS << "T="; + break; + case Kind::Scope: + OS << "S="; + break; + case Kind::ProximityURI: + OS << "U="; + break; + case Kind::Sentinel: + OS << "?="; + break; + } return OS << T.Data; } Index: unittests/clangd/DexTests.cpp =================================================================== --- unittests/clangd/DexTests.cpp +++ unittests/clangd/DexTests.cpp @@ -226,18 +226,17 @@ } TEST(DexIterators, StringRepresentation) { - const PostingList L0({4, 7, 8, 20, 42, 100}); - const PostingList L1({1, 3, 5, 8, 9}); - const PostingList L2({1, 5, 7, 9}); - const PostingList L3({0, 5}); - const PostingList L4({0, 1, 5}); - - EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4 ...]"); - auto It = L0.iterator(); - It->advanceTo(19); - EXPECT_EQ(llvm::to_string(*It), "[... 20 ...]"); - It->advanceTo(9000); - EXPECT_EQ(llvm::to_string(*It), "[... END]"); + const PostingList L1({1, 3, 5}); + const PostingList L2({1, 7, 9}); + auto I1 = L1.iterator(); + Token Tok(Token::Kind::Trigram, "L2"); + auto I2 = L1.iterator(&Tok); + + EXPECT_EQ(llvm::to_string(*I1), "[1 3 5]"); + EXPECT_EQ(llvm::to_string(*I2), "T=L2"); + + auto Tree = createLimit(createAnd(move(I1), move(I2)), 10); + EXPECT_EQ(llvm::to_string(*Tree), "(LIMIT 10 (& [1 3 5] T=L2))"); } TEST(DexIterators, Limit) {