Index: clangd/index/dex/Dex.h =================================================================== --- clangd/index/dex/Dex.h +++ clangd/index/dex/Dex.h @@ -88,6 +88,7 @@ private: void buildIndex(); + std::unique_ptr iterator(const Token &Tok) const; /// Stores symbols sorted in the descending order of symbol quality.. std::vector Symbols; Index: clangd/index/dex/Dex.cpp =================================================================== --- clangd/index/dex/Dex.cpp +++ clangd/index/dex/Dex.cpp @@ -53,7 +53,7 @@ } // Constructs BOOST iterators for Path Proximities. -std::vector> createFileProximityIterators( +std::unique_ptr createFileProximityIterator( llvm::ArrayRef ProximityPaths, llvm::ArrayRef URISchemes, const llvm::DenseMap &InvertedIndex, @@ -96,7 +96,8 @@ It->second.iterator(&It->first), PathProximitySignals.evaluate())); } } - return BoostingIterators; + BoostingIterators.push_back(Corpus.all()); + return Corpus.unionOf(std::move(BoostingIterators)); } } // namespace @@ -138,6 +139,12 @@ {TokenToPostingList.first, PostingList(TokenToPostingList.second)}); } +std::unique_ptr Dex::iterator(const Token &Tok) const { + auto It = InvertedIndex.find(Tok); + return It == InvertedIndex.end() ? Corpus.none() + : It->second.iterator(&It->first); +} + /// Constructs iterators over tokens extracted from the query and exhausts it /// while applying Callback to each symbol in the order of decreasing quality /// of the matched symbols. @@ -149,64 +156,40 @@ FuzzyMatcher Filter(Req.Query); bool More = false; - std::vector> TopLevelChildren; + std::vector> Criteria; const auto TrigramTokens = generateQueryTrigrams(Req.Query); // Generate query trigrams and construct AND iterator over all query // trigrams. std::vector> TrigramIterators; - for (const auto &Trigram : TrigramTokens) { - const auto It = InvertedIndex.find(Trigram); - if (It != InvertedIndex.end()) - TrigramIterators.push_back(It->second.iterator(&It->first)); - } - if (!TrigramIterators.empty()) - TopLevelChildren.push_back(Corpus.intersect(move(TrigramIterators))); + for (const auto &Trigram : TrigramTokens) + TrigramIterators.push_back(iterator(Trigram)); + Criteria.push_back(Corpus.intersect(move(TrigramIterators))); // Generate scope tokens for search query. std::vector> ScopeIterators; - for (const auto &Scope : Req.Scopes) { - Token Tok(Token::Kind::Scope, Scope); - const auto It = InvertedIndex.find(Tok); - if (It != InvertedIndex.end()) - ScopeIterators.push_back(It->second.iterator(&It->first)); - } - if (Req.AnyScope) + for (const auto &Scope : Req.Scopes) + ScopeIterators.push_back(iterator(Token(Token::Kind::Scope, Scope))); + if (Req.AnyScope || /*legacy*/ Req.Scopes.empty()) ScopeIterators.push_back( Corpus.boost(Corpus.all(), ScopeIterators.empty() ? 1.0 : 0.2)); + Criteria.push_back(Corpus.unionOf(move(ScopeIterators))); - // Add OR iterator for scopes if there are any Scope Iterators. - if (!ScopeIterators.empty()) - TopLevelChildren.push_back(Corpus.unionOf(move(ScopeIterators))); - - // Add proximity paths boosting. - auto BoostingIterators = createFileProximityIterators( - Req.ProximityPaths, URISchemes, InvertedIndex, Corpus); - // Boosting iterators do not actually filter symbols. In order to preserve - // the validity of resulting query, TRUE iterator should be added along - // BOOSTs. - if (!BoostingIterators.empty()) { - BoostingIterators.push_back(Corpus.all()); - TopLevelChildren.push_back(Corpus.unionOf(move(BoostingIterators))); - } + // Add proximity paths boosting (all symbols, some boosted). + Criteria.push_back(createFileProximityIterator(Req.ProximityPaths, URISchemes, + InvertedIndex, Corpus)); if (Req.RestrictForCodeCompletion) - TopLevelChildren.push_back( - InvertedIndex.find(RestrictedForCodeCompletion) - ->second.iterator(&RestrictedForCodeCompletion)); + Criteria.push_back(iterator(RestrictedForCodeCompletion)); // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. - auto QueryIterator = TopLevelChildren.empty() - ? Corpus.all() - : Corpus.intersect(move(TopLevelChildren)); + auto Root = Corpus.intersect(move(Criteria)); // Retrieve more items than it was requested: some of the items with high // final score might not be retrieved otherwise. - // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as - // using 100x of the requested number might not be good in practice, e.g. - // when the requested number of items is small. - auto Root = Req.Limit ? Corpus.limit(move(QueryIterator), *Req.Limit * 100) - : move(QueryIterator); + // FIXME(kbobyrev): Tune this ratio. + if (Req.Limit) + Root = Corpus.limit(move(Root), *Req.Limit * 100); SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root)); vlog("Dex query tree: {0}", *Root); Index: clangd/index/dex/Trigram.cpp =================================================================== --- clangd/index/dex/Trigram.cpp +++ clangd/index/dex/Trigram.cpp @@ -124,6 +124,9 @@ // If the number of symbols which can form fuzzy matching trigram is not // sufficient, generate a single incomplete trigram for query. if (ValidSymbolsCount < 3) { + if (LowercaseQuery.empty()) + return {}; + // TODO(sammccall): is the incomplete trigram for *invalid* chars useful? std::string Chars = LowercaseQuery.substr(0, std::min(3UL, Query.size())); Chars.append(3 - Chars.size(), END_MARKER); Index: unittests/clangd/DexTests.cpp =================================================================== --- unittests/clangd/DexTests.cpp +++ unittests/clangd/DexTests.cpp @@ -411,6 +411,7 @@ EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl$"})); EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"})); + EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({})); EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_$$"})); EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__$"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"___"})); @@ -588,6 +589,14 @@ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc")); } +TEST(DexTest, UnknownPostingList) { + // Regression test: we used to ignore unknown scopes and accept any symbol. + auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes); + FuzzyFindRequest Req; + Req.Scopes = {"ns2::"}; + EXPECT_THAT(match(*I, Req), UnorderedElementsAre()); +} + TEST(DexTest, Lookup) { auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));