Index: clangd/index/dex/Dex.h =================================================================== --- clangd/index/dex/Dex.h +++ clangd/index/dex/Dex.h @@ -87,6 +87,7 @@ private: void buildIndex(); + std::unique_ptr iterator(const Token &Tok) const; /// Stores symbols sorted in the descending order of symbol quality.. std::vector Symbols; Index: clangd/index/dex/Dex.cpp =================================================================== --- clangd/index/dex/Dex.cpp +++ clangd/index/dex/Dex.cpp @@ -62,7 +62,7 @@ } // Constructs BOOST iterators for Path Proximities. -std::vector> createFileProximityIterators( +std::unique_ptr createFileProximityIterator( llvm::ArrayRef ProximityPaths, llvm::ArrayRef URISchemes, const llvm::DenseMap &InvertedIndex, @@ -105,7 +105,8 @@ It->second.iterator(&It->first), PathProximitySignals.evaluate())); } } - return BoostingIterators; + BoostingIterators.push_back(Corpus.all()); + return Corpus.unionOf(std::move(BoostingIterators)); } } // namespace @@ -147,6 +148,12 @@ {TokenToPostingList.first, PostingList(TokenToPostingList.second)}); } +std::unique_ptr Dex::iterator(const Token &Tok) const { + auto It = InvertedIndex.find(Tok); + return It == InvertedIndex.end() ? Corpus.none() + : It->second.iterator(&It->first); +} + /// Constructs iterators over tokens extracted from the query and exhausts it /// while applying Callback to each symbol in the order of decreasing quality /// of the matched symbols. @@ -160,64 +167,40 @@ // Prevent clients from postfiltering them for longer queries. bool More = !Req.Query.empty() && Req.Query.size() < 3; - std::vector> TopLevelChildren; + std::vector> Criteria; const auto TrigramTokens = generateQueryTrigrams(Req.Query); // Generate query trigrams and construct AND iterator over all query // trigrams. std::vector> TrigramIterators; - for (const auto &Trigram : TrigramTokens) { - const auto It = InvertedIndex.find(Trigram); - if (It != InvertedIndex.end()) - TrigramIterators.push_back(It->second.iterator(&It->first)); - } - if (!TrigramIterators.empty()) - TopLevelChildren.push_back(Corpus.intersect(move(TrigramIterators))); + for (const auto &Trigram : TrigramTokens) + TrigramIterators.push_back(iterator(Trigram)); + Criteria.push_back(Corpus.intersect(move(TrigramIterators))); // Generate scope tokens for search query. std::vector> ScopeIterators; - for (const auto &Scope : Req.Scopes) { - Token Tok(Token::Kind::Scope, Scope); - const auto It = InvertedIndex.find(Tok); - if (It != InvertedIndex.end()) - ScopeIterators.push_back(It->second.iterator(&It->first)); - } - if (Req.AnyScope) + for (const auto &Scope : Req.Scopes) + ScopeIterators.push_back(iterator(Token(Token::Kind::Scope, Scope))); + if (Req.AnyScope || /*legacy*/ Req.Scopes.empty()) ScopeIterators.push_back( Corpus.boost(Corpus.all(), ScopeIterators.empty() ? 1.0 : 0.2)); + Criteria.push_back(Corpus.unionOf(move(ScopeIterators))); - // Add OR iterator for scopes if there are any Scope Iterators. - if (!ScopeIterators.empty()) - TopLevelChildren.push_back(Corpus.unionOf(move(ScopeIterators))); - - // Add proximity paths boosting. - auto BoostingIterators = createFileProximityIterators( - Req.ProximityPaths, URISchemes, InvertedIndex, Corpus); - // Boosting iterators do not actually filter symbols. In order to preserve - // the validity of resulting query, TRUE iterator should be added along - // BOOSTs. - if (!BoostingIterators.empty()) { - BoostingIterators.push_back(Corpus.all()); - TopLevelChildren.push_back(Corpus.unionOf(move(BoostingIterators))); - } + // Add proximity paths boosting (all symbols, some boosted). + Criteria.push_back(createFileProximityIterator(Req.ProximityPaths, URISchemes, + InvertedIndex, Corpus)); if (Req.RestrictForCodeCompletion) - TopLevelChildren.push_back( - InvertedIndex.find(RestrictedForCodeCompletion) - ->second.iterator(&RestrictedForCodeCompletion)); + Criteria.push_back(iterator(RestrictedForCodeCompletion)); // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. - auto QueryIterator = TopLevelChildren.empty() - ? Corpus.all() - : Corpus.intersect(move(TopLevelChildren)); + auto Root = Corpus.intersect(move(Criteria)); // Retrieve more items than it was requested: some of the items with high // final score might not be retrieved otherwise. - // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as - // using 100x of the requested number might not be good in practice, e.g. - // when the requested number of items is small. - auto Root = Req.Limit ? Corpus.limit(move(QueryIterator), *Req.Limit * 100) - : move(QueryIterator); + // FIXME(kbobyrev): Tune this ratio. + if (Req.Limit) + Root = Corpus.limit(move(Root), *Req.Limit * 100); SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root)); vlog("Dex query tree: {0}", *Root); Index: clangd/index/dex/Iterator.h =================================================================== --- clangd/index/dex/Iterator.h +++ clangd/index/dex/Iterator.h @@ -106,7 +106,6 @@ Iterator(Kind MyKind = Kind::Other) : MyKind(MyKind) {} private: - Kind MyKind; virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; Kind MyKind; }; Index: clangd/index/dex/Trigram.cpp =================================================================== --- clangd/index/dex/Trigram.cpp +++ clangd/index/dex/Trigram.cpp @@ -87,6 +87,8 @@ } std::vector generateQueryTrigrams(llvm::StringRef Query) { + if (Query.empty()) + return {}; std::string LowercaseQuery = Query.lower(); if (Query.size() < 3) // short-query trigrams only return {Token(Token::Kind::Trigram, LowercaseQuery)}; Index: unittests/clangd/DexTests.cpp =================================================================== --- unittests/clangd/DexTests.cpp +++ unittests/clangd/DexTests.cpp @@ -408,6 +408,7 @@ EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl"})); EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"})); + EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({})); EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"})); EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({})); @@ -526,8 +527,6 @@ UnorderedElementsAre("LaughingOutLoud", "LittleOldLady")); } -// TODO(sammccall): enable after D52796 bugfix. -#if 0 TEST(DexTest, ShortQuery) { auto I = Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(), URISchemes); @@ -549,7 +548,6 @@ EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour")); EXPECT_FALSE(Incomplete) << "3-char string is not a short query"; } -#endif TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), RefSlab(), @@ -614,6 +612,15 @@ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc")); } +TEST(DexTest, UnknownPostingList) { + // Regression test: we used to ignore unknown scopes and accept any symbol. + auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), RefSlab(), + URISchemes); + FuzzyFindRequest Req; + Req.Scopes = {"ns2::"}; + EXPECT_THAT(match(*I, Req), UnorderedElementsAre()); +} + TEST(DexTest, Lookup) { auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), RefSlab(), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));