diff --git a/clang-tools-extra/clangd/index/dex/Trigram.cpp b/clang-tools-extra/clangd/index/dex/Trigram.cpp --- a/clang-tools-extra/clangd/index/dex/Trigram.cpp +++ b/clang-tools-extra/clangd/index/dex/Trigram.cpp @@ -101,17 +101,43 @@ std::vector generateQueryTrigrams(llvm::StringRef Query) { if (Query.empty()) return {}; - std::string LowercaseQuery = Query.lower(); - if (Query.size() < 3) // short-query trigrams only - return {Token(Token::Kind::Trigram, LowercaseQuery)}; // Apply fuzzy matching text segmentation. std::vector Roles(Query.size()); calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size())); + std::string LowercaseQuery = Query.lower(); + + if (LowercaseQuery.size() < 3) // short-query trigrams only. + return {Token(Token::Kind::Trigram, LowercaseQuery)}; + + unsigned ValidSymbols = + llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; }); + // If the query does not have any alphanumeric symbols, don't restrict the + // result to the names. + if (ValidSymbols == 0) + return {}; + // For queries with very few letters, emulate what generateIdentifierTrigrams + // outputs for the beginning of the Identifier. + if (ValidSymbols < 3) { + std::string Letters = + Roles.front() == Separator ? std::string(1, Query.front()) : ""; + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { + if (Roles[I] == Head || Roles[I] == Tail) { + Letters += LowercaseQuery[I]; + // Similar to the identifier trigram generation, stop here for the + // queries starting with the separator, i.e. "_va" will only output + // "_v" here, identifier trigram generator will output "_" and "_v" + if (Roles.front() == Separator) + break; + } + } + return {Token(Token::Kind::Trigram, Letters)}; + } + llvm::DenseSet UniqueTrigrams; std::string Chars; - for (unsigned I = 0; I < Query.size(); ++I) { + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { if (Roles[I] != Head && Roles[I] != Tail) continue; // Skip delimiters. Chars.push_back(LowercaseQuery[I]); diff --git a/clang-tools-extra/clangd/unittests/DexTests.cpp b/clang-tools-extra/clangd/unittests/DexTests.cpp --- a/clang-tools-extra/clangd/unittests/DexTests.cpp +++ b/clang-tools-extra/clangd/unittests/DexTests.cpp @@ -404,6 +404,9 @@ EXPECT_THAT(identifierTrigramTokens("IsOK"), trigramsAre({"i", "is", "io", "iso", "iok", "sok"})); + EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"})); + EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"})); + EXPECT_THAT( identifierTrigramTokens("abc_defGhij__klm"), trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "adg", "bcd", @@ -422,6 +425,14 @@ EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({})); + EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"})); + + EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"})); EXPECT_THAT(generateQueryTrigrams("clangd"), @@ -545,6 +556,18 @@ Req.Query = "ttf"; EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour")); EXPECT_FALSE(Incomplete) << "3-char string is not a short query"; + + I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}), + RefSlab(), RelationSlab()); + + Req.Query = "kw_"; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; + Req.Scopes = {"tok::"}; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; } TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {