Index: clang-tools-extra/clangd/index/dex/Trigram.h =================================================================== --- clang-tools-extra/clangd/index/dex/Trigram.h +++ clang-tools-extra/clangd/index/dex/Trigram.h @@ -62,7 +62,11 @@ /// /// For short queries (less than 3 characters with Head or Tail roles in Fuzzy /// Matching segmentation) this returns a single trigram with the first -/// characters (up to 3) to perfrom prefix match. +/// characters (up to 3) to perform prefix match. However, if the query is short +/// but it contains two HEAD symbols then the returned trigram would be an +/// incomplete bigram with those two HEADs (unless query starts with '_' which +/// is treated as additional information). This would help to match +/// "unique_ptr" and similar symbols with "u_p" query. std::vector generateQueryTrigrams(llvm::StringRef Query); } // namespace dex Index: clang-tools-extra/clangd/index/dex/Trigram.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Trigram.cpp +++ clang-tools-extra/clangd/index/dex/Trigram.cpp @@ -116,10 +116,16 @@ // Additional pass is necessary to count valid identifier characters. // Depending on that, this function might return incomplete trigram. + unsigned Heads = 0; unsigned ValidSymbolsCount = 0; - for (size_t I = 0; I < Roles.size(); ++I) - if (Roles[I] == Head || Roles[I] == Tail) + for (size_t I = 0; I < Roles.size(); ++I) { + if (Roles[I] == Head) { + ++ValidSymbolsCount; + ++Heads; + } else if (Roles[I] == Tail) { ++ValidSymbolsCount; + } + } std::string LowercaseQuery = Query.lower(); @@ -128,9 +134,21 @@ // If the number of symbols which can form fuzzy matching trigram is not // sufficient, generate a single incomplete trigram for query. 
if (ValidSymbolsCount < 3) { - std::string Chars = - LowercaseQuery.substr(0, std::min(3UL, Query.size())); - Chars.append(3 - Chars.size(), END_MARKER); + std::string Chars; + // If the query is not long enough to form a trigram but contains two heads + // the returned trigram should be "xy$" where "x" and "y" are the heads. + // This might be particularly important for cases like "u_p" to match + // "unique_ptr" and similar symbols from the C++ Standard Library. + if (Heads == 2 && !Query.startswith("_")) { + for (size_t I = 0; I < LowercaseQuery.size(); ++I) + if (Roles[I] == Head) + Chars += LowercaseQuery[I]; + + Chars += END_MARKER; + } else { + Chars = LowercaseQuery.substr(0, std::min(3UL, Query.size())); + Chars.append(3 - Chars.size(), END_MARKER); + } UniqueTrigrams.insert(Token(Token::Kind::Trigram, Chars)); } else { std::deque Chars; Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -321,6 +321,9 @@ EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__$"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"___"})); + EXPECT_THAT(generateQueryTrigrams("u_p"), trigramsAre({"up$"})); + EXPECT_THAT(generateQueryTrigrams("_u_p"), trigramsAre({"_u_"})); + EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"})); EXPECT_THAT(generateQueryTrigrams("clangd"),