Index: clangd/FileDistance.h =================================================================== --- clangd/FileDistance.h +++ clangd/FileDistance.h @@ -37,6 +37,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H + #include "URI.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -107,3 +110,5 @@ } // namespace clangd } // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H Index: clangd/URI.h =================================================================== --- clangd/URI.h +++ clangd/URI.h @@ -45,6 +45,11 @@ static llvm::Expected create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme); + // Similar to above except this uses the first scheme in \p Schemes that + // works. + static llvm::Expected create(llvm::StringRef AbsolutePath, + const std::vector &Schemes); + /// This creates a file:// URI for \p AbsolutePath. The path must be absolute. static URI createFile(llvm::StringRef AbsolutePath); Index: clangd/URI.cpp =================================================================== --- clangd/URI.cpp +++ clangd/URI.cpp @@ -181,6 +181,26 @@ return S->get()->uriFromAbsolutePath(AbsolutePath); } +llvm::Expected URI::create(llvm::StringRef AbsolutePath, + const std::vector &Schemes) { + if (!llvm::sys::path::is_absolute(AbsolutePath)) + return make_string_error("Not a valid absolute path: " + AbsolutePath); + for (const auto &Scheme : Schemes) { + auto URI = URI::create(AbsolutePath, Scheme); + // For some paths, conversion to different URI schemes is impossible. These + // should be just skipped. + if (!URI) { + // Ignore the error. + llvm::consumeError(URI.takeError()); + continue; + } + return URI; + } + return make_string_error( + "Couldn't convert " + AbsolutePath + + " to any given scheme: " + llvm::join(Schemes, ", ")); +} + URI URI::createFile(llvm::StringRef AbsolutePath) { auto U = create(AbsolutePath, "file"); if (!U) Index: clangd/index/SymbolYAML.h =================================================================== --- clangd/index/SymbolYAML.h +++ clangd/index/SymbolYAML.h @@ -45,6 +45,7 @@ // The size of global symbols should be relatively small, so that all symbols // can be managed in memory. std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, + llvm::ArrayRef URISchemes, bool UseDex = true); } // namespace clangd Index: clangd/index/SymbolYAML.cpp =================================================================== --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -185,6 +185,7 @@ } std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, + llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); @@ -209,7 +210,7 @@ if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::DexIndex::build(std::move(*Slab)) + return UseDex ? dex::DexIndex::build(std::move(*Slab), URISchemes) : MemIndex::build(std::move(*Slab), RefSlab()); } Index: clangd/index/dex/DexIndex.h =================================================================== --- clangd/index/dex/DexIndex.h +++ clangd/index/dex/DexIndex.h @@ -39,22 +39,26 @@ class DexIndex : public SymbolIndex { public: // All symbols must outlive this index. - template DexIndex(Range &&Symbols) { + template + DexIndex(Range &&Symbols, llvm::ArrayRef URISchemes) + : URISchemes(URISchemes) { for (auto &&Sym : Symbols) this->Symbols.push_back(&Sym); buildIndex(); } // Symbols are owned by BackingData, Index takes ownership. template - DexIndex(Range &&Symbols, Payload &&BackingData) - : DexIndex(std::forward(Symbols)) { + DexIndex(Range &&Symbols, Payload &&BackingData, + llvm::ArrayRef URISchemes) + : DexIndex(std::forward(Symbols), URISchemes) { KeepAlive = std::shared_ptr( std::make_shared(std::move(BackingData)), nullptr); } /// Builds an index from a slab. The index takes ownership of the slab. - static std::unique_ptr build(SymbolSlab Slab) { - return llvm::make_unique(Slab, std::move(Slab)); + static std::unique_ptr + build(SymbolSlab Slab, llvm::ArrayRef URISchemes) { + return llvm::make_unique(Slab, std::move(Slab), URISchemes); } bool @@ -72,21 +76,31 @@ private: void buildIndex(); + /// Stores symbols sorted in the descending order of symbol quality.. std::vector Symbols; + /// SymbolQuality[I] is the quality of Symbols[I]. + std::vector SymbolQuality; llvm::DenseMap LookupTable; - llvm::DenseMap SymbolQuality; - // Inverted index is a mapping from the search token to the posting list, - // which contains all items which can be characterized by such search token. - // For example, if the search token is scope "std::", the corresponding - // posting list would contain all indices of symbols defined in namespace std. - // Inverted index is used to retrieve posting lists which are processed during - // the fuzzyFind process. + /// Inverted index is a mapping from the search token to the posting list, + /// which contains all items which can be characterized by such search token. + /// For example, if the search token is scope "std::", the corresponding + /// posting list would contain all indices of symbols defined in namespace + /// std. Inverted index is used to retrieve posting lists which are processed + /// during the fuzzyFind process. llvm::DenseMap InvertedIndex; std::shared_ptr KeepAlive; // poor man's move-only std::any + + const std::vector URISchemes; }; +/// Returns Search Token for a number of parent directories of given Path. +/// Should be used within the index build process. +/// +/// This function is exposed for testing only. +std::vector generateProximityURIs(llvm::StringRef URIPath); + } // namespace dex } // namespace clangd } // namespace clang -#endif +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H Index: clangd/index/dex/DexIndex.cpp =================================================================== --- clangd/index/dex/DexIndex.cpp +++ clangd/index/dex/DexIndex.cpp @@ -8,8 +8,11 @@ //===----------------------------------------------------------------------===// #include "DexIndex.h" +#include "../../FileDistance.h" #include "../../FuzzyMatch.h" #include "../../Logger.h" +#include "../../Quality.h" +#include "llvm/ADT/StringSet.h" #include #include @@ -26,28 +29,87 @@ // Returns the tokens which are given symbols's characteristics. For example, // trigrams and scopes. // FIXME(kbobyrev): Support more token types: -// * Path proximity // * Types +// * Namespace proximity std::vector generateSearchTokens(const Symbol &Sym) { std::vector Result = generateIdentifierTrigrams(Sym.Name); Result.emplace_back(Token::Kind::Scope, Sym.Scope); + // Skip token generation for symbols with unknown declaration location. + if (!Sym.CanonicalDeclaration.FileURI.empty()) + for (const auto &ProximityURI : + generateProximityURIs(Sym.CanonicalDeclaration.FileURI)) + Result.emplace_back(Token::Kind::ProximityURI, ProximityURI); return Result; } +// Constructs BOOST iterators for Path Proximities. +std::vector> createFileProximityIterators( + llvm::ArrayRef ProximityPaths, + llvm::ArrayRef URISchemes, + const llvm::DenseMap &InvertedIndex) { + std::vector> BoostingIterators; + // Deduplicate parent URIs extracted from the ProximityPaths. + llvm::StringSet<> ParentURIs; + llvm::StringMap Sources; + for (const auto &Path : ProximityPaths) { + Sources[Path] = SourceParams(); + auto PathURI = URI::create(Path, URISchemes); + if (!PathURI) { + elog("Given ProximityPath {0} is can not be converted to any known URI " + "scheme. fuzzyFind request will ignore it.", + Path); + llvm::consumeError(PathURI.takeError()); + } + const auto PathProximityURIs = generateProximityURIs(PathURI->toString()); + for (const auto &ProximityURI : PathProximityURIs) + ParentURIs.insert(ProximityURI); + } + // Use SymbolRelevanceSignals for symbol relevance evaluation: use defaults + // for all parameters except for Proximity Path distance signal. + SymbolRelevanceSignals PathProximitySignals; + // DistanceCalculator will find the shortest distance from ProximityPaths to + // any URI extracted from the ProximityPaths. + URIDistance DistanceCalculator(Sources); + PathProximitySignals.FileProximityMatch = &DistanceCalculator; + // Try to build BOOST iterator for each Proximity Path provided by + // ProximityPaths. Boosting factor should depend on the distance to the + // Proximity Path: the closer processed path is, the higher boosting factor. + for (const auto &ParentURI : ParentURIs.keys()) { + const auto It = + InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI)); + if (It != InvertedIndex.end()) { + // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. + PathProximitySignals.SymbolURI = ParentURI; + BoostingIterators.push_back( + createBoost(create(It->second), PathProximitySignals.evaluate())); + } + } + return BoostingIterators; +} + } // namespace void DexIndex::buildIndex() { - for (const Symbol *Sym : Symbols) { + std::vector> ScoredSymbols(Symbols.size()); + + for (size_t I = 0; I < Symbols.size(); ++I) { + const Symbol *Sym = Symbols[I]; LookupTable[Sym->ID] = Sym; - SymbolQuality[Sym] = quality(*Sym); + ScoredSymbols[I] = {quality(*Sym), Sym}; } // Symbols are sorted by symbol qualities so that items in the posting lists // are stored in the descending order of symbol quality. - std::sort(begin(Symbols), end(Symbols), - [&](const Symbol *LHS, const Symbol *RHS) { - return SymbolQuality[LHS] > SymbolQuality[RHS]; - }); + std::sort(begin(ScoredSymbols), end(ScoredSymbols), + std::greater>()); + + // SymbolQuality was empty up until now. + SymbolQuality.resize(Symbols.size()); + // Populate internal storage using Symbol + Score pairs. + for (size_t I = 0; I < ScoredSymbols.size(); ++I) { + SymbolQuality[I] = ScoredSymbols[I].first; + Symbols[I] = ScoredSymbols[I].second; + } // Populate TempInvertedIndex with posting lists for index symbols. for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) { @@ -96,6 +158,17 @@ if (!ScopeIterators.empty()) TopLevelChildren.push_back(createOr(move(ScopeIterators))); + // Add proximity paths boosting. + auto BoostingIterators = createFileProximityIterators( + Req.ProximityPaths, URISchemes, InvertedIndex); + // Boosting iterators do not actually filter symbols. In order to preserve + // the validity of resulting query, TRUE iterator should be added along + // BOOSTs. + if (!BoostingIterators.empty()) { + BoostingIterators.push_back(createTrue(Symbols.size())); + TopLevelChildren.push_back(createOr(move(BoostingIterators))); + } + // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. auto QueryIterator = TopLevelChildren.empty() @@ -106,32 +179,36 @@ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. - const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount; auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); - // FIXME(kbobyrev): Add boosting to the query and utilize retrieved - // boosting scores. - std::vector> SymbolDocIDs = consume(*Root); - - // Retrieve top Req.MaxCandidateCount items. - std::priority_queue> Top; - for (const auto &P : SymbolDocIDs) { - const DocID SymbolDocID = P.first; + + using IDAndScore = std::pair; + std::vector IDAndScores = consume(*Root); + + auto Compare = [](const IDAndScore &LHS, const IDAndScore &RHS) { + return LHS.second > RHS.second; + }; + TopN Top(Req.MaxCandidateCount, Compare); + for (const auto &IDAndScore : IDAndScores) { + const DocID SymbolDocID = IDAndScore.first; const auto *Sym = Symbols[SymbolDocID]; const llvm::Optional Score = Filter.match(Sym->Name); if (!Score) continue; - // Multiply score by a negative factor so that Top stores items with the - // highest actual score. - Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym); - if (Top.size() > Req.MaxCandidateCount) { + // Combine Fuzzy Matching score, precomputed symbol quality and boosting + // score for a cumulative final symbol score. + const float FinalScore = + (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second; + // If Top.push(...) returns true, it means that it had to pop an item. In + // this case, it is possible to retrieve more symbols. + if (Top.push({SymbolDocID, FinalScore})) More = true; - Top.pop(); - } } - // Apply callback to the top Req.MaxCandidateCount items. - for (; !Top.empty(); Top.pop()) - Callback(*Top.top().second); + // Apply callback to the top Req.MaxCandidateCount items in the descending + // order of cumulative score. + for (const auto &Item : std::move(Top).items()) + Callback(*Symbols[Item.first]); return More; } @@ -161,6 +238,33 @@ return Bytes; } +std::vector generateProximityURIs(llvm::StringRef URIPath) { + std::vector Result; + auto ParsedURI = URI::parse(URIPath); + assert(ParsedURI && + "Non-empty argument of generateProximityURIs() should be a valid " + "URI."); + StringRef Body = ParsedURI->body(); + // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum + // size of resulting vector. Some projects might want to have higher limit if + // the file hierarchy is deeper. For the generic case, it would be useful to + // calculate Limit in the index build stage by calculating the maximum depth + // of the project source tree at runtime. + size_t Limit = 5; + // Insert original URI before the loop: this would save a redundant iteration + // with a URI parse. + Result.emplace_back(ParsedURI->toString()); + while (!Body.empty() && --Limit > 0) { + // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and + // could be optimized. + Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix); + URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body); + if (!Body.empty()) + Result.emplace_back(TokenURI.toString()); + } + return Result; +} + } // namespace dex } // namespace clangd } // namespace clang Index: clangd/index/dex/Token.h =================================================================== --- clangd/index/dex/Token.h +++ clangd/index/dex/Token.h @@ -41,6 +41,10 @@ /// Kind specifies Token type which defines semantics for the internal /// representation. Each Kind has different representation stored in Data /// field. + // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw + // strings. For example, PathURI store URIs of each directory and its parents, + // which induces a lot of overhead because these paths tend to be long and + // each parent directory is a prefix. enum class Kind { /// Represents trigram used for fuzzy search of unqualified symbol names. /// @@ -48,15 +52,20 @@ Trigram, /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". /// - /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global + /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global /// scope). Scope, + /// Path Proximity URI to symbol declaration. + /// + /// Data stores path URI of symbol declaration file or its parent. + /// + /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" + /// and some amount of its parents. + ProximityURI, /// Internal Token type for invalid/special tokens, e.g. empty tokens for /// llvm::DenseMap. Sentinel, /// FIXME(kbobyrev): Add other Token Kinds - /// * Path with full or relative path to the directory in which symbol is - /// defined /// * Type with qualified type name or its USR }; Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -275,8 +275,8 @@ // Load the index asynchronously. Meanwhile SwapIndex returns no results. SwapIndex *Placeholder; StaticIdx.reset(Placeholder = new SwapIndex(llvm::make_unique())); - runAsync([Placeholder] { - if (auto Idx = loadIndex(YamlSymbolFile)) + runAsync([Placeholder, &Opts] { + if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex)) Placeholder->reset(std::move(Idx)); }); } Index: unittests/clangd/DexIndexTests.cpp =================================================================== --- unittests/clangd/DexIndexTests.cpp +++ unittests/clangd/DexIndexTests.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "FuzzyMatch.h" +#include "TestFS.h" #include "TestIndex.h" #include "index/Index.h" #include "index/Merge.h" @@ -30,6 +32,12 @@ namespace dex { namespace { +std::vector URISchemes = {"unittest"}; + +//===----------------------------------------------------------------------===// +// Query iterator tests. +//===----------------------------------------------------------------------===// + std::vector consumeIDs(Iterator &It) { auto IDAndScore = consume(It); std::vector IDs(IDAndScore.size()); @@ -325,15 +333,24 @@ EXPECT_THAT(ElementBoost, 3); } +//===----------------------------------------------------------------------===// +// Search token tests. +//===----------------------------------------------------------------------===// + testing::Matcher> -trigramsAre(std::initializer_list Trigrams) { +tokensAre(std::initializer_list Strings, Token::Kind Kind) { std::vector Tokens; - for (const auto &Symbols : Trigrams) { - Tokens.push_back(Token(Token::Kind::Trigram, Symbols)); + for (const auto &TokenData : Strings) { + Tokens.push_back(Token(Kind, TokenData)); } return testing::UnorderedElementsAreArray(Tokens); } +testing::Matcher> +trigramsAre(std::initializer_list Trigrams) { + return tokensAre(Trigrams, Token::Kind::Trigram); +} + TEST(DexIndexTrigrams, IdentifierTrigrams) { EXPECT_THAT(generateIdentifierTrigrams("X86"), trigramsAre({"x86", "x$$", "x8$"})); @@ -408,8 +425,25 @@ "hij", "ijk", "jkl", "klm"})); } +TEST(DexSearchTokens, SymbolPath) { + EXPECT_THAT(generateProximityURIs( + "unittest:///clang-tools-extra/clangd/index/Token.h"), + ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h", + "unittest:///clang-tools-extra/clangd/index", + "unittest:///clang-tools-extra/clangd", + "unittest:///clang-tools-extra", "unittest:///")); + + EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"), + ElementsAre("unittest:///a/b/c.h", "unittest:///a/b", + "unittest:///a", "unittest:///")); +} + +//===----------------------------------------------------------------------===// +// Index tests. +//===----------------------------------------------------------------------===// + TEST(DexIndex, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -421,7 +455,8 @@ TEST(DexIndex, FuzzyFind) { auto Index = DexIndex::build( generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", - "other::ABC", "other::A"})); + "other::ABC", "other::A"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "ABC"; Req.Scopes = {"ns::"}; @@ -443,7 +478,8 @@ TEST(DexIndexTest, FuzzyMatchQ) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -461,12 +497,12 @@ symbol("2") /* duplicate */}; FuzzyFindRequest Req; Req.Query = "2"; - DexIndex I(Symbols); + DexIndex I(Symbols, URISchemes); EXPECT_THAT(match(I, Req), ElementsAre("2", "2")); } TEST(DexIndexTest, DexIndexLimitedNumMatches) { - auto I = DexIndex::build(generateNumSymbols(0, 100)); + auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes); FuzzyFindRequest Req; Req.Query = "5"; Req.MaxCandidateCount = 3; @@ -478,7 +514,8 @@ TEST(DexIndexTest, FuzzyMatch) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -487,14 +524,16 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3")); } TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {""}; @@ -503,7 +542,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -512,7 +551,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::", "b::"}; @@ -520,7 +559,7 @@ } TEST(DexIndexTest, NoMatchNestedScopes) { - auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"})); + auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -528,7 +567,7 @@ } TEST(DexIndexTest, IgnoreCases) { - auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"})); + auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes); FuzzyFindRequest Req; Req.Query = "AB"; Req.Scopes = {"ns::"}; @@ -536,7 +575,7 @@ } TEST(DexIndexTest, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -545,6 +584,31 @@ EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre()); } +TEST(DexIndexTest, ProximityPathsBoosting) { + auto RootSymbol = symbol("root::abc"); + RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h"; + auto CloseSymbol = symbol("close::abc"); + CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h"; + + std::vector Symbols{CloseSymbol, RootSymbol}; + DexIndex I(Symbols, URISchemes); + + FuzzyFindRequest Req; + Req.Query = "abc"; + // The best candidate can change depending on the proximity paths. + Req.MaxCandidateCount = 1; + + // FuzzyFind request comes from the file which is far from the root: expect + // CloseSymbol to come out. + Req.ProximityPaths = {testPath("a/b/c/d/e/f/file.h")}; + EXPECT_THAT(match(I, Req), ElementsAre("close::abc")); + + // FuzzyFind request comes from the file which is close to the root: expect + // RootSymbol to come out. + Req.ProximityPaths = {testPath("file.h")}; + EXPECT_THAT(match(I, Req), ElementsAre("root::abc")); +} + } // namespace } // namespace dex } // namespace clangd