Index: clang-tools-extra/; =================================================================== --- /dev/null +++ clang-tools-extra/; @@ -0,0 +1,68 @@ +//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "URI.h" +#include "llvm/Support/Path.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +std::vector generateProximityPaths(llvm::StringRef URIPath, + size_t Limit) { + std::vector Result; + // Don't generate any tokens for empty paths. + if (URIPath.empty()) + return Result; + auto ParsedURI = URI::parse(URIPath); + assert( + static_cast(ParsedURI) && + "Non-empty argument of generateProximityPaths() should be a valid URI."); + const auto Scheme = ParsedURI.get().scheme(); + const auto Authority = ParsedURI.get().authority(); + StringRef Path = ParsedURI.get().body(); + // Get parent directory of the file with symbol declaration. + Path = llvm::sys::path::parent_path(Path); + while (!Path.empty() && Limit--) { + URI TokenURI(Scheme, Authority, Path); + Result.emplace_back(Token::Kind::PathURI, TokenURI.toString()); + Path = llvm::sys::path::parent_path(Path); + } + return Result; +} + +std::vector> +generateQueryProximityPaths(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes, + size_t Count) { + std::vector> Result; + for (const auto Scheme : URISchemes) { + unsigned Distance = 0; + auto URI = URI::create(AbsolutePath, Scheme); + // For some paths, conversion to different URI schemes is impossible. These + // should be just skipped. + if (!static_cast(URI)) { + // Ignore the error. + handleAllErrors(URI.takeError(), [](const llvm::ErrorInfoBase &) {}); + continue; + } + for (const auto ProximityPath : + generateProximityPaths(URI.get().toString(), Count)) + Result.emplace_back(ProximityPath, Distance++); + // As soon as one valid URI scheme for given absolute path is found, the + // token generation process should be stopped. + break; + } + return Result; +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -46,6 +46,7 @@ index/dex/DexIndex.cpp index/dex/Iterator.cpp + index/dex/Token.cpp index/dex/Trigram.cpp LINK_LIBS Index: clang-tools-extra/clangd/index/dex/DexIndex.h =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.h +++ clang-tools-extra/clangd/index/dex/DexIndex.h @@ -39,12 +39,21 @@ // index on disk and then load it if available. class DexIndex : public SymbolIndex { public: + /// URISchemes are used to convert proximity paths from FuzzyFindRequest to + /// the desired URIs. URISchemes should contain a list of schemes sorted by + /// the priority, because each proximity path from FuzzyFindRequest will be + /// converted to the first scheme from URISchemes list which is valid for that + /// path. + explicit DexIndex(llvm::ArrayRef URISchemes) + : URISchemes(URISchemes) {} + /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain /// accessible as long as `Symbols` is kept alive. - void build(std::shared_ptr> Syms); + void build(std::shared_ptr> Symbols); /// \brief Build index from a symbol slab. - static std::unique_ptr build(SymbolSlab Slab); + static std::unique_ptr + build(SymbolSlab Slab, llvm::ArrayRef URISchemes); bool fuzzyFind(const FuzzyFindRequest &Req, @@ -60,12 +69,11 @@ size_t estimateMemoryUsage() const override; private: - mutable std::mutex Mutex; std::shared_ptr> Symbols /*GUARDED_BY(Mutex)*/; llvm::DenseMap LookupTable /*GUARDED_BY(Mutex)*/; - llvm::DenseMap SymbolQuality /*GUARDED_BY(Mutex)*/; + std::vector SymbolQuality /*GUARDED_BY(Mutex)*/; // Inverted index is a mapping from the search token to the posting list, // which contains all items which can be characterized by such search token. // For example, if the search token is scope "std::", the corresponding @@ -73,10 +81,12 @@ // Inverted index is used to retrieve posting lists which are processed during // the fuzzyFind process. llvm::DenseMap InvertedIndex /*GUARDED_BY(Mutex)*/; + + const std::vector URISchemes; }; } // namespace dex } // namespace clangd } // namespace clang -#endif +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.cpp +++ clang-tools-extra/clangd/index/dex/DexIndex.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "DexIndex.h" +#include "../../FileDistance.h" #include "../../FuzzyMatch.h" #include "../../Logger.h" #include @@ -26,11 +27,13 @@ // Returns the tokens which are given symbols's characteristics. For example, // trigrams and scopes. // FIXME(kbobyrev): Support more token types: -// * Path proximity // * Types std::vector generateSearchTokens(const Symbol &Sym) { std::vector Result = generateIdentifierTrigrams(Sym.Name); Result.emplace_back(Token::Kind::Scope, Sym.Scope); + for (const auto &PathToken : + generateProximityPathURIs(Sym.CanonicalDeclaration.FileURI)) + Result.push_back(PathToken); return Result; } @@ -38,17 +41,17 @@ void DexIndex::build(std::shared_ptr> Syms) { llvm::DenseMap TempLookupTable; - llvm::DenseMap TempSymbolQuality; + llvm::DenseMap SymbolQualityLookup; for (const Symbol *Sym : *Syms) { TempLookupTable[Sym->ID] = Sym; - TempSymbolQuality[Sym] = quality(*Sym); + SymbolQualityLookup[Sym] = quality(*Sym); } // Symbols are sorted by symbol qualities so that items in the posting lists // are stored in the descending order of symbol quality. std::sort(begin(*Syms), end(*Syms), [&](const Symbol *LHS, const Symbol *RHS) { - return TempSymbolQuality[LHS] > TempSymbolQuality[RHS]; + return SymbolQualityLookup[LHS] > SymbolQualityLookup[RHS]; }); llvm::DenseMap TempInvertedIndex; // Populate TempInvertedIndex with posting lists for index symbols. @@ -58,6 +61,10 @@ TempInvertedIndex[Token].push_back(SymbolRank); } + std::vector TempSymbolQuality(Syms->size()); + for (size_t I = 0; I < Syms->size(); ++I) + TempSymbolQuality[I] = SymbolQualityLookup[(*Syms)[I]]; + { std::lock_guard Lock(Mutex); @@ -72,8 +79,9 @@ estimateMemoryUsage()); } -std::unique_ptr DexIndex::build(SymbolSlab Slab) { - auto Idx = llvm::make_unique(); +std::unique_ptr +DexIndex::build(SymbolSlab Slab, llvm::ArrayRef URISchemes) { + auto Idx = llvm::make_unique(URISchemes); Idx->build(getSymbolsFromSlab(std::move(Slab))); return std::move(Idx); } @@ -117,6 +125,33 @@ if (!ScopeIterators.empty()) TopLevelChildren.push_back(createOr(move(ScopeIterators))); + // Add proximity paths boosting. + std::vector> BoostingIterators; + // Try to build BOOST iterator for each Proximity Path provided by + // FuzzyFindRequest. Boosting factor should depend on the distance to the + // Proximity Path: the closer processed path is, the higher boosting factor. + for (const auto &ProximityPath : Req.ProximityPaths) { + const auto ProximityPathURIs = + generateQueryProximityPathURIs(ProximityPath, URISchemes); + for (size_t Distance = 0; Distance < ProximityPathURIs.size(); + ++Distance) { + const auto It = InvertedIndex.find(ProximityPathURIs[Distance]); + if (It != InvertedIndex.end()) { + // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. + // Each parent directory is boosted by PARENTS_TO_BOOST + 1 - Level. + float BoostFactor = + std::exp(Distance * 0.4f / FileDistanceOptions().UpCost); + BoostingIterators.push_back( + createBoost(create(It->second), BoostFactor)); + } + } + } + // Boosting iterators do not actually filter symbols. In order to preserve + // the validity of resulting query, TRUE iterator should be added along + // BOOSTs. + BoostingIterators.push_back(createTrue(Symbols->size())); + TopLevelChildren.push_back(createOr(move(BoostingIterators))); + // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. auto QueryIterator = TopLevelChildren.empty() @@ -127,23 +162,35 @@ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. - const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount; auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); - // FIXME(kbobyrev): Add boosting to the query and utilize retrieved - // boosting scores. + std::vector> SymbolDocIDs = consume(*Root); + // Sort items using a combination of boosting score and quality. + // FIXME(kbobyrev): Consider merging this stage with the next one? This is + // a performance/quality trade-off and if the performance is not an issue + // there's plenty of quality to be bought by sacrificing part of + // performance. + std::sort(begin(SymbolDocIDs), end(SymbolDocIDs), + [&](const std::pair &LHS, + const std::pair &RHS) { + return SymbolQuality[LHS.first] * LHS.second > + SymbolQuality[RHS.first] * RHS.second; + }); + // Retrieve top Req.MaxCandidateCount items. + const size_t ItemsToScore = 10 * Req.MaxCandidateCount; std::priority_queue> Top; - for (const auto &P : SymbolDocIDs) { - const DocID SymbolDocID = P.first; + for (size_t I = 0; I < std::min(SymbolDocIDs.size(), ItemsToScore); ++I) { + const DocID SymbolDocID = SymbolDocIDs[I].first; const auto *Sym = (*Symbols)[SymbolDocID]; const llvm::Optional Score = Filter.match(Sym->Name); if (!Score) continue; // Multiply score by a negative factor so that Top stores items with the // highest actual score. - Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym); + Top.emplace(-(*Score) * SymbolQuality[SymbolDocID], Sym); if (Top.size() > Req.MaxCandidateCount) { More = true; Top.pop(); Index: clang-tools-extra/clangd/index/dex/Token.h =================================================================== --- clang-tools-extra/clangd/index/dex/Token.h +++ clang-tools-extra/clangd/index/dex/Token.h @@ -41,6 +41,10 @@ /// Kind specifies Token type which defines semantics for the internal /// representation. Each Kind has different representation stored in Data /// field. + // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw + // strings. For example, PathURI store URIs of each directory and its parents, + // which induces a lot of overhead because these paths tend to be long and + // each parent directory is a prefix. enum class Kind { /// Represents trigram used for fuzzy search of unqualified symbol names. /// @@ -48,15 +52,21 @@ Trigram, /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". /// - /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global + /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global /// scope). Scope, + /// Path URI to symbol declaration. + /// + /// Data stores path URI of file and each parent directory of symbol + /// declaration location. + /// + /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" + /// and all its parents. + PathURI, /// Internal Token type for invalid/special tokens, e.g. empty tokens for /// llvm::DenseMap. Sentinel, /// FIXME(kbobyrev): Add other Token Kinds - /// * Path with full or relative path to the directory in which symbol is - /// defined /// * Type with qualified type name or its USR }; @@ -81,6 +91,16 @@ } }; +/// Returns Search Token for a number of parent directories of given Path. +/// Should be used within the index build process. +std::vector generateProximityPathURIs(llvm::StringRef Path); + +/// Returns Search Token and its distance from the absolute path for the first +/// URI scheme allows valid conversions from AbsolutePath. +std::vector +generateQueryProximityPathURIs(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes); + } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/dex/Token.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Token.cpp @@ -0,0 +1,70 @@ +//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "URI.h" +#include "llvm/Support/Path.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +std::vector generateProximityPathURIs(llvm::StringRef URIPath) { + std::vector Result; + // Don't generate any tokens for empty paths. + if (URIPath.empty()) + return Result; + auto ParsedURI = URI::parse(URIPath); + assert(static_cast(ParsedURI) && + "Non-empty argument of generateProximityPathURIs() should be a valid " + "URI."); + const auto Scheme = ParsedURI->scheme(); + const auto Authority = ParsedURI->authority(); + StringRef Path = ParsedURI->body(); + // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum + // size of resulting vector. Some projects might want to have higher limit if + // the file hierarchy is deeper. For the generic case, it would be useful to + // calculate Limit in the index build stage by calculating the maximum depth + // of the project source tree at runtime. + size_t Limit = 5; + while (!Path.empty() && Limit--) { + // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and + // could be optimized. + URI TokenURI(Scheme, Authority, Path); + Result.emplace_back(Token::Kind::PathURI, TokenURI.toString()); + Path = llvm::sys::path::parent_path(Path, llvm::sys::path::Style::posix); + } + return Result; +} + +std::vector +generateQueryProximityPathURIs(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes) { + std::vector Result; + for (const auto Scheme : URISchemes) { + auto URI = URI::create(AbsolutePath, Scheme); + // For some paths, conversion to different URI schemes is impossible. These + // should be just skipped. + if (!static_cast(URI)) { + // Ignore the error. + llvm::consumeError(URI.takeError()); + continue; + } + for (const auto ProximityPath : generateProximityPathURIs(URI->toString())) + Result.emplace_back(ProximityPath); + // As soon as one valid URI scheme for given absolute path is found, the + // token generation process should be stopped. + break; + } + return Result; +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/tool/ClangdMain.cpp =================================================================== --- clang-tools-extra/clangd/tool/ClangdMain.cpp +++ clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -42,7 +42,9 @@ // Build an in-memory static index for global symbols from a YAML-format file. // The size of global symbols should be relatively small, so that all symbols // can be managed in memory. -std::unique_ptr buildStaticIndex(llvm::StringRef YamlSymbolFile) { +std::unique_ptr +buildStaticIndex(llvm::StringRef YamlSymbolFile, + std::vector &URISchemes) { auto Buffer = llvm::MemoryBuffer::getFile(YamlSymbolFile); if (!Buffer) { llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; @@ -53,9 +55,10 @@ for (auto Sym : Slab) SymsBuilder.insert(Sym); - return UseDex ? dex::DexIndex::build(std::move(SymsBuilder).build()) - : MemIndex::build(std::move(SymsBuilder).build(), - SymbolOccurrenceSlab::createEmpty()); + return UseDex + ? dex::DexIndex::build(std::move(SymsBuilder).build(), URISchemes) + : MemIndex::build(std::move(SymsBuilder).build(), + SymbolOccurrenceSlab::createEmpty()); } } // namespace @@ -299,7 +302,7 @@ Opts.BuildDynamicSymbolIndex = EnableIndex; std::unique_ptr StaticIdx; if (EnableIndex && !YamlSymbolFile.empty()) { - StaticIdx = buildStaticIndex(YamlSymbolFile); + StaticIdx = buildStaticIndex(YamlSymbolFile, Opts.URISchemes); Opts.StaticIndex = StaticIdx.get(); } Opts.AsyncThreadsCount = WorkerThreadsCount; Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "FuzzyMatch.h" +#include "TestFS.h" #include "TestIndex.h" #include "index/Index.h" #include "index/Merge.h" @@ -29,6 +31,8 @@ namespace dex { namespace { +std::vector URISchemes = {"unittest"}; + std::vector consumeIDs(Iterator &It) { auto IDAndScore = consume(It); std::vector IDs(IDAndScore.size()); @@ -325,14 +329,33 @@ } testing::Matcher> -trigramsAre(std::initializer_list Trigrams) { +tokensAre(std::initializer_list Strings, Token::Kind Kind) { std::vector Tokens; - for (const auto &Symbols : Trigrams) { - Tokens.push_back(Token(Token::Kind::Trigram, Symbols)); + for (const auto &TokenData : Strings) { + Tokens.push_back(Token(Kind, TokenData)); } return testing::UnorderedElementsAreArray(Tokens); } +testing::Matcher> +trigramsAre(std::initializer_list Trigrams) { + return tokensAre(Trigrams, Token::Kind::Trigram); +} + +testing::Matcher> +pathsAre(std::initializer_list Paths) { + return tokensAre(Paths, Token::Kind::PathURI); +} + +testing::Matcher> +proximityPathsAre(std::initializer_list ProximityPaths) { + std::vector Result; + for (const auto &Path : ProximityPaths) { + Result.emplace_back(Token::Kind::PathURI, Path); + } + return testing::UnorderedElementsAreArray(Result); +} + TEST(DexIndexTrigrams, IdentifierTrigrams) { EXPECT_THAT(generateIdentifierTrigrams("X86"), trigramsAre({"x86", "x$$", "x8$"})); @@ -407,8 +430,27 @@ "hij", "ijk", "jkl", "klm"})); } +TEST(DexSearchTokens, SymbolPath) { + EXPECT_THAT(generateProximityPathURIs( + "unittest:///clang-tools-extra/clangd/index/Token.h"), + pathsAre({"unittest:///clang-tools-extra/clangd/index/Token.h", + "unittest:///clang-tools-extra/clangd/index", + "unittest:///clang-tools-extra/clangd", + "unittest:///clang-tools-extra", "unittest:///"})); +} + +TEST(DexSearchTokens, QueryProximityDistances) { + EXPECT_THAT(generateQueryProximityPathURIs(testRoot() + "/a/b/c/d/e/f/g.h", + URISchemes), + proximityPathsAre({{"unittest:///a/b/c/d/e/f/g.h"}, + {"unittest:///a/b/c/d/e/f"}, + {"unittest:///a/b/c/d/e"}, + {"unittest:///a/b/c/d"}, + {"unittest:///a/b/c"}})); +} + TEST(DexIndex, Lookup) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"ns::abc", "ns::xyz"})); EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), @@ -419,7 +461,7 @@ } TEST(DexIndex, FuzzyFind) { - DexIndex Index; + DexIndex Index(URISchemes); Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC", "other::A"})); FuzzyFindRequest Req; @@ -442,7 +484,7 @@ } TEST(DexIndexTest, FuzzyMatchQ) { - DexIndex I; + DexIndex I(URISchemes); I.build( generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); FuzzyFindRequest Req; @@ -453,7 +495,7 @@ } TEST(DexIndexTest, DexIndexSymbolsRecycled) { - DexIndex I; + DexIndex I(URISchemes); std::weak_ptr Symbols; I.build(generateNumSymbols(0, 10, &Symbols)); FuzzyFindRequest Req; @@ -482,14 +524,14 @@ FuzzyFindRequest Req; Req.Query = "7"; - DexIndex I; + DexIndex I(URISchemes); I.build(std::move(Symbols)); auto Matches = match(I, Req); EXPECT_EQ(Matches.size(), 4u); } TEST(DexIndexTest, DexIndexLimitedNumMatches) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateNumSymbols(0, 100)); FuzzyFindRequest Req; Req.Query = "5"; @@ -501,7 +543,7 @@ } TEST(DexIndexTest, FuzzyMatch) { - DexIndex I; + DexIndex I(URISchemes); I.build( generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); FuzzyFindRequest Req; @@ -512,7 +554,7 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"a::y1", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; @@ -520,7 +562,7 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"a::y1", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; @@ -529,7 +571,7 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; @@ -538,7 +580,7 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; @@ -547,7 +589,7 @@ } TEST(DexIndexTest, NoMatchNestedScopes) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"a::y1", "a::b::y2"})); FuzzyFindRequest Req; Req.Query = "y"; @@ -556,7 +598,7 @@ } TEST(DexIndexTest, IgnoreCases) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"ns::ABC", "ns::abc"})); FuzzyFindRequest Req; Req.Query = "AB"; @@ -565,7 +607,7 @@ } TEST(DexIndexTest, Lookup) { - DexIndex I; + DexIndex I(URISchemes); I.build(generateSymbols({"ns::abc", "ns::xyz"})); EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), @@ -575,6 +617,58 @@ EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre()); } +TEST(DexIndexTest, ProximityPathsBoosting) { + DexIndex I(URISchemes); + + // All strings should be stored in local variables to ensure their lifetime is + // the same as Slab's. + auto RootURI = URI::create(testRoot() + "/file.h", "unittest"); + assert(static_cast(RootURI)); + auto CloseFileURI = URI::create(testRoot() + "/a/b/c/d/e/file.h", "unittest"); + assert(static_cast(CloseFileURI)); + + // Populate Symbol Slab + std::vector Slab; + // Symbol in the root will perfectly match the request, but its canonical + // declaration is in the root directory. + Symbol RootSymbol; + std::string RootSymbolName = "abc"; + RootSymbol.Name = RootSymbolName; + std::string RootFilename = RootURI->toString(); + RootSymbol.CanonicalDeclaration.FileURI = RootFilename; + Slab.push_back(&RootSymbol); + + // Another symbol has much lower Fuzzy Matching Score, but it is very close to + // the directory the request would come from. + Symbol CloseSymbol; + std::string CloseSymbolName = "AbsolutelyBizzareCoincidenceMatch"; + CloseSymbol.Name = CloseSymbolName; + std::string CloseFileName = CloseFileURI->toString(); + CloseSymbol.CanonicalDeclaration.FileURI = CloseFileName; + + FuzzyFindRequest Req; + Req.Query = "abc"; + Req.MaxCandidateCount = 1; + // FuzzyFind request comes from the file which is far from the root. + Req.ProximityPaths.push_back(testRoot() + "/a/b/c/d/e/f/file.h"); + + FuzzyMatcher Matcher(Req.Query); + EXPECT_TRUE(Matcher.match(RootSymbolName) > Matcher.match(CloseSymbolName)); + + // Insert duplicates so that only the boosted symbols end up in the scoring + // stage. + // FIXME(kbobyrev): 10 is the number of items passed to the last stage which + // involves fuzzy scoring. Would having access to that information be better + // here? + for (size_t Index = 0; Index < 10; ++Index) + Slab.push_back(&CloseSymbol); + + I.build(std::make_shared>(Slab)); + + EXPECT_THAT(match(I, Req), + UnorderedElementsAre("AbsolutelyBizzareCoincidenceMatch")); +} + } // namespace } // namespace dex } // namespace clangd Index: clang-tools-extra/unittests/clangd/TestFS.h =================================================================== --- clang-tools-extra/unittests/clangd/TestFS.h +++ clang-tools-extra/unittests/clangd/TestFS.h @@ -59,7 +59,7 @@ }; // Returns an absolute (fake) test directory for this OS. -const char *testRoot(); +std::string testRoot(); // Returns a suitable absolute path for this OS. std::string testPath(PathRef File); Index: clang-tools-extra/unittests/clangd/TestFS.cpp =================================================================== --- clang-tools-extra/unittests/clangd/TestFS.cpp +++ clang-tools-extra/unittests/clangd/TestFS.cpp @@ -64,7 +64,7 @@ FileName, std::move(CommandLine), "")}; } -const char *testRoot() { +std::string testRoot() { #ifdef _WIN32 return "C:\\clangd-test"; #else