Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -46,6 +46,7 @@ index/dex/DexIndex.cpp index/dex/Iterator.cpp + index/dex/Token.cpp index/dex/Trigram.cpp LINK_LIBS Index: clang-tools-extra/clangd/Quality.h =================================================================== --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -125,6 +125,10 @@ /// Combine symbol quality and relevance into a single score. float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); +/// Returns proximity score given distance between a parent and a child +/// directories URIs. +float parentProximityScore(unsigned Distance); + /// TopN is a lossy container that preserves only the "best" N elements. template > class TopN { public: Index: clang-tools-extra/clangd/Quality.cpp =================================================================== --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -296,13 +296,17 @@ NeedsFixIts = !SemaCCResult.FixIts.empty(); } +float parentProximityScore(unsigned Distance) { + return std::exp(Distance * -.4f / FileDistanceOptions().UpCost); +} + static std::pair proximityScore(llvm::StringRef SymbolURI, URIDistance *D) { if (!D || SymbolURI.empty()) return {0.f, 0u}; unsigned Distance = D->distance(SymbolURI); // Assume approximately default options are used for sensible scoring. - return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; + return {parentProximityScore(Distance), Distance}; } float SymbolRelevanceSignals::evaluate() const { Index: clang-tools-extra/clangd/index/dex/DexIndex.h =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.h +++ clang-tools-extra/clangd/index/dex/DexIndex.h @@ -39,22 +39,26 @@ class DexIndex : public SymbolIndex { public: // All symbols must outlive this index. - template DexIndex(Range &&Symbols) { + template + DexIndex(Range &&Symbols, llvm::ArrayRef URISchemes) + : URISchemes(URISchemes) { for (auto &&Sym : Symbols) this->Symbols.push_back(&Sym); buildIndex(); } // Symbols are owned by BackingData, Index takes ownership. template - DexIndex(Range &&Symbols, Payload &&BackingData) - : DexIndex(std::forward(Symbols)) { + DexIndex(Range &&Symbols, Payload &&BackingData, + llvm::ArrayRef URISchemes) + : DexIndex(std::forward(Symbols), URISchemes) { KeepAlive = std::shared_ptr( std::make_shared(std::move(BackingData)), nullptr); } /// Builds an index from a slab. The index takes ownership of the slab. - static std::unique_ptr build(SymbolSlab Slab) { - return llvm::make_unique(Slab, std::move(Slab)); + static std::unique_ptr + build(SymbolSlab Slab, llvm::ArrayRef URISchemes) { + return llvm::make_unique(Slab, std::move(Slab), URISchemes); } bool @@ -73,21 +77,25 @@ private: void buildIndex(); + /// Stores symbols in the descending order using symbol quality as the key. std::vector Symbols; + /// Maps DocID to the quality of underlying symbol. + std::vector SymbolQuality; llvm::DenseMap LookupTable; - llvm::DenseMap SymbolQuality; - // Inverted index is a mapping from the search token to the posting list, - // which contains all items which can be characterized by such search token. - // For example, if the search token is scope "std::", the corresponding - // posting list would contain all indices of symbols defined in namespace std. - // Inverted index is used to retrieve posting lists which are processed during - // the fuzzyFind process. + /// Inverted index is a mapping from the search token to the posting list, + /// which contains all items which can be characterized by such search token. + /// For example, if the search token is scope "std::", the corresponding + /// posting list would contain all indices of symbols defined in namespace + /// std. Inverted index is used to retrieve posting lists which are processed + /// during the fuzzyFind process. llvm::DenseMap InvertedIndex; std::shared_ptr KeepAlive; // poor man's move-only std::any + + const std::vector URISchemes; }; } // namespace dex } // namespace clangd } // namespace clang -#endif +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.cpp +++ clang-tools-extra/clangd/index/dex/DexIndex.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "DexIndex.h" +#include "../../FileDistance.h" #include "../../FuzzyMatch.h" #include "../../Logger.h" +#include "../../Quality.h" #include #include @@ -31,23 +33,44 @@ std::vector generateSearchTokens(const Symbol &Sym) { std::vector Result = generateIdentifierTrigrams(Sym.Name); Result.emplace_back(Token::Kind::Scope, Sym.Scope); + for (const auto &ProximityToken : + generateProximityURIs(Sym.CanonicalDeclaration.FileURI)) + Result.emplace_back(ProximityToken); return Result; } +struct SymbolAndScore { + const Symbol *Sym; + float Score; +}; + +bool operator>(const SymbolAndScore &LHS, const SymbolAndScore &RHS) { + return LHS.Score > RHS.Score; +} + } // namespace void DexIndex::buildIndex() { - for (const Symbol *Sym : Symbols) { + std::vector SymbolAndScores(Symbols.size()); + + for (size_t I = 0; I < Symbols.size(); ++I) { + const Symbol *Sym = Symbols[I]; LookupTable[Sym->ID] = Sym; - SymbolQuality[Sym] = quality(*Sym); + SymbolAndScores[I] = {Sym, static_cast(quality(*Sym))}; } // Symbols are sorted by symbol qualities so that items in the posting lists // are stored in the descending order of symbol quality. - std::sort(begin(Symbols), end(Symbols), - [&](const Symbol *LHS, const Symbol *RHS) { - return SymbolQuality[LHS] > SymbolQuality[RHS]; - }); + std::sort(begin(SymbolAndScores), end(SymbolAndScores), + std::greater()); + + // SymbolQuality was empty up until now. + SymbolQuality.resize(Symbols.size()); + // Populate internal storage using Symbol + Score pairs. + for (size_t I = 0; I < SymbolAndScores.size(); ++I) { + Symbols[I] = SymbolAndScores[I].Sym; + SymbolQuality[I] = SymbolAndScores[I].Score; + } // Populate TempInvertedIndex with posting lists for index symbols. for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) { @@ -96,6 +119,34 @@ if (!ScopeIterators.empty()) TopLevelChildren.push_back(createOr(move(ScopeIterators))); + // Add proximity paths boosting. + std::vector> BoostingIterators; + // Try to build BOOST iterator for each Proximity Path provided by + // FuzzyFindRequest. Boosting factor should depend on the distance to the + // Proximity Path: the closer processed path is, the higher boosting factor. + for (const auto &ProximityPath : Req.ProximityPaths) { + const auto ProximityURIs = + generateQueryProximityURIs(ProximityPath, URISchemes); + for (size_t Distance = 0; Distance < ProximityURIs.size(); ++Distance) { + const auto It = InvertedIndex.find(ProximityURIs[Distance]); + if (It != InvertedIndex.end()) { + // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. + // parentProximityScore() scales score within [0, 1] range so that it is + // aligned with final Quality measurements. + float BoostFactor = 1 + parentProximityScore(Distance); + BoostingIterators.push_back( + createBoost(create(It->second), BoostFactor)); + } + } + } + // Boosting iterators do not actually filter symbols. In order to preserve + // the validity of resulting query, TRUE iterator should be added along + // BOOSTs. + if (!BoostingIterators.empty()) { + BoostingIterators.push_back(createTrue(Symbols.size())); + TopLevelChildren.push_back(createOr(move(BoostingIterators))); + } + // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. auto QueryIterator = TopLevelChildren.empty() @@ -106,32 +157,43 @@ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. - const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount; auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); - // FIXME(kbobyrev): Add boosting to the query and utilize retrieved - // boosting scores. - std::vector> SymbolDocIDs = consume(*Root); + + std::vector IDAndScores = consume(*Root); + + // Multiply scores by the quality once to use the cumulative score in both + // subsequent filtering stages. + for (size_t I = 0; I < IDAndScores.size(); ++I) + IDAndScores[I].Score *= SymbolQuality[IDAndScores[I].ID]; + + // Sort items using a combination of boosting score and quality. + // FIXME(kbobyrev): Consider merging this stage with the next one? This is + // a performance/quality trade-off and if the performance is not an issue + // there's plenty of quality to be bought by sacrificing part of + // performance. + std::sort(begin(IDAndScores), end(IDAndScores), + std::greater()); // Retrieve top Req.MaxCandidateCount items. - std::priority_queue> Top; - for (const auto &P : SymbolDocIDs) { - const DocID SymbolDocID = P.first; + const size_t ItemsToScore = 10 * Req.MaxCandidateCount; + TopN Top(Req.MaxCandidateCount); + for (size_t I = 0; I < std::min(IDAndScores.size(), ItemsToScore); ++I) { + const DocID SymbolDocID = IDAndScores[I].ID; const auto *Sym = Symbols[SymbolDocID]; const llvm::Optional Score = Filter.match(Sym->Name); if (!Score) continue; - // Multiply score by a negative factor so that Top stores items with the - // highest actual score. - Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym); - if (Top.size() > Req.MaxCandidateCount) { + // If Top.push(...) returns true, it means that it had to pop an item. In + // this case, it is possible to retrieve more symbols. + if (Top.push({SymbolDocID, (*Score) * SymbolQuality[SymbolDocID]})) More = true; - Top.pop(); - } } - // Apply callback to the top Req.MaxCandidateCount items. - for (; !Top.empty(); Top.pop()) - Callback(*Top.top().second); + // Apply callback to the top Req.MaxCandidateCount items in the descending + // order of cumulative score. + for (const auto &Item : std::move(Top).items()) + Callback(*Symbols[Item.ID]); return More; } Index: clang-tools-extra/clangd/index/dex/Iterator.h =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.h +++ clang-tools-extra/clangd/index/dex/Iterator.h @@ -121,6 +121,13 @@ virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; }; +struct DocIDAndScore { + DocID ID; + float Score; +}; + +bool operator>(const DocIDAndScore &LHS, const DocIDAndScore &RHS); + /// Advances the iterator until it is exhausted. Returns pairs of document IDs /// with the corresponding boosting score. /// @@ -129,7 +136,7 @@ /// and not retrieving enough items so that items with very high final score /// would not be processed. Boosting score is a computationally efficient way /// to acquire preliminary scores of requested items. -std::vector> consume(Iterator &It); +std::vector consume(Iterator &It); /// Returns a document iterator over given PostingList. /// Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.cpp +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -389,10 +389,14 @@ } // end namespace -std::vector> consume(Iterator &It) { - std::vector> Result; +bool operator>(const DocIDAndScore &LHS, const DocIDAndScore &RHS) { + return LHS.Score > RHS.Score; +} + +std::vector consume(Iterator &It) { + std::vector Result; for (; !It.reachedEnd(); It.advance()) - Result.emplace_back(It.peek(), It.consume()); + Result.push_back({It.peek(), It.consume()}); return Result; } Index: clang-tools-extra/clangd/index/dex/Token.h =================================================================== --- clang-tools-extra/clangd/index/dex/Token.h +++ clang-tools-extra/clangd/index/dex/Token.h @@ -41,6 +41,10 @@ /// Kind specifies Token type which defines semantics for the internal /// representation. Each Kind has different representation stored in Data /// field. + // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw + // strings. For example, PathURI store URIs of each directory and its parents, + // which induces a lot of overhead because these paths tend to be long and + // each parent directory is a prefix. enum class Kind { /// Represents trigram used for fuzzy search of unqualified symbol names. /// @@ -48,15 +52,20 @@ Trigram, /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". /// - /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global + /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global /// scope). Scope, + /// Path Proximity URI to symbol declaration. + /// + /// Data stores path URI of symbol declaration file or its parent. + /// + /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" + /// and some amount of its parents. + ProximityURI, /// Internal Token type for invalid/special tokens, e.g. empty tokens for /// llvm::DenseMap. Sentinel, /// FIXME(kbobyrev): Add other Token Kinds - /// * Path with full or relative path to the directory in which symbol is - /// defined /// * Type with qualified type name or its USR }; @@ -81,6 +90,16 @@ } }; +/// Returns Search Token for a number of parent directories of given Path. +/// Should be used within the index build process. +std::vector generateProximityURIs(llvm::StringRef Path); + +/// Returns Search Token and its distance from the absolute path for the first +/// URI scheme allows valid conversions from AbsolutePath. +std::vector +generateQueryProximityURIs(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes); + } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/dex/Token.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Token.cpp @@ -0,0 +1,73 @@ +//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "URI.h" +#include "llvm/Support/Path.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +std::vector generateProximityURIs(llvm::StringRef URIPath) { + std::vector Result; + // Don't generate any tokens for empty paths. + if (URIPath.empty()) + return Result; + auto ParsedURI = URI::parse(URIPath); + assert(ParsedURI && + "Non-empty argument of generateProximityURIs() should be a valid " + "URI."); + const auto Scheme = ParsedURI->scheme(); + const auto Authority = ParsedURI->authority(); + StringRef Path = ParsedURI->body(); + // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum + // size of resulting vector. Some projects might want to have higher limit if + // the file hierarchy is deeper. For the generic case, it would be useful to + // calculate Limit in the index build stage by calculating the maximum depth + // of the project source tree at runtime. + size_t Limit = 5; + // Insert original URI before the loop: this would save a redundant iteration + // with a URI parse. + Result.emplace_back(Token::Kind::ProximityURI, ParsedURI->toString()); + while (!Path.empty() && --Limit) { + // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and + // could be optimized. + Path = llvm::sys::path::parent_path(Path, llvm::sys::path::Style::posix); + URI TokenURI(Scheme, Authority, Path); + Result.emplace_back(Token::Kind::ProximityURI, TokenURI.toString()); + } + return Result; +} + +std::vector +generateQueryProximityURIs(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes) { + std::vector Result; + for (const auto Scheme : URISchemes) { + auto URI = URI::create(AbsolutePath, Scheme); + // For some paths, conversion to different URI schemes is impossible. These + // should be just skipped. + if (!URI) { + // Ignore the error. + llvm::consumeError(URI.takeError()); + continue; + } + for (const auto ProximityPath : generateProximityURIs(URI->toString())) + Result.emplace_back(ProximityPath); + // As soon as one valid URI scheme for given absolute path is found, the + // token generation process should be stopped. + break; + } + return Result; +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/tool/ClangdMain.cpp =================================================================== --- clang-tools-extra/clangd/tool/ClangdMain.cpp +++ clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -42,7 +42,9 @@ // Build an in-memory static index for global symbols from a YAML-format file. // The size of global symbols should be relatively small, so that all symbols // can be managed in memory. -std::unique_ptr buildStaticIndex(llvm::StringRef YamlSymbolFile) { +std::unique_ptr +buildStaticIndex(llvm::StringRef YamlSymbolFile, + llvm::ArrayRef URISchemes) { auto Buffer = llvm::MemoryBuffer::getFile(YamlSymbolFile); if (!Buffer) { llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; @@ -53,9 +55,10 @@ for (auto Sym : Slab) SymsBuilder.insert(Sym); - return UseDex ? dex::DexIndex::build(std::move(SymsBuilder).build()) - : MemIndex::build(std::move(SymsBuilder).build(), - SymbolOccurrenceSlab::createEmpty()); + return UseDex + ? dex::DexIndex::build(std::move(SymsBuilder).build(), URISchemes) + : MemIndex::build(std::move(SymsBuilder).build(), + SymbolOccurrenceSlab::createEmpty()); } } // namespace @@ -299,7 +302,7 @@ Opts.BuildDynamicSymbolIndex = EnableIndex; std::unique_ptr StaticIdx; if (EnableIndex && !YamlSymbolFile.empty()) { - StaticIdx = buildStaticIndex(YamlSymbolFile); + StaticIdx = buildStaticIndex(YamlSymbolFile, Opts.URISchemes); Opts.StaticIndex = StaticIdx.get(); } Opts.AsyncThreadsCount = WorkerThreadsCount; Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "FuzzyMatch.h" +#include "TestFS.h" #include "TestIndex.h" #include "index/Index.h" #include "index/Merge.h" @@ -30,11 +32,17 @@ namespace dex { namespace { +std::vector URISchemes = {"unittest"}; + +//===----------------------------------------------------------------------===// +// Query iterator tests. +//===----------------------------------------------------------------------===// + std::vector consumeIDs(Iterator &It) { auto IDAndScore = consume(It); std::vector IDs(IDAndScore.size()); for (size_t I = 0; I < IDAndScore.size(); ++I) - IDs[I] = IDAndScore[I].first; + IDs[I] = IDAndScore[I].ID; return IDs; } @@ -325,15 +333,38 @@ EXPECT_THAT(ElementBoost, 3); } +//===----------------------------------------------------------------------===// +// Search token tests. +//===----------------------------------------------------------------------===// + testing::Matcher> -trigramsAre(std::initializer_list Trigrams) { +tokensAre(std::initializer_list Strings, Token::Kind Kind) { std::vector Tokens; - for (const auto &Symbols : Trigrams) { - Tokens.push_back(Token(Token::Kind::Trigram, Symbols)); + for (const auto &TokenData : Strings) { + Tokens.push_back(Token(Kind, TokenData)); } return testing::UnorderedElementsAreArray(Tokens); } +testing::Matcher> +trigramsAre(std::initializer_list Trigrams) { + return tokensAre(Trigrams, Token::Kind::Trigram); +} + +testing::Matcher> +pathsAre(std::initializer_list Paths) { + return tokensAre(Paths, Token::Kind::ProximityURI); +} + +testing::Matcher> +proximityPathsAre(std::initializer_list ProximityPaths) { + std::vector Result; + for (const auto &Path : ProximityPaths) { + Result.emplace_back(Token::Kind::ProximityURI, Path); + } + return testing::UnorderedElementsAreArray(Result); +} + TEST(DexIndexTrigrams, IdentifierTrigrams) { EXPECT_THAT(generateIdentifierTrigrams("X86"), trigramsAre({"x86", "x$$", "x8$"})); @@ -408,8 +439,32 @@ "hij", "ijk", "jkl", "klm"})); } +TEST(DexSearchTokens, SymbolPath) { + EXPECT_THAT(generateProximityURIs( + "unittest:///clang-tools-extra/clangd/index/Token.h"), + pathsAre({"unittest:///clang-tools-extra/clangd/index/Token.h", + "unittest:///clang-tools-extra/clangd/index", + "unittest:///clang-tools-extra/clangd", + "unittest:///clang-tools-extra", "unittest:///"})); +} + +TEST(DexSearchTokens, QueryProximityDistances) { + llvm::SmallString<30> Path(testRoot()); + llvm::sys::path::append(Path, "/a/b/c/d/e/f/g.h"); + EXPECT_THAT(generateQueryProximityURIs(Path, URISchemes), + proximityPathsAre({{"unittest:///a/b/c/d/e/f/g.h"}, + {"unittest:///a/b/c/d/e/f"}, + {"unittest:///a/b/c/d/e"}, + {"unittest:///a/b/c/d"}, + {"unittest:///a/b/c"}})); +} + +//===----------------------------------------------------------------------===// +// Index tests. +//===----------------------------------------------------------------------===// + TEST(DexIndex, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -421,7 +476,8 @@ TEST(DexIndex, FuzzyFind) { auto Index = DexIndex::build( generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", - "other::ABC", "other::A"})); + "other::ABC", "other::A"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "ABC"; Req.Scopes = {"ns::"}; @@ -443,7 +499,8 @@ TEST(DexIndexTest, FuzzyMatchQ) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -461,12 +518,12 @@ symbol("2") /* duplicate */}; FuzzyFindRequest Req; Req.Query = "2"; - DexIndex I(Symbols); + DexIndex I(Symbols, URISchemes); EXPECT_THAT(match(I, Req), ElementsAre("2", "2")); } TEST(DexIndexTest, DexIndexLimitedNumMatches) { - auto I = DexIndex::build(generateNumSymbols(0, 100)); + auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes); FuzzyFindRequest Req; Req.Query = "5"; Req.MaxCandidateCount = 3; @@ -478,7 +535,8 @@ TEST(DexIndexTest, FuzzyMatch) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -487,14 +545,16 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3")); } TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {""}; @@ -503,7 +563,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -512,7 +572,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::", "b::"}; @@ -520,7 +580,7 @@ } TEST(DexIndexTest, NoMatchNestedScopes) { - auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"})); + auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -528,7 +588,7 @@ } TEST(DexIndexTest, IgnoreCases) { - auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"})); + auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes); FuzzyFindRequest Req; Req.Query = "AB"; Req.Scopes = {"ns::"}; @@ -536,7 +596,7 @@ } TEST(DexIndexTest, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -545,6 +605,62 @@ EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre()); } +TEST(DexIndexTest, ProximityPathsBoosting) { + // All strings should be stored in local variables to ensure their lifetime is + // the same as Slab's. + std::string Name = "abc"; + + // Populate Symbol Slab: generate two symbols with the same characteristics + // and hence quality, but different CanonicalDeclaration file. + SymbolSlab::Builder Builder; + + Symbol RootSymbol; + RootSymbol.Name = Name; + llvm::SmallString<30> RootPath(testRoot()); + llvm::sys::path::append(RootPath, "file.h"); + auto RootURI = URI::create(RootPath, "unittest"); + assert(RootURI); + std::string RootFilename = RootURI->toString(); + RootSymbol.CanonicalDeclaration.FileURI = RootFilename; + RootSymbol.ID = SymbolID("RootSymbol"); + Builder.insert(RootSymbol); + + Symbol CloseSymbol; + CloseSymbol.Name = Name; + llvm::SmallString<30> ClosePath(testRoot()); + llvm::sys::path::append(ClosePath, "/a/b/c/d/e/f/file.h"); + auto CloseFileURI = URI::create(ClosePath, "unittest"); + assert(CloseFileURI); + std::string CloseFileName = CloseFileURI->toString(); + CloseSymbol.CanonicalDeclaration.FileURI = CloseFileName; + CloseSymbol.ID = SymbolID("CloseSymbol"); + Builder.insert(CloseSymbol); + + auto I = DexIndex::build(std::move(Builder).build(), URISchemes); + + FuzzyFindRequest Req; + Req.Query = "abc"; + // Return only one element, because the order of callback calls is not + // specified: items with lower score can be matched first. Therefore, to check + // whether the necessary item has higher score, test should only retrieve one + // element (with highest cumulative score). + Req.MaxCandidateCount = 1; + + // FuzzyFind request comes from the file which is far from the root: expect + // CloseSymbol to come out. + llvm::SmallString<30> RequestPath(testRoot()); + llvm::sys::path::append(RequestPath, "/a/b/c/d/e/f/file.h"); + Req.ProximityPaths = {RequestPath.str()}; + EXPECT_THAT(matchDeclURIs(*I, Req), ElementsAre(CloseFileURI->toString())); + + // FuzzyFind request comes from the file which is close to the root: expect + // RootSymbol to come out. + RequestPath = testRoot(); + llvm::sys::path::append(RequestPath, "file.h"); + Req.ProximityPaths = {RequestPath.str()}; + EXPECT_THAT(matchDeclURIs(*I, Req), ElementsAre(RootURI->toString())); +} + } // namespace } // namespace dex } // namespace clangd Index: clang-tools-extra/unittests/clangd/TestIndex.h =================================================================== --- clang-tools-extra/unittests/clangd/TestIndex.h +++ clang-tools-extra/unittests/clangd/TestIndex.h @@ -39,6 +39,13 @@ const FuzzyFindRequest &Req, bool *Incomplete = nullptr); +// Performs fuzzy matching-based symbol lookup given a query and an index. +// Incomplete is set true if more items than requested can be retrieved, false +// otherwise. Returns filename URIs of matched symbols canonical declarations. +std::vector matchDeclURIs(const SymbolIndex &I, + const FuzzyFindRequest &Req, + bool *Incomplete = nullptr); + // Returns qualified names of symbols with any of IDs in the index. std::vector lookup(const SymbolIndex &I, llvm::ArrayRef IDs); Index: clang-tools-extra/unittests/clangd/TestIndex.cpp =================================================================== --- clang-tools-extra/unittests/clangd/TestIndex.cpp +++ clang-tools-extra/unittests/clangd/TestIndex.cpp @@ -55,6 +55,18 @@ return Matches; } +std::vector matchDeclURIs(const SymbolIndex &I, + const FuzzyFindRequest &Req, + bool *Incomplete) { + std::vector Matches; + bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) { + Matches.push_back(Sym.CanonicalDeclaration.FileURI); + }); + if (Incomplete) + *Incomplete = IsIncomplete; + return Matches; +} + // Returns qualified names of symbols with any of IDs in the index. std::vector lookup(const SymbolIndex &I, llvm::ArrayRef IDs) {