Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -48,6 +48,7 @@ index/dex/DexIndex.cpp index/dex/Iterator.cpp + index/dex/Token.cpp index/dex/Trigram.cpp LINK_LIBS Index: clang-tools-extra/clangd/FileDistance.h =================================================================== --- clang-tools-extra/clangd/FileDistance.h +++ clang-tools-extra/clangd/FileDistance.h @@ -37,6 +37,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H + #include "URI.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -107,3 +110,5 @@ } // namespace clangd } // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H Index: clang-tools-extra/clangd/Quality.cpp =================================================================== --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// + #include "Quality.h" #include "FileDistance.h" #include "URI.h" @@ -302,7 +303,7 @@ return {0.f, 0u}; unsigned Distance = D->distance(SymbolURI); // Assume approximately default options are used for sensible scoring. - return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; + return {std::exp(Distance * -.4f / FileDistanceOptions().UpCost), Distance}; } float SymbolRelevanceSignals::evaluate() const { Index: clang-tools-extra/clangd/index/SymbolYAML.h =================================================================== --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -45,6 +45,7 @@ // The size of global symbols should be relatively small, so that all symbols // can be managed in memory. std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, + llvm::ArrayRef URISchemes, bool UseDex = true); } // namespace clangd Index: clang-tools-extra/clangd/index/SymbolYAML.cpp =================================================================== --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -185,6 +185,7 @@ } std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, + llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); @@ -209,7 +210,7 @@ if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::DexIndex::build(std::move(*Slab)) + return UseDex ? dex::DexIndex::build(std::move(*Slab), URISchemes) : MemIndex::build(std::move(*Slab), RefSlab()); } Index: clang-tools-extra/clangd/index/dex/DexIndex.h =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.h +++ clang-tools-extra/clangd/index/dex/DexIndex.h @@ -39,22 +39,26 @@ class DexIndex : public SymbolIndex { public: // All symbols must outlive this index. - template DexIndex(Range &&Symbols) { + template + DexIndex(Range &&Symbols, llvm::ArrayRef URISchemes) + : URISchemes(URISchemes) { for (auto &&Sym : Symbols) this->Symbols.push_back(&Sym); buildIndex(); } // Symbols are owned by BackingData, Index takes ownership. template - DexIndex(Range &&Symbols, Payload &&BackingData) - : DexIndex(std::forward(Symbols)) { + DexIndex(Range &&Symbols, Payload &&BackingData, + llvm::ArrayRef URISchemes) + : DexIndex(std::forward(Symbols), URISchemes) { KeepAlive = std::shared_ptr( std::make_shared(std::move(BackingData)), nullptr); } /// Builds an index from a slab. The index takes ownership of the slab. - static std::unique_ptr build(SymbolSlab Slab) { - return llvm::make_unique(Slab, std::move(Slab)); + static std::unique_ptr + build(SymbolSlab Slab, llvm::ArrayRef URISchemes) { + return llvm::make_unique(Slab, std::move(Slab), URISchemes); } bool @@ -72,21 +76,25 @@ private: void buildIndex(); + /// Stores symbols sorted in the descending order of symbol quality.. std::vector Symbols; + /// SymbolQuality[I] is the quality of Symbols[I]. + std::vector SymbolQuality; llvm::DenseMap LookupTable; - llvm::DenseMap SymbolQuality; - // Inverted index is a mapping from the search token to the posting list, - // which contains all items which can be characterized by such search token. - // For example, if the search token is scope "std::", the corresponding - // posting list would contain all indices of symbols defined in namespace std. - // Inverted index is used to retrieve posting lists which are processed during - // the fuzzyFind process. + /// Inverted index is a mapping from the search token to the posting list, + /// which contains all items which can be characterized by such search token. + /// For example, if the search token is scope "std::", the corresponding + /// posting list would contain all indices of symbols defined in namespace + /// std. Inverted index is used to retrieve posting lists which are processed + /// during the fuzzyFind process. llvm::DenseMap InvertedIndex; std::shared_ptr KeepAlive; // poor man's move-only std::any + + const std::vector URISchemes; }; } // namespace dex } // namespace clangd } // namespace clang -#endif +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/DexIndex.cpp +++ clang-tools-extra/clangd/index/dex/DexIndex.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "DexIndex.h" +#include "../../FileDistance.h" #include "../../FuzzyMatch.h" #include "../../Logger.h" +#include "../../Quality.h" #include #include @@ -26,28 +28,39 @@ // Returns the tokens which are given symbols's characteristics. For example, // trigrams and scopes. // FIXME(kbobyrev): Support more token types: -// * Path proximity // * Types +// * Namespace proximity std::vector generateSearchTokens(const Symbol &Sym) { std::vector Result = generateIdentifierTrigrams(Sym.Name); Result.emplace_back(Token::Kind::Scope, Sym.Scope); + for (const auto &ProximityToken : + generatePathProximityTokens(Sym.CanonicalDeclaration.FileURI)) + Result.emplace_back(ProximityToken); return Result; } } // namespace void DexIndex::buildIndex() { - for (const Symbol *Sym : Symbols) { + std::vector> ScoredSymbols(Symbols.size()); + + for (size_t I = 0; I < Symbols.size(); ++I) { + const Symbol *Sym = Symbols[I]; LookupTable[Sym->ID] = Sym; - SymbolQuality[Sym] = quality(*Sym); + ScoredSymbols[I] = {static_cast(quality(*Sym)), Sym}; } // Symbols are sorted by symbol qualities so that items in the posting lists // are stored in the descending order of symbol quality. - std::sort(begin(Symbols), end(Symbols), - [&](const Symbol *LHS, const Symbol *RHS) { - return SymbolQuality[LHS] > SymbolQuality[RHS]; - }); + std::sort(begin(ScoredSymbols), end(ScoredSymbols)); + + // SymbolQuality was empty up until now. + SymbolQuality.resize(Symbols.size()); + // Populate internal storage using Symbol + Score pairs. + for (size_t I = 0; I < ScoredSymbols.size(); ++I) { + SymbolQuality[I] = ScoredSymbols[I].first; + Symbols[I] = ScoredSymbols[I].second; + } // Populate TempInvertedIndex with posting lists for index symbols. for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) { @@ -96,6 +109,40 @@ if (!ScopeIterators.empty()) TopLevelChildren.push_back(createOr(move(ScopeIterators))); + // Add proximity paths boosting. + std::vector> BoostingIterators; + // Try to build BOOST iterator for each Proximity Path provided by + // FuzzyFindRequest. Boosting factor should depend on the distance to the + // Proximity Path: the closer processed path is, the higher boosting factor. + SymbolRelevanceSignals PathProximitySignals; + for (const auto &ProximityPath : Req.ProximityPaths) { + const auto PathProximityTokens = + generateQueryPathProximityTokens(ProximityPath, URISchemes); + llvm::StringMap URIToParams; + // Use default distance parameters for the first URI in ProximityURIs, which + // corresponds to the query path URI. + URIToParams[PathProximityTokens.front().Data] = SourceParams(); + URIDistance DistanceCalculator(URIToParams); + PathProximitySignals.FileProximityMatch = &DistanceCalculator; + for (const auto &ProximityToken : PathProximityTokens) { + const auto It = InvertedIndex.find(ProximityToken); + if (It != InvertedIndex.end()) { + // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. + PathProximitySignals.SymbolURI = ProximityToken.Data; + float BoostFactor = 1 + PathProximitySignals.evaluate(); + BoostingIterators.push_back( + createBoost(create(It->second), BoostFactor)); + } + } + } + // Boosting iterators do not actually filter symbols. In order to preserve + // the validity of resulting query, TRUE iterator should be added along + // BOOSTs. + if (!BoostingIterators.empty()) { + BoostingIterators.push_back(createTrue(Symbols.size())); + TopLevelChildren.push_back(createOr(move(BoostingIterators))); + } + // Use TRUE iterator if both trigrams and scopes from the query are not // present in the symbol index. auto QueryIterator = TopLevelChildren.empty() @@ -106,32 +153,37 @@ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. - const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount; auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); - // FIXME(kbobyrev): Add boosting to the query and utilize retrieved - // boosting scores. - std::vector> SymbolDocIDs = consume(*Root); - - // Retrieve top Req.MaxCandidateCount items. - std::priority_queue> Top; - for (const auto &P : SymbolDocIDs) { - const DocID SymbolDocID = P.first; + + std::vector> IDAndScores = consume(*Root); + + auto Compare = [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.second > RHS.second; + }; + TopN, decltype(Compare)> Top(Req.MaxCandidateCount, + Compare); + for (const auto &IDAndScore : IDAndScores) { + const DocID SymbolDocID = IDAndScore.first; const auto *Sym = Symbols[SymbolDocID]; const llvm::Optional Score = Filter.match(Sym->Name); if (!Score) continue; - // Multiply score by a negative factor so that Top stores items with the - // highest actual score. - Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym); - if (Top.size() > Req.MaxCandidateCount) { + // Combine Fuzzy Matching score, precomputed symbol quality and boosting + // score for a cumulative final symbol score. + const float FinalScore = + (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second; + // If Top.push(...) returns true, it means that it had to pop an item. In + // this case, it is possible to retrieve more symbols. + if (Top.push({SymbolDocID, FinalScore})) More = true; - Top.pop(); - } } - // Apply callback to the top Req.MaxCandidateCount items. - for (; !Top.empty(); Top.pop()) - Callback(*Top.top().second); + // Apply callback to the top Req.MaxCandidateCount items in the descending + // order of cumulative score. + for (const auto &Item : std::move(Top).items()) + Callback(*Symbols[Item.first]); return More; } Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.cpp +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -392,7 +392,7 @@ std::vector> consume(Iterator &It) { std::vector> Result; for (; !It.reachedEnd(); It.advance()) - Result.emplace_back(It.peek(), It.consume()); + Result.push_back({It.peek(), It.consume()}); return Result; } Index: clang-tools-extra/clangd/index/dex/Token.h =================================================================== --- clang-tools-extra/clangd/index/dex/Token.h +++ clang-tools-extra/clangd/index/dex/Token.h @@ -41,6 +41,10 @@ /// Kind specifies Token type which defines semantics for the internal /// representation. Each Kind has different representation stored in Data /// field. + // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw + // strings. For example, PathURI store URIs of each directory and its parents, + // which induces a lot of overhead because these paths tend to be long and + // each parent directory is a prefix. enum class Kind { /// Represents trigram used for fuzzy search of unqualified symbol names. /// @@ -48,15 +52,20 @@ Trigram, /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". /// - /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global + /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global /// scope). Scope, + /// Path Proximity URI to symbol declaration. + /// + /// Data stores path URI of symbol declaration file or its parent. + /// + /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" + /// and some amount of its parents. + ProximityURI, /// Internal Token type for invalid/special tokens, e.g. empty tokens for /// llvm::DenseMap. Sentinel, /// FIXME(kbobyrev): Add other Token Kinds - /// * Path with full or relative path to the directory in which symbol is - /// defined /// * Type with qualified type name or its USR }; @@ -81,6 +90,17 @@ } }; +/// Returns Search Token for a number of parent directories of given Path. +/// Should be used within the index build process. +std::vector generatePathProximityTokens(llvm::StringRef Path); + +/// Returns Search Token and its distance from the absolute path for the first +/// URI scheme allows valid conversions from AbsolutePath. The first token of +/// result is always the AbsolutePath converted to a valid scheme. +std::vector +generateQueryPathProximityTokens(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes); + } // namespace dex } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/dex/Token.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/index/dex/Token.cpp @@ -0,0 +1,74 @@ +//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "URI.h" +#include "llvm/Support/Path.h" +#include + +namespace clang { +namespace clangd { +namespace dex { + +std::vector generatePathProximityTokens(llvm::StringRef URIPath) { + std::vector Result; + // Don't generate any tokens for empty paths. + if (URIPath.empty()) + return Result; + auto ParsedURI = URI::parse(URIPath); + assert(ParsedURI && + "Non-empty argument of generateProximityURIs() should be a valid " + "URI."); + const auto Scheme = ParsedURI->scheme(); + const auto Authority = ParsedURI->authority(); + StringRef Path = ParsedURI->body(); + // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum + // size of resulting vector. Some projects might want to have higher limit if + // the file hierarchy is deeper. For the generic case, it would be useful to + // calculate Limit in the index build stage by calculating the maximum depth + // of the project source tree at runtime. + size_t Limit = 5; + // Insert original URI before the loop: this would save a redundant iteration + // with a URI parse. + Result.emplace_back(Token::Kind::ProximityURI, ParsedURI->toString()); + while (!Path.empty() && --Limit > 0) { + // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and + // could be optimized. + Path = llvm::sys::path::parent_path(Path, llvm::sys::path::Style::posix); + URI TokenURI(Scheme, Authority, Path); + Result.emplace_back(Token::Kind::ProximityURI, TokenURI.toString()); + } + return Result; +} + +std::vector +generateQueryPathProximityTokens(llvm::StringRef AbsolutePath, + llvm::ArrayRef URISchemes) { + std::vector Result; + for (const auto Scheme : URISchemes) { + auto URI = URI::create(AbsolutePath, Scheme); + // For some paths, conversion to different URI schemes is impossible. These + // should be just skipped. + if (!URI) { + // Ignore the error. + llvm::consumeError(URI.takeError()); + continue; + } + for (const auto ProximityPath : + generatePathProximityTokens(URI->toString())) + Result.emplace_back(ProximityPath); + // As soon as one valid URI scheme for given absolute path is found, the + // token generation process should be stopped. + break; + } + return Result; +} + +} // namespace dex +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/tool/ClangdMain.cpp =================================================================== --- clang-tools-extra/clangd/tool/ClangdMain.cpp +++ clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -284,8 +284,8 @@ // Load the index asynchronously. Meanwhile SwapIndex returns no results. SwapIndex *Placeholder; StaticIdx.reset(Placeholder = new SwapIndex(llvm::make_unique())); - runAsync([Placeholder] { - if (auto Idx = loadIndex(YamlSymbolFile)) + runAsync([Placeholder, Opts] { + if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex)) Placeholder->reset(std::move(Idx)); }); } Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "FuzzyMatch.h" +#include "TestFS.h" #include "TestIndex.h" #include "index/Index.h" #include "index/Merge.h" @@ -30,6 +32,12 @@ namespace dex { namespace { +std::vector URISchemes = {"unittest"}; + +//===----------------------------------------------------------------------===// +// Query iterator tests. +//===----------------------------------------------------------------------===// + std::vector consumeIDs(Iterator &It) { auto IDAndScore = consume(It); std::vector IDs(IDAndScore.size()); @@ -325,15 +333,38 @@ EXPECT_THAT(ElementBoost, 3); } +//===----------------------------------------------------------------------===// +// Search token tests. +//===----------------------------------------------------------------------===// + testing::Matcher> -trigramsAre(std::initializer_list Trigrams) { +tokensAre(std::initializer_list Strings, Token::Kind Kind) { std::vector Tokens; - for (const auto &Symbols : Trigrams) { - Tokens.push_back(Token(Token::Kind::Trigram, Symbols)); + for (const auto &TokenData : Strings) { + Tokens.push_back(Token(Kind, TokenData)); } return testing::UnorderedElementsAreArray(Tokens); } +testing::Matcher> +trigramsAre(std::initializer_list Trigrams) { + return tokensAre(Trigrams, Token::Kind::Trigram); +} + +testing::Matcher> +pathsAre(std::initializer_list Paths) { + return tokensAre(Paths, Token::Kind::ProximityURI); +} + +testing::Matcher> +proximityPathsAre(std::initializer_list ProximityPaths) { + std::vector Result; + for (const auto &Path : ProximityPaths) { + Result.emplace_back(Token::Kind::ProximityURI, Path); + } + return testing::UnorderedElementsAreArray(Result); +} + TEST(DexIndexTrigrams, IdentifierTrigrams) { EXPECT_THAT(generateIdentifierTrigrams("X86"), trigramsAre({"x86", "x$$", "x8$"})); @@ -408,8 +439,32 @@ "hij", "ijk", "jkl", "klm"})); } +TEST(DexSearchTokens, SymbolPath) { + EXPECT_THAT(generatePathProximityTokens( + "unittest:///clang-tools-extra/clangd/index/Token.h"), + pathsAre({"unittest:///clang-tools-extra/clangd/index/Token.h", + "unittest:///clang-tools-extra/clangd/index", + "unittest:///clang-tools-extra/clangd", + "unittest:///clang-tools-extra", "unittest:///"})); +} + +TEST(DexSearchTokens, QueryProximityDistances) { + llvm::SmallString<30> Path(testRoot()); + llvm::sys::path::append(Path, "/a/b/c/d/e/f/g.h"); + EXPECT_THAT(generateQueryPathProximityTokens(Path, URISchemes), + proximityPathsAre({{"unittest:///a/b/c/d/e/f/g.h"}, + {"unittest:///a/b/c/d/e/f"}, + {"unittest:///a/b/c/d/e"}, + {"unittest:///a/b/c/d"}, + {"unittest:///a/b/c"}})); +} + +//===----------------------------------------------------------------------===// +// Index tests. +//===----------------------------------------------------------------------===// + TEST(DexIndex, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -421,7 +476,8 @@ TEST(DexIndex, FuzzyFind) { auto Index = DexIndex::build( generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", - "other::ABC", "other::A"})); + "other::ABC", "other::A"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "ABC"; Req.Scopes = {"ns::"}; @@ -443,7 +499,8 @@ TEST(DexIndexTest, FuzzyMatchQ) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -461,12 +518,12 @@ symbol("2") /* duplicate */}; FuzzyFindRequest Req; Req.Query = "2"; - DexIndex I(Symbols); + DexIndex I(Symbols, URISchemes); EXPECT_THAT(match(I, Req), ElementsAre("2", "2")); } TEST(DexIndexTest, DexIndexLimitedNumMatches) { - auto I = DexIndex::build(generateNumSymbols(0, 100)); + auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes); FuzzyFindRequest Req; Req.Query = "5"; Req.MaxCandidateCount = 3; @@ -478,7 +535,8 @@ TEST(DexIndexTest, FuzzyMatch) { auto I = DexIndex::build( - generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"})); + generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}), + URISchemes); FuzzyFindRequest Req; Req.Query = "lol"; Req.MaxCandidateCount = 2; @@ -487,14 +545,16 @@ } TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3")); } TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) { - auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"})); + auto I = + DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {""}; @@ -503,7 +563,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -512,7 +572,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) { auto I = DexIndex::build( - generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"})); + generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::", "b::"}; @@ -520,7 +580,7 @@ } TEST(DexIndexTest, NoMatchNestedScopes) { - auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"})); + auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a::"}; @@ -528,7 +588,7 @@ } TEST(DexIndexTest, IgnoreCases) { - auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"})); + auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes); FuzzyFindRequest Req; Req.Query = "AB"; Req.Scopes = {"ns::"}; @@ -536,7 +596,7 @@ } TEST(DexIndexTest, Lookup) { - auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"})); + auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes); EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc")); EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}), UnorderedElementsAre("ns::abc", "ns::xyz")); @@ -545,6 +605,62 @@ EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre()); } +TEST(DexIndexTest, ProximityPathsBoosting) { + // All strings should be stored in local variables to ensure their lifetime is + // the same as Slab's. + std::string Name = "abc"; + + // Populate Symbol Slab: generate two symbols with the same characteristics + // and hence quality, but different CanonicalDeclaration file. + SymbolSlab::Builder Builder; + + Symbol RootSymbol; + RootSymbol.Name = Name; + llvm::SmallString<30> RootPath(testRoot()); + llvm::sys::path::append(RootPath, "file.h"); + auto RootURI = URI::create(RootPath, "unittest"); + assert(RootURI); + std::string RootFilename = RootURI->toString(); + RootSymbol.CanonicalDeclaration.FileURI = RootFilename; + RootSymbol.ID = SymbolID("RootSymbol"); + Builder.insert(RootSymbol); + + Symbol CloseSymbol; + CloseSymbol.Name = Name; + llvm::SmallString<30> ClosePath(testRoot()); + llvm::sys::path::append(ClosePath, "/a/b/c/d/e/f/file.h"); + auto CloseFileURI = URI::create(ClosePath, "unittest"); + assert(CloseFileURI); + std::string CloseFileName = CloseFileURI->toString(); + CloseSymbol.CanonicalDeclaration.FileURI = CloseFileName; + CloseSymbol.ID = SymbolID("CloseSymbol"); + Builder.insert(CloseSymbol); + + auto I = DexIndex::build(std::move(Builder).build(), URISchemes); + + FuzzyFindRequest Req; + Req.Query = "abc"; + // Return only one element, because the order of callback calls is not + // specified: items with lower score can be matched first. Therefore, to check + // whether the necessary item has higher score, test should only retrieve one + // element (with highest cumulative score). + Req.MaxCandidateCount = 1; + + // FuzzyFind request comes from the file which is far from the root: expect + // CloseSymbol to come out. + llvm::SmallString<30> RequestPath(testRoot()); + llvm::sys::path::append(RequestPath, "/a/b/c/d/e/f/file.h"); + Req.ProximityPaths = {RequestPath.str()}; + EXPECT_THAT(matchDeclURIs(*I, Req), ElementsAre(CloseFileURI->toString())); + + // FuzzyFind request comes from the file which is close to the root: expect + // RootSymbol to come out. + RequestPath = testRoot(); + llvm::sys::path::append(RequestPath, "file.h"); + Req.ProximityPaths = {RequestPath.str()}; + EXPECT_THAT(matchDeclURIs(*I, Req), ElementsAre(RootURI->toString())); +} + } // namespace } // namespace dex } // namespace clangd Index: clang-tools-extra/unittests/clangd/TestIndex.h =================================================================== --- clang-tools-extra/unittests/clangd/TestIndex.h +++ clang-tools-extra/unittests/clangd/TestIndex.h @@ -39,6 +39,13 @@ const FuzzyFindRequest &Req, bool *Incomplete = nullptr); +// Performs fuzzy matching-based symbol lookup given a query and an index. +// Incomplete is set true if more items than requested can be retrieved, false +// otherwise. Returns filename URIs of matched symbols canonical declarations. +std::vector matchDeclURIs(const SymbolIndex &I, + const FuzzyFindRequest &Req, + bool *Incomplete = nullptr); + // Returns qualified names of symbols with any of IDs in the index. std::vector lookup(const SymbolIndex &I, llvm::ArrayRef IDs); Index: clang-tools-extra/unittests/clangd/TestIndex.cpp =================================================================== --- clang-tools-extra/unittests/clangd/TestIndex.cpp +++ clang-tools-extra/unittests/clangd/TestIndex.cpp @@ -55,6 +55,18 @@ return Matches; } +std::vector matchDeclURIs(const SymbolIndex &I, + const FuzzyFindRequest &Req, + bool *Incomplete) { + std::vector Matches; + bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) { + Matches.push_back(Sym.CanonicalDeclaration.FileURI); + }); + if (Incomplete) + *Incomplete = IsIncomplete; + return Matches; +} + // Returns qualified names of symbols with any of IDs in the index. std::vector lookup(const SymbolIndex &I, llvm::ArrayRef IDs) {