diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -1612,13 +1612,14 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Context = CCContextKind; - Relevance.Name = Bundle.front().Name; Relevance.Query = SymbolRelevanceSignals::CodeComplete; Relevance.FileProximityMatch = FileProximity.getPointer(); if (ScopeProximity) Relevance.ScopeProximityMatch = ScopeProximity.getPointer(); if (PreferredType) Relevance.HadContextType = true; + + Relevance.Name = Bundle.front().Name; Relevance.ContextWords = &ContextWords; auto &First = Bundle.front(); @@ -1663,6 +1664,7 @@ CodeCompletion::Scores Scores; Scores.Quality = Quality.evaluate(); + Relevance.calculateDerivedSignals(); Scores.Relevance = Relevance.evaluate(); Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance); // NameMatch is in fact a multiplier on total score, so rescoring is sound. diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -109,7 +109,6 @@ SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; - Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; if (auto NameMatch = Filter.match(Sym.Name)) Relevance.NameMatch = *NameMatch; diff --git a/clang-tools-extra/clangd/Quality.h b/clang-tools-extra/clangd/Quality.h --- a/clang-tools-extra/clangd/Quality.h +++ b/clang-tools-extra/clangd/Quality.h @@ -85,29 +85,19 @@ /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { - /// The name of the symbol (for ContextWords). Must be explicitly assigned. - llvm::StringRef Name; /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; - /// Lowercase words relevant to the context (e.g. near the completion point). - llvm::StringSet<>* ContextWords = nullptr; + bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). /// Whether fixits needs to be applied for that completion or not. bool NeedsFixIts = false; bool InBaseClass = false; // A member from base class of the accessed class. - URIDistance *FileProximityMatch = nullptr; - /// These are used to calculate proximity between the index symbol and the - /// query. - llvm::StringRef SymbolURI; /// FIXME: unify with index proximity score - signals should be /// source-independent. /// Proximity between best declaration and the query. [0-1], 1 is closest. float SemaFileProximityScore = 0; - // Scope proximity is only considered (both index and sema) when this is set. - ScopeDistance *ScopeProximityMatch = nullptr; - llvm::Optional SymbolScope; // A symbol from sema should be accessible from the current scope. bool SemaSaysInScope = false; @@ -136,6 +126,34 @@ // Whether the item matches the type expected in the completion context. bool TypeMatchesPreferred = false; + // Properties and utilites used to compute derived signals. These are ignored + // by a scoring function. Must be explicitly assigned. + llvm::StringRef SymbolURI; + URIDistance *FileProximityMatch = nullptr; + /// These are used to calculate proximity between the index symbol and the + /// query. + llvm::Optional SymbolScope; + // Scope proximity is only considered (both index and sema) when this is set. + ScopeDistance *ScopeProximityMatch = nullptr; + /// The name of the symbol (for ContextWords). + llvm::StringRef Name; + /// Lowercase words relevant to the context (e.g. near the completion + /// point). + llvm::StringSet<> *ContextWords = nullptr; + + /// Set of derived signals computed by calculateDerivedSignals(). Must not be + /// set explicitly. + struct DerivedSignals { + /// Whether Name contains some word from context. + bool NameMatchesContext = false; + /// Min distance between SymbolURI and all the headers included by the TU. + unsigned FileProximityDistance = FileDistance::Unreachable; + /// Min distance between SymbolScope and all the available scopes. + unsigned ScopeProximityDistance = FileDistance::Unreachable; + } Derived; + + void calculateDerivedSignals(); + void merge(const CodeCompletionResult &SemaResult); void merge(const Symbol &IndexResult); diff --git a/clang-tools-extra/clangd/Quality.h.rej b/clang-tools-extra/clangd/Quality.h.rej new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/Quality.h.rej @@ -0,0 +1,12 @@ +--- third_party/llvm/llvm-project/clang-tools-extra/clangd/Quality.h ++++ third_party/llvm/llvm-project/clang-tools-extra/clangd/Quality.h +@@ -85,9 +85,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ + + /// Attributes of a symbol-query pair that affect how much we like it. + struct SymbolRelevanceSignals { +- /// Whether Name contains some word in context. Must not be set explicitly. +- /// Computed by calcNameMatchesContext(). +- bool NameMatchesContext = false; + /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. + float NameMatch = 1; + diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -320,23 +320,23 @@ NeedsFixIts = !SemaCCResult.FixIts.empty(); } -static std::pair uriProximity(llvm::StringRef SymbolURI, - URIDistance *D) { - if (!D || SymbolURI.empty()) - return {0.f, 0u}; - unsigned Distance = D->distance(SymbolURI); +static float fileProximityScore(unsigned FileDistance) { + // Range: [0, 1] + // FileDistance = [0, 1, 2, 3, 4, .., FileDistance::Unreachable] + // Score = [1, 0.82, 0.67, 0.55, 0.45, .., 0] + if (FileDistance == FileDistance::Unreachable) + return 0; // Assume approximately default options are used for sensible scoring. - return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; + return std::exp(FileDistance * -0.4f / FileDistanceOptions().UpCost); } -static float scopeBoost(ScopeDistance &Distance, - llvm::Optional SymbolScope) { - if (!SymbolScope) - return 1; - auto D = Distance.distance(*SymbolScope); - if (D == FileDistance::Unreachable) +static float scopeProximityScore(unsigned ScopeDistance) { + // Range: [0.6, 2]. + // ScopeDistance = [0, 1, 2, 3, 4, 5, 6, 7, .., FileDistance::Unreachable] + // Score = [2.0, 1.55, 1.2, 0.93, 0.72, 0.65, 0.65, 0.65, .., 0.6] + if (ScopeDistance == FileDistance::Unreachable) return 0.6f; - return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); + return std::max(0.65, 2.0 * std::pow(0.6, ScopeDistance / 2.0)); } static llvm::Optional @@ -348,6 +348,18 @@ return llvm::None; } +void SymbolRelevanceSignals::calculateDerivedSignals() { + Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue(); + Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty() + ? FileDistance::Unreachable + : FileProximityMatch->distance(SymbolURI); + if (ScopeProximityMatch) { + // For global symbol, the distance is 0. + Derived.ScopeProximityDistance = + SymbolScope ? ScopeProximityMatch->distance(*SymbolScope) : 0; + } +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -358,7 +370,7 @@ // File proximity scores are [0,1] and we translate them into a multiplier in // the range from 1 to 3. - Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first, + Score *= 1 + 2 * std::max(fileProximityScore(Derived.FileProximityDistance), SemaFileProximityScore); if (ScopeProximityMatch) @@ -366,10 +378,11 @@ // can be tricky (e.g. class/function scope). Set to the max boost as we // don't load top-level symbols from the preamble and sema results are // always in the accessible scope. - Score *= - SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); + Score *= SemaSaysInScope + ? 2.0 + : scopeProximityScore(Derived.ScopeProximityDistance); - if (wordMatching(Name, ContextWords)) + if (Derived.NameMatchesContext) Score *= 1.5; // Symbols like local variables may only be referenced within their scope. @@ -446,16 +459,16 @@ S.SymbolScope ? *S.SymbolScope : ""); if (S.FileProximityMatch) { - auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch); - OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", - Score.first, Score.second); + unsigned Score = fileProximityScore(S.Derived.FileProximityDistance); + OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", Score, + S.Derived.FileProximityDistance); } OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore); OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope); if (S.ScopeProximityMatch) OS << llvm::formatv("\tIndex scope boost: {0}\n", - scopeBoost(*S.ScopeProximityMatch, S.SymbolScope)); + scopeProximityScore(S.Derived.ScopeProximityDistance)); OS << llvm::formatv( "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n", diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -427,7 +427,6 @@ SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; - Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; Relevance.merge(Sym); auto Score = diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -126,7 +126,6 @@ // DistanceCalculator will find the shortest distance from ProximityPaths to // any URI extracted from the ProximityPaths. URIDistance DistanceCalculator(Sources); - PathProximitySignals.FileProximityMatch = &DistanceCalculator; // Try to build BOOST iterator for each Proximity Path provided by // ProximityPaths. Boosting factor should depend on the distance to the // Proximity Path: the closer processed path is, the higher boosting factor. @@ -134,7 +133,9 @@ // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. auto It = iterator(Token(Token::Kind::ProximityURI, ParentURI)); if (It->kind() != Iterator::Kind::False) { + PathProximitySignals.FileProximityMatch = &DistanceCalculator; PathProximitySignals.SymbolURI = ParentURI; + PathProximitySignals.calculateDerivedSignals(); BoostingIterators.push_back( Corpus.boost(std::move(It), PathProximitySignals.evaluate())); } diff --git a/clang-tools-extra/clangd/unittests/QualityTests.cpp b/clang-tools-extra/clangd/unittests/QualityTests.cpp --- a/clang-tools-extra/clangd/unittests/QualityTests.cpp +++ b/clang-tools-extra/clangd/unittests/QualityTests.cpp @@ -268,9 +268,11 @@ ProxSources.try_emplace(testPath("foo/baz.h")); URIDistance Distance(ProxSources); IndexProximate.FileProximityMatch = &Distance; + IndexProximate.calculateDerivedSignals(); EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); SymbolRelevanceSignals IndexDistant = IndexProximate; IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + IndexDistant.calculateDerivedSignals(); EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) << IndexProximate << IndexDistant; EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); @@ -294,13 +296,17 @@ EXPECT_LT(InBaseClass.evaluate(), Default.evaluate()); llvm::StringSet<> Words = {"one", "two", "three"}; + SymbolRelevanceSignals WithoutMatchingWord; WithoutMatchingWord.ContextWords = &Words; WithoutMatchingWord.Name = "four"; + WithoutMatchingWord.calculateDerivedSignals(); EXPECT_EQ(WithoutMatchingWord.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithMatchingWord; WithMatchingWord.ContextWords = &Words; WithMatchingWord.Name = "TheTwoTowers"; + WithMatchingWord.calculateDerivedSignals(); EXPECT_GT(WithMatchingWord.evaluate(), Default.evaluate()); } @@ -310,25 +316,31 @@ Relevance.ScopeProximityMatch = &ScopeProximity; Relevance.SymbolScope = "other::"; + Relevance.calculateDerivedSignals(); float NotMatched = Relevance.evaluate(); Relevance.SymbolScope = ""; + Relevance.calculateDerivedSignals(); float Global = Relevance.evaluate(); EXPECT_GT(Global, NotMatched); Relevance.SymbolScope = "llvm::"; + Relevance.calculateDerivedSignals(); float NonParent = Relevance.evaluate(); EXPECT_GT(NonParent, Global); Relevance.SymbolScope = "x::"; + Relevance.calculateDerivedSignals(); float GrandParent = Relevance.evaluate(); EXPECT_GT(GrandParent, Global); Relevance.SymbolScope = "x::y::"; + Relevance.calculateDerivedSignals(); float Parent = Relevance.evaluate(); EXPECT_GT(Parent, GrandParent); Relevance.SymbolScope = "x::y::z::"; + Relevance.calculateDerivedSignals(); float Enclosing = Relevance.evaluate(); EXPECT_GT(Enclosing, Parent); }