Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -54,7 +54,9 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" @@ -1215,6 +1217,7 @@ llvm::Optional PreferredType; // Initialized once Sema runs. // Whether to query symbols from any scope. Initialized once Sema runs. bool AllScopes = false; + llvm::StringSet<> ContextWords; // Include-insertion and proximity scoring rely on the include structure. // This is available after Sema has run. llvm::Optional Inserter; // Available during runWithSema. @@ -1237,6 +1240,7 @@ trace::Span Tracer("CodeCompleteFlow"); HeuristicPrefix = guessCompletionPrefix(SemaCCInput.Contents, SemaCCInput.Offset); + populateContextWords(SemaCCInput.Contents); if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) { assert(!SpecFuzzyFind->Result.valid()); SpecReq = speculativeFuzzyFindRequestForCompletion( @@ -1323,6 +1327,7 @@ trace::Span Tracer("CodeCompleteWithoutSema"); // Fill in fields normally set by runWithSema() HeuristicPrefix = guessCompletionPrefix(Content, Offset); + populateContextWords(Content); CCContextKind = CodeCompletionContext::CCC_Recovery; Filter = FuzzyMatcher(HeuristicPrefix.Name); auto Pos = offsetToPosition(Content, Offset); @@ -1380,6 +1385,24 @@ } private: + void populateContextWords(llvm::StringRef Content) { + // Take last 3 lines before the completion point. + unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(), + RangeBegin = RangeEnd; + for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) { + auto PrevNL = Content.rfind('\n', RangeBegin - 1); + if (PrevNL == StringRef::npos) { + RangeBegin = 0; + break; + } + RangeBegin = PrevNL + 1; + } + + ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd)); + dlog("Completion context words: {0}", + llvm::join(ContextWords.keys(), ", ")); + } + // This is called by run() once Sema code completion is done, but before the // Sema data structures are torn down. It does all the real work. CodeCompleteResult runWithSema() { @@ -1563,12 +1586,14 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Context = CCContextKind; + Relevance.Name = Bundle.front().Name; Relevance.Query = SymbolRelevanceSignals::CodeComplete; Relevance.FileProximityMatch = FileProximity.getPointer(); if (ScopeProximity) Relevance.ScopeProximityMatch = ScopeProximity.getPointer(); if (PreferredType) Relevance.HadContextType = true; + Relevance.ContextWords = &ContextWords; auto &First = Bundle.front(); if (auto FuzzyScore = fuzzyScore(First)) Index: clangd/FindSymbols.cpp =================================================================== --- clangd/FindSymbols.cpp +++ clangd/FindSymbols.cpp @@ -100,6 +100,7 @@ SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; + Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; if (auto NameMatch = Filter.match(Sym.Name)) Relevance.NameMatch = *NameMatch; Index: clangd/Quality.h =================================================================== --- clangd/Quality.h +++ clangd/Quality.h @@ -32,13 +32,14 @@ #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include #include #include namespace llvm { class raw_ostream; -} +} // namespace llvm namespace clang { class CodeCompletionResult; @@ -84,8 +85,12 @@ /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { + /// The name of the symbol (for ContextWords). Must be explicitly assigned. + llvm::StringRef Name; /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; + /// Lowercase words relevant to the context (e.g. near the completion point). + llvm::StringSet<>* ContextWords = nullptr; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). /// Whether fixits needs to be applied for that completion or not. bool NeedsFixIts = false; Index: clangd/Quality.cpp =================================================================== --- clangd/Quality.cpp +++ clangd/Quality.cpp @@ -336,6 +336,15 @@ return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); } +static llvm::Optional +wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) { + if (ContextWords) + for (const auto& Word : ContextWords->keys()) + if (Name.contains_lower(Word)) + return Word; + return llvm::None; +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -357,6 +366,9 @@ Score *= SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); + if (wordMatching(Name, ContextWords)) + Score *= 1.5; + // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. if (Query == CodeComplete) { @@ -413,7 +425,12 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolRelevanceSignals &S) { OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate()); + OS << llvm::formatv("\tName: {0}\n", S.Name); OS << llvm::formatv("\tName match: {0}\n", S.NameMatch); + if (S.ContextWords) + OS << llvm::formatv( + "\tMatching context word: {0}\n", + wordMatching(S.Name, S.ContextWords).getValueOr("")); OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden); OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts); OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember); Index: clangd/SourceCode.h =================================================================== --- clangd/SourceCode.h +++ clangd/SourceCode.h @@ -20,6 +20,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" @@ -165,6 +166,13 @@ llvm::StringMap collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style); +/// Collects words from the source code. +/// Unlike collectIdentifiers: +/// - also finds text in comments: +/// - splits text into words +/// - drops stopwords like "get" and "for" +llvm::StringSet<> collectWords(llvm::StringRef Content); + /// Heuristically determine namespaces visible at a point, without parsing Code. /// This considers using-directives and enclosing namespace-declarations that /// are visible (and not obfuscated) in the file itself (not headers). Index: clangd/SourceCode.cpp =================================================================== --- clangd/SourceCode.cpp +++ clangd/SourceCode.cpp @@ -8,6 +8,7 @@ #include "SourceCode.h" #include "Context.h" +#include "FuzzyMatch.h" #include "Logger.h" #include "Protocol.h" #include "clang/AST/ASTContext.h" @@ -18,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -602,5 +604,43 @@ return Found; } +llvm::StringSet<> collectWords(llvm::StringRef Content) { + // We assume short words are not significant. + // We may want to consider other stopwords, e.g. language keywords. + // (A very naive implementation showed no benefit, but lexing might do better) + static constexpr int MinWordLength = 4; + + std::vector Roles(Content.size()); + calculateRoles(Content, Roles); + + llvm::StringSet<> Result; + llvm::SmallString<256> Word; + auto Flush = [&] { + if (Word.size() >= MinWordLength) { + for (char &C : Word) + C = llvm::toLower(C); + Result.insert(Word); + } + Word.clear(); + }; + for (unsigned I = 0; I < Content.size(); ++I) { + switch (Roles[I]) { + case Head: + Flush(); + LLVM_FALLTHROUGH; + case Tail: + Word.push_back(Content[I]); + break; + case Unknown: + case Separator: + Flush(); + break; + } + } + Flush(); + + return Result; +} + } // namespace clangd } // namespace clang Index: clangd/test/completion-auto-trigger.test =================================================================== --- clangd/test/completion-auto-trigger.test +++ clangd/test/completion-auto-trigger.test @@ -23,7 +23,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 5, # CHECK-NEXT: "label": " size", -# CHECK-NEXT: "sortText": "3eacccccsize", +# CHECK-NEXT: "sortText": "{{.*}}size", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "size", # CHECK-NEXT: "range": { @@ -45,7 +45,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 10, # CHECK-NEXT: "label": " default_capacity", -# CHECK-NEXT: "sortText": "3fd70a3ddefault_capacity", +# CHECK-NEXT: "sortText": "{{.*}}default_capacity", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "default_capacity", # CHECK-NEXT: "range": { @@ -84,7 +84,7 @@ # CHECK-NEXT: "insertTextFormat": 1, # CHECK-NEXT: "kind": 6, # CHECK-NEXT: "label": " ns_member", -# CHECK-NEXT: "sortText": "3f2cccccns_member", +# CHECK-NEXT: "sortText": "{{.*}}ns_member", # CHECK-NEXT: "textEdit": { # CHECK-NEXT: "newText": "ns_member", # CHECK-NEXT: "range": { Index: clangd/unittests/CodeCompleteTests.cpp =================================================================== --- clangd/unittests/CodeCompleteTests.cpp +++ clangd/unittests/CodeCompleteTests.cpp @@ -174,6 +174,7 @@ int BBB(); int CCC(); }; + int main() { ClassWithMembers().^ } )cpp", /*IndexSymbols=*/{}, Opts); @@ -324,7 +325,7 @@ } } -TEST(CompletionTest, Priorities) { +TEST(CompletionTest, Accessible) { auto Internal = completions(R"cpp( class Foo { public: void pub(); @@ -334,7 +335,7 @@ void Foo::pub() { this->^ } )cpp"); EXPECT_THAT(Internal.Completions, - HasSubsequence(Named("priv"), Named("prot"), Named("pub"))); + AllOf(Has("priv"), Has("prot"), Has("pub"))); auto External = completions(R"cpp( class Foo { @@ -502,6 +503,21 @@ HasSubsequence(Named("absl"), Named("absb"))); } +TEST(CompletionTest, ContextWords) { + auto Results = completions(R"cpp( + enum class Color { RED, YELLOW, BLUE }; + + // (blank lines so the definition above isn't "context") + + // "It was a yellow car," he said. "Big yellow car, new." + auto Finish = Color::^ + )cpp"); + // Yellow would normally sort last (alphabetic). + // But the recent mention shuold bump it up. + ASSERT_THAT(Results.Completions, + HasSubsequence(Named("YELLOW"), Named("BLUE"))); +} + TEST(CompletionTest, GlobalQualified) { auto Results = completions( R"cpp( Index: clangd/unittests/QualityTests.cpp =================================================================== --- clangd/unittests/QualityTests.cpp +++ clangd/unittests/QualityTests.cpp @@ -292,6 +292,16 @@ SymbolRelevanceSignals InBaseClass; InBaseClass.InBaseClass = true; EXPECT_LT(InBaseClass.evaluate(), Default.evaluate()); + + llvm::StringSet<> Words = {"one", "two", "three"}; + SymbolRelevanceSignals WithoutMatchingWord; + WithoutMatchingWord.ContextWords = &Words; + WithoutMatchingWord.Name = "four"; + EXPECT_EQ(WithoutMatchingWord.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithMatchingWord; + WithMatchingWord.ContextWords = &Words; + WithMatchingWord.Name = "TheTwoTowers"; + EXPECT_GT(WithMatchingWord.evaluate(), Default.evaluate()); } TEST(QualityTests, ScopeProximity) { Index: clangd/unittests/SourceCodeTests.cpp =================================================================== --- clangd/unittests/SourceCodeTests.cpp +++ clangd/unittests/SourceCodeTests.cpp @@ -22,6 +22,7 @@ using llvm::Failed; using llvm::HasValue; +using ::testing::UnorderedElementsAreArray; MATCHER_P2(Pos, Line, Col, "") { return arg.line == int(Line) && arg.character == int(Col); @@ -322,6 +323,19 @@ EXPECT_EQ(IDs["foo"], 2u); } +TEST(SourceCodeTests, CollectWords) { + auto Words = collectWords(R"cpp( + #define FIZZ_BUZZ + // this is a comment + std::string getSomeText() { return "magic word"; } + )cpp"); + std::set ActualWords(Words.keys().begin(), Words.keys().end()); + std::set ExpectedWords = {"define", "fizz", "buzz", "this", + "comment", "string", "some", "text", + "return", "magic", "word"}; + EXPECT_EQ(ActualWords, ExpectedWords); +} + TEST(SourceCodeTests, VisibleNamespaces) { std::vector>> Cases = { {