Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -22,6 +22,7 @@ #include "index/Index.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" +#include "clang/Index/USRGeneration.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/Sema.h" #include "llvm/Support/Format.h" @@ -185,48 +186,53 @@ return Result; } -/// A scored code completion result. -/// It may be promoted to a CompletionItem if it's among the top-ranked results. -/// -/// We score candidates by multiplying the symbolScore ("quality" of the result) -/// with the filterScore (how well it matched the query). -/// This is sensitive to the distribution of both component scores! -struct CompletionCandidate { - CompletionCandidate(CodeCompletionResult &Result, float FilterScore) - : Result(&Result) { - Scores.symbolScore = score(Result); // Higher is better. - Scores.filterScore = FilterScore; // 0-1, higher is better. - Scores.finalScore = Scores.symbolScore * Scores.filterScore; - } +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> encodeFloat(a) < encodeFloat(b). +uint32_t encodeFloat(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + static_assert(sizeof(float) == sizeof(uint32_t), ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U; + memcpy(&U, &F, sizeof(float)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} - CodeCompletionResult *Result; - CompletionItemScores Scores; +// Returns a string that sorts in the same order as (-Score, Name), for LSP. +std::string sortText(float Score, llvm::StringRef Name) { + // We convert -Score to an integer, and hex-encode for readability. + // Example: [0.5, "foo"] -> "41000000foo" + std::string S; + llvm::raw_string_ostream OS(S); + write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, + /*Width=*/2 * sizeof(Score)); + OS << Name; + OS.flush(); + return S; +} - // Comparison reflects rank: better candidates are smaller. - bool operator<(const CompletionCandidate &C) const { - if (Scores.finalScore != C.Scores.finalScore) - return Scores.finalScore > C.Scores.finalScore; - return *Result < *C.Result; - } +/// A code completion result, in clang-native form. +/// It may be promoted to a CompletionItem if it's among the top-ranked results. +struct CompletionCandidate { + llvm::StringRef Name; // Used for filtering and sorting. + // We may have a result from Sema, from the index, or both. + const CodeCompletionResult *SemaResult = nullptr; + const Symbol *IndexResult = nullptr; - // Returns a string that sorts in the same order as operator<, for LSP. - // Conceptually, this is [-Score, Name]. We convert -Score to an integer, and - // hex-encode it for readability. Example: [0.5, "foo"] -> "41000000foo" - std::string sortText() const { - std::string S, NameStorage; - llvm::raw_string_ostream OS(S); - write_hex(OS, encodeFloat(-Scores.finalScore), llvm::HexPrintStyle::Lower, - /*Width=*/2 * sizeof(Scores.finalScore)); - OS << Result->getOrderedName(NameStorage); - return OS.str(); - } + // Computes the "symbol quality" score for this completion. Higher is better. + float score() const { + // For now we just use the Sema priority, mapping it onto a 0-1 interval. + if (!SemaResult) // FIXME(sammccall): better scoring for index results. + return 0.3; // fixed mediocre score for index-only results. -private: - static float score(const CodeCompletionResult &Result) { // Priority 80 is a really bad score. - float Score = 1 - std::min(80, Result.Priority) / 80; + float Score = 1 - std::min(80, SemaResult->Priority) / 80; - switch (static_cast(Result.Availability)) { + switch (static_cast(SemaResult->Availability)) { case CXAvailability_Available: // No penalty. break; @@ -241,23 +247,68 @@ return Score; } - // Produces an integer that sorts in the same order as F. - // That is: a < b <==> encodeFloat(a) < encodeFloat(b). - static uint32_t encodeFloat(float F) { - static_assert(std::numeric_limits::is_iec559, ""); - static_assert(sizeof(float) == sizeof(uint32_t), ""); - constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); - - // Get the bits of the float. Endianness is the same as for integers. - uint32_t U; - memcpy(&U, &F, sizeof(float)); - // IEEE 754 floats compare like sign-magnitude integers. - if (U & TopBit) // Negative float. - return 0 - U; // Map onto the low half of integers, order reversed. - return U + TopBit; // Positive floats map onto the high half of integers. + // Builds an LSP completion item. + CompletionItem build(const CompletionItemScores &Scores, + const CodeCompleteOptions &Opts, + CodeCompletionString *SemaCCS) const { + assert(bool(SemaResult) == bool(SemaCCS)); + CompletionItem I; + if (SemaResult) { + I.kind = + toCompletionItemKind(SemaResult->Kind, SemaResult->CursorKind); + getLabelAndInsertText(*SemaCCS, &I.label, &I.insertText, + Opts.EnableSnippets); + I.filterText = getFilterText(*SemaCCS); + I.documentation = getDocumentation(*SemaCCS); + I.detail = getDetail(*SemaCCS); + } + if (IndexResult) { + if (I.kind == CompletionItemKind::Missing) + I.kind = toCompletionItemKind(IndexResult->SymInfo.Kind); + // FIXME: reintroduce a way to show the index source for debugging. + if (I.label.empty()) + I.label = IndexResult->CompletionLabel; + if (I.filterText.empty()) + I.filterText = IndexResult->Name; + + // FIXME(ioeric): support inserting/replacing scope qualifiers. + if (I.insertText.empty()) + I.insertText = Opts.EnableSnippets + ? IndexResult->CompletionSnippetInsertText + : IndexResult->CompletionPlainInsertText; + + if (auto *D = IndexResult->Detail) { + if (I.documentation.empty()) + I.documentation = D->Documentation; + if (I.detail.empty()) + I.detail = D->CompletionDetail; + } + } + I.scoreInfo = Scores; + I.sortText = sortText(Scores.finalScore, Name); + I.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet + : InsertTextFormat::PlainText; + return I; } }; +// Determine the symbol ID for a Sema code completion result, if possible. +llvm::Optional getSymbolID(const CodeCompletionResult &R) { + switch (R.Kind) { + case CodeCompletionResult::RK_Declaration: + case CodeCompletionResult::RK_Pattern: { + llvm::SmallString<128> USR; + if (/*Ignore=*/ clang::index::generateUSRForDecl(R.Declaration, USR)) + return None; + return SymbolID(USR); + } + case CodeCompletionResult::RK_Macro: + // FIXME: Macros do have USRs, but the CCR doesn't contain enough info. + case CodeCompletionResult::RK_Keyword: + return None; + } +} + /// \brief Information about the scope specifier in the qualified-id code /// completion (e.g. "ns::ab?"). struct SpecifiedScope { @@ -268,133 +319,135 @@ // context), this will be set to the fully qualfied name of the corresponding // context. std::string Resolved; -}; -/// \brief Information from sema about (parital) symbol names to be completed. -/// For example, for completion "ns::ab^", this stores the scope specifier -/// "ns::" and the completion filter text "ab". -struct NameToComplete { - // The partial identifier being completed, without qualifier. - std::string Filter; - - /// This is set if the completion is for qualified IDs, e.g. "abc::x^". - llvm::Optional SSInfo; + llvm::StringRef forIndex() { + llvm::StringRef Chosen = Resolved.empty() ? Written : Resolved; + return Chosen.trim(':'); + } }; -SpecifiedScope extraCompletionScope(Sema &S, const CXXScopeSpec &SS); - -class CompletionItemsCollector : public CodeCompleteConsumer { -public: - CompletionItemsCollector(const CodeCompleteOptions &CodeCompleteOpts, - CompletionList &Items, NameToComplete &CompletedName) - : CodeCompleteConsumer(CodeCompleteOpts.getClangCompleteOpts(), +// The CompletionRecorder captures Sema code-complete output, including context. +// It filters out ignored results (but doesn't apply fuzzy-filtering yet). +// It doesn't do scoring or conversion to CompletionItem yet, as we want to +// merge with index results first. +struct CompletionRecorder : public CodeCompleteConsumer { + CompletionRecorder(const CodeCompleteOptions &Opts) + : CodeCompleteConsumer(Opts.getClangCompleteOpts(), /*OutputIsBinary=*/false), - ClangdOpts(CodeCompleteOpts), Items(Items), - Allocator(std::make_shared()), - CCTUInfo(Allocator), CompletedName(CompletedName), - EnableSnippets(CodeCompleteOpts.EnableSnippets) {} - - void ProcessCodeCompleteResults(Sema &S, CodeCompletionContext Context, - CodeCompletionResult *Results, + CCContext(CodeCompletionContext::CCC_Other), + Opts(Opts), + CCAllocator(std::make_shared()), + CCTUInfo(CCAllocator) {} + std::vector Results; + CodeCompletionContext CCContext; + Sema *Sema = nullptr; // The Sema that created the results. + + void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context, + CodeCompletionResult *InResults, unsigned NumResults) override final { - FuzzyMatcher Filter(S.getPreprocessor().getCodeCompletionFilter()); - if (auto SS = Context.getCXXScopeSpecifier()) - CompletedName.SSInfo = extraCompletionScope(S, **SS); + // Record the completion context. + assert(!Sema && "ProcessCodeCompleteResults called multiple times!"); + Sema = &S; + CCContext = Context; - CompletedName.Filter = S.getPreprocessor().getCodeCompletionFilter(); - std::priority_queue Candidates; + // Retain the results we might want. for (unsigned I = 0; I < NumResults; ++I) { - auto &Result = Results[I]; - // We drop hidden items, as they cannot be found by the lookup after - // inserting the corresponding completion item and only produce noise and - // duplicates in the completion list. However, there is one exception. If - // Result has a Qualifier which is non-informative, we can refer to an - // item by adding that qualifier, so we don't filter out this item. + auto &Result = InResults[I]; + // Drop hidden items which cannot be found by lookup after completion. + // Exception: some items can be named by using a qualifier. if (Result.Hidden && (!Result.Qualifier || Result.QualifierIsInformative)) continue; - if (!ClangdOpts.IncludeIneligibleResults && + if (!Opts.IncludeIneligibleResults && (Result.Availability == CXAvailability_NotAvailable || Result.Availability == CXAvailability_NotAccessible)) continue; - auto FilterScore = fuzzyMatch(S, Context, Filter, Result); - if (!FilterScore) - continue; - Candidates.emplace(Result, *FilterScore); - if (ClangdOpts.Limit && Candidates.size() > ClangdOpts.Limit) { - Candidates.pop(); - Items.isIncomplete = true; - } - } - while (!Candidates.empty()) { - auto &Candidate = Candidates.top(); - const auto *CCS = Candidate.Result->CreateCodeCompletionString( - S, Context, *Allocator, CCTUInfo, - CodeCompleteOpts.IncludeBriefComments); - assert(CCS && "Expected the CodeCompletionString to be non-null"); - Items.items.push_back(ProcessCodeCompleteResult(Candidate, *CCS)); - Candidates.pop(); + Results.push_back(Result); } - std::reverse(Items.items.begin(), Items.items.end()); } - GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; } - + CodeCompletionAllocator &getAllocator() override { return *CCAllocator; } CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; } -private: - llvm::Optional fuzzyMatch(Sema &S, const CodeCompletionContext &CCCtx, - FuzzyMatcher &Filter, - CodeCompletionResult Result) { + // Returns the filtering/sorting name for Result, which must be from Results. + // Returned string is owned by this recorder (or the AST). + llvm::StringRef getName(const CodeCompletionResult &Result) { switch (Result.Kind) { case CodeCompletionResult::RK_Declaration: if (auto *ID = Result.Declaration->getIdentifier()) - return Filter.match(ID->getName()); + return ID->getName(); break; case CodeCompletionResult::RK_Keyword: - return Filter.match(Result.Keyword); + return Result.Keyword; case CodeCompletionResult::RK_Macro: - return Filter.match(Result.Macro->getName()); + return Result.Macro->getName(); case CodeCompletionResult::RK_Pattern: - return Filter.match(Result.Pattern->getTypedText()); + return Result.Pattern->getTypedText(); } - auto *CCS = Result.CreateCodeCompletionString( - S, CCCtx, *Allocator, CCTUInfo, /*IncludeBriefComments=*/false); - return Filter.match(CCS->getTypedText()); + auto *CCS = codeCompletionString(Result, /*IncludeBriefComments=*/false); + return CCS->getTypedText(); } - CompletionItem - ProcessCodeCompleteResult(const CompletionCandidate &Candidate, - const CodeCompletionString &CCS) const { - - // Adjust this to InsertTextFormat::Snippet iff we encounter a - // CK_Placeholder chunk in SnippetCompletionItemsCollector. - CompletionItem Item; + // Build a CodeCompletion string for R, which must be from Results. + // The CCS will be owned by this recorder. + CodeCompletionString *codeCompletionString(const CodeCompletionResult &R, + bool IncludeBriefComments) { + // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway. + return const_cast(R).CreateCodeCompletionString( + *Sema, CCContext, *CCAllocator, CCTUInfo, IncludeBriefComments); + } - Item.documentation = getDocumentation(CCS); - Item.sortText = Candidate.sortText(); - Item.scoreInfo = Candidate.Scores; +private: + CodeCompleteOptions Opts; + std::shared_ptr CCAllocator; + CodeCompletionTUInfo CCTUInfo; +}; - Item.detail = getDetail(CCS); - Item.filterText = getFilterText(CCS); - getLabelAndInsertText(CCS, &Item.label, &Item.insertText, EnableSnippets); +// Tracks a bounded number of candidates with the best scores. +class TopN { +public: + using value_type = std::pair; + + TopN(size_t N) : N(N) {} + + // Adds a candidate to the set, and drops one if needed to get back under N. + void push(value_type &&V) { + if (Heap.size() >= N) { + Dropped = true; + if (N == 0 || !greater(V, Heap.front())) + return; + std::pop_heap(Heap.begin(), Heap.end(), greater); + Heap.back() = std::move(V); + std::push_heap(Heap.begin(), Heap.end(), greater); + } else { + Heap.push_back(std::move(V)); + std::push_heap(Heap.begin(), Heap.end(), greater); + } + assert(Heap.size() <= N); + assert(std::is_heap(Heap.begin(), Heap.end(), greater)); + } - Item.insertTextFormat = EnableSnippets ? InsertTextFormat::Snippet - : InsertTextFormat::PlainText; + // Returns candidates from best to worst. + std::vector items() && { + std::sort_heap(Heap.begin(), Heap.end(), greater); + assert(Heap.size() <= N); + return std::move(Heap); + } - // Fill in the kind field of the CompletionItem. - Item.kind = toCompletionItemKind(Candidate.Result->Kind, - Candidate.Result->CursorKind); + // Returns true if we had more than N candidates, and dropped some. + bool dropped() const { return Dropped; } - return Item; +private: + static bool greater(const value_type &L, const value_type &R) { + if (L.second.finalScore != R.second.finalScore) + return L.second.finalScore > R.second.finalScore; + return L.first.Name < R.first.Name; // Earlier name is better. } - CodeCompleteOptions ClangdOpts; - CompletionList &Items; - std::shared_ptr Allocator; - CodeCompletionTUInfo CCTUInfo; - NameToComplete &CompletedName; - bool EnableSnippets; -}; // CompletionItemsCollector + const size_t N; + bool Dropped = false; + std::vector Heap; // Min-heap, comparator is greater(). +}; + class SignatureHelpCollector final : public CodeCompleteConsumer { @@ -491,14 +544,16 @@ }; // SignatureHelpCollector -bool invokeCodeComplete(const Context &Ctx, - std::unique_ptr Consumer, - const clang::CodeCompleteOptions &Options, - PathRef FileName, - const tooling::CompileCommand &Command, - PrecompiledPreamble const *Preamble, StringRef Contents, - Position Pos, IntrusiveRefCntPtr VFS, - std::shared_ptr PCHs) { +// Invokes Sema code completion on a file. +// Callback will be invoked once completion is done, but before cleaning up. +bool semaCodeComplete(const Context &Ctx, + std::unique_ptr Consumer, + const clang::CodeCompleteOptions &Options, + PathRef FileName, const tooling::CompileCommand &Command, + PrecompiledPreamble const *Preamble, StringRef Contents, + Position Pos, IntrusiveRefCntPtr VFS, + std::shared_ptr PCHs, + llvm::function_ref Callback = nullptr) { std::vector ArgStrs; for (const auto &S : Command.CommandLine) ArgStrs.push_back(S.c_str()); @@ -551,69 +606,14 @@ return false; } + if (Callback) + Callback(); Action.EndSourceFile(); return true; } -CompletionItem indexCompletionItem(const Symbol &Sym, llvm::StringRef Filter, - const SpecifiedScope &SSInfo, - llvm::StringRef DebuggingLabel = "") { - CompletionItem Item; - Item.kind = toCompletionItemKind(Sym.SymInfo.Kind); - // Add DebuggingLabel to the completion results if DebuggingLabel is not - // empty. - // - // For symbols from static index, there are prefix "[G]" in the - // results (which is used for debugging purpose). - // So completion list will be like: - // clang::symbol_from_dynamic_index - // [G]clang::symbol_from_static_index - // - // FIXME: Find out a better way to show the index source. - if (!DebuggingLabel.empty()) { - llvm::raw_string_ostream Label(Item.label); - Label << llvm::format("[%s]%s", DebuggingLabel.str().c_str(), - Sym.Name.str().c_str()); - } else { - Item.label = Sym.Name; - } - // FIXME(ioeric): support inserting/replacing scope qualifiers. - - // FIXME(ioeric): support snippets. - Item.insertText = Sym.CompletionPlainInsertText; - Item.insertTextFormat = InsertTextFormat::PlainText; - Item.filterText = Sym.Name; - - // FIXME(ioeric): sort symbols appropriately. - Item.sortText = ""; - - if (Sym.Detail) { - Item.documentation = Sym.Detail->Documentation; - Item.detail = Sym.Detail->CompletionDetail; - } - - return Item; -} - -void completeWithIndex(const Context &Ctx, const SymbolIndex &Index, - llvm::StringRef Code, const SpecifiedScope &SSInfo, - llvm::StringRef Filter, CompletionList *Items, - llvm::StringRef DebuggingLabel = "") { - FuzzyFindRequest Req; - Req.Query = Filter; - // FIXME(ioeric): add more possible scopes based on using namespaces and - // containing namespaces. - StringRef Scope = SSInfo.Resolved.empty() ? SSInfo.Written : SSInfo.Resolved; - Req.Scopes = {Scope.trim(':').str()}; - - Items->isIncomplete |= !Index.fuzzyFind(Ctx, Req, [&](const Symbol &Sym) { - Items->items.push_back( - indexCompletionItem(Sym, Filter, SSInfo, DebuggingLabel)); - }); -} - -SpecifiedScope extraCompletionScope(Sema &S, const CXXScopeSpec &SS) { +SpecifiedScope getSpecifiedScope(Sema &S, const CXXScopeSpec &SS) { SpecifiedScope Info; auto &SM = S.getSourceManager(); auto SpecifierRange = SS.getRange(); @@ -633,6 +633,50 @@ return Info; } +// Should we perform index-based completion in this context? +// FIXME: consider allowing completion, but restricting the result types. +bool allowIndex(enum CodeCompletionContext::Kind K) { + switch (K) { + case CodeCompletionContext::CCC_TopLevel: + case CodeCompletionContext::CCC_ObjCInterface: + case CodeCompletionContext::CCC_ObjCImplementation: + case CodeCompletionContext::CCC_ObjCIvarList: + case CodeCompletionContext::CCC_ClassStructUnion: + case CodeCompletionContext::CCC_Statement: + case CodeCompletionContext::CCC_Expression: + case CodeCompletionContext::CCC_ObjCMessageReceiver: + case CodeCompletionContext::CCC_EnumTag: + case CodeCompletionContext::CCC_UnionTag: + case CodeCompletionContext::CCC_ClassOrStructTag: + case CodeCompletionContext::CCC_ObjCProtocolName: + case CodeCompletionContext::CCC_Namespace: + case CodeCompletionContext::CCC_Type: + case CodeCompletionContext::CCC_Name: // FIXME: why does ns::^ give this? + case CodeCompletionContext::CCC_PotentiallyQualifiedName: + case CodeCompletionContext::CCC_ParenthesizedExpression: + case CodeCompletionContext::CCC_ObjCInterfaceName: + case CodeCompletionContext::CCC_ObjCCategoryName: + return true; + case CodeCompletionContext::CCC_Other: // Be conservative. + case CodeCompletionContext::CCC_OtherWithMacros: + case CodeCompletionContext::CCC_DotMemberAccess: + case CodeCompletionContext::CCC_ArrowMemberAccess: + case CodeCompletionContext::CCC_ObjCPropertyAccess: + case CodeCompletionContext::CCC_MacroName: + case CodeCompletionContext::CCC_MacroNameUse: + case CodeCompletionContext::CCC_PreprocessorExpression: + case CodeCompletionContext::CCC_PreprocessorDirective: + case CodeCompletionContext::CCC_NaturalLanguage: + case CodeCompletionContext::CCC_SelectorName: + case CodeCompletionContext::CCC_TypeQualifiers: + case CodeCompletionContext::CCC_ObjCInstanceMessage: + case CodeCompletionContext::CCC_ObjCClassMessage: + case CodeCompletionContext::CCC_Recovery: + return false; + } + llvm_unreachable("unknown code completion context"); +} + } // namespace clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const { @@ -650,6 +694,35 @@ return Result; } +// Runs Sema-based (AST) and Index-based completion, returns merged results. +// +// There are a few tricky considerations: +// - the AST provides information needed for the index query (e.g. which +// namespaces to search in). So Sema must start first. +// - we only want to return the top results (Opts.Limit). +// Building CompletionItems for everything else is wasteful, so we want to +// preserve the "native" format until we're done with scoring. +// - the data underlying Sema completion items is owned by the AST and various +// other arenas, which must stay alive for us to build CompletionItems. +// - we may get duplicate results from Sema and the Index, we need to merge. +// +// So we start Sema completion first, but defer its cleanup until we're done. +// We use the Sema context information to query the index. +// Then we merge the two result sets, producing items that are Sema/Index/Both. +// These items are scored, and the top N are synthesized into the LSP response. +// Finally, we can clean up the data structures created by Sema completion. +// +// Main collaborators are: +// - semaCodeComplete sets up the compiler machinery to run code completion. +// - CompletionRecorder captures Sema completion results, including context. +// - SymbolIndex (Opts.Index) provides index completion results as Symbols +// - CompletionCandidates are the result of merging Sema and Index results. +// Each candidate points to an underlying CodeCompletionResult (Sema), a +// Symbol (Index), or both. It computes the result quality score. +// CompletionCandidate also does conversion to CompletionItem (at the end). +// - FuzzyMatcher scores how the candidate matches the partial identifier. +// This score is combined with the result quality score for the final score. +// - TopN determines the results with the best score. CompletionList codeComplete(const Context &Ctx, PathRef FileName, const tooling::CompileCommand &Command, PrecompiledPreamble const *Preamble, @@ -657,21 +730,123 @@ IntrusiveRefCntPtr VFS, std::shared_ptr PCHs, CodeCompleteOptions Opts) { - CompletionList Results; - NameToComplete CompletedName; - auto Consumer = - llvm::make_unique(Opts, Results, CompletedName); - invokeCodeComplete(Ctx, std::move(Consumer), Opts.getClangCompleteOpts(), - FileName, Command, Preamble, Contents, Pos, std::move(VFS), - std::move(PCHs)); - - // Got scope specifier (ns::f^) for code completion from sema, try to query - // global symbols from indexes. - // FIXME: merge with Sema results, and respect limits. - if (CompletedName.SSInfo && Opts.Index) - completeWithIndex(Ctx, *Opts.Index, Contents, *CompletedName.SSInfo, - CompletedName.Filter, &Results, /*DebuggingLabel=*/"I"); - return Results; + // We only keep the best N results at any time. We keep their "native" data + // structures, and only serialize the winners to LSP at the end. + TopN Candidates(Opts.Limit ? Opts.Limit : std::numeric_limits::max()); + CompletionRecorder *Recorder; // Forward declare for AddCandidate. + llvm::Optional Filter; // Initialized later, after Sema. + int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging. + // AddCandidate computes scores, and adds a candidate to the TopN structure. + auto AddCandidate = [&](const CodeCompletionResult *SemaResult, + const Symbol *IndexResult) { + CompletionCandidate C; + C.SemaResult = SemaResult; + C.IndexResult = IndexResult; + C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult); + + CompletionItemScores Scores; + if (auto FuzzyScore = Filter->match(C.Name)) + Scores.filterScore = *FuzzyScore; + else + return; + Scores.symbolScore = C.score(); + // We score candidates by multiplying symbolScore ("quality" of the result) + // with filterScore (how well it matched the query). + // This is sensitive to the distribution of both component scores! + Scores.finalScore = Scores.filterScore * Scores.symbolScore; + + NSema += bool(SemaResult); + NIndex += bool(IndexResult); + NBoth += SemaResult && IndexResult; + Candidates.push({C, Scores}); + }; + + // We run Sema code completion first. It builds an AST and calculates: + // - completion results based on the AST. These are saved for merging. + // - the partial identifier and namespaces to look in. We need these for + // the index query. + auto RecorderOwner = llvm::make_unique(Opts); + Recorder = RecorderOwner.get(); // Valid until code completion finishes. + CompletionList Output; + semaCodeComplete(Ctx, std::move(RecorderOwner), Opts.getClangCompleteOpts(), + FileName, Command, Preamble, Contents, Pos, std::move(VFS), + std::move(PCHs), [&] { + if (!Recorder->Sema) // If Sema didn't trigger completion, don't query the + return; // index either - we don't have the necessary context. + llvm::StringRef FilterText = + Recorder->Sema->getPreprocessor().getCodeCompletionFilter(); + Filter = FuzzyMatcher(FilterText); + + // Now we have the needed context to query the index. + // FIXME: in addition to querying for extra/overlapping symbols, we should + // explicitly request symbols corresponding to Sema results. + // We can use their signals even if the index can't suggest them. + // We must copy index results to preserve them, but there are at most Limit. + SymbolSlab IndexResults; + if (Opts.Index && allowIndex(Recorder->CCContext.getKind())) { + SymbolSlab::Builder ResultsBuilder; + // Build the query. + FuzzyFindRequest Req; + Req.Query = FilterText; + // If the user typed a scope, e.g. a::b::xxx(), restrict to that scope. + // FIXME(ioeric): add scopes based on using directives and enclosing ns. + if (auto SS = Recorder->CCContext.getCXXScopeSpecifier()) + Req.Scopes = {getSpecifiedScope(*Recorder->Sema, **SS).forIndex()}; + else + // Unless the user typed a ns qualifier, complete in global scope only. + // FIXME: once we know what namespaces are in scope (D42073), use those. + // FIXME: once we can insert namespace qualifiers and use the in-scope + // namespaces for scoring, search in all namespaces. + Req.Scopes = {""}; // Complete in the global scope only. + // Run the query against the index. + Output.isIncomplete |= !Opts.Index->fuzzyFind( + Ctx, Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }); + IndexResults = std::move(ResultsBuilder).build(); + } + + // Now we merge Sema and Index results into CompletionCandidates. + llvm::DenseSet UsedIndexResults; + auto CorrespondingIndexResult = + [&](const CodeCompletionResult &SemaResult) -> const Symbol * { + if (auto SymID = getSymbolID(SemaResult)) { + auto I = IndexResults.find(*SymID); + if (I != IndexResults.end()) { + UsedIndexResults.insert(&*I); + return &*I; + } + } + return nullptr; + }; + // Emit all Sema results, merging them with Index results if possible. + for (auto &SemaResult : Recorder->Results) + AddCandidate(&SemaResult, CorrespondingIndexResult(SemaResult)); + // Now emit any Index-only results. + for (const auto &IndexResult : IndexResults) { + if (UsedIndexResults.count(&IndexResult)) + continue; + AddCandidate(/*SemaResult=*/nullptr, &IndexResult); + } + + // Now we have all the winning candidates. Convert them to LSP structs. + Output.isIncomplete |= Candidates.dropped(); + for (auto &Cand : std::move(Candidates).items()) { + CodeCompletionString *SemaCCS = nullptr; + if (auto *R = Cand.first.SemaResult) + SemaCCS = Recorder->codeCompletionString(*R, Opts.IncludeBriefComments); + Output.items.push_back(Cand.first.build(Cand.second, Opts, SemaCCS)); + } + // We're done with the Sema results, so allow the AST to be cleaned up. + }); + + log(Ctx, + llvm::formatv("Code completion: {0} results from Sema, {1} from Index, " + "{2} matched, {3} returned{4}.", + NSema, NIndex, NBoth, Output.items.size(), + Output.isIncomplete ? " (incomplete)" : "")); + assert(!Opts.Limit || Output.items.size() <= Opts.Limit); + // Don't assert(!isIncomplete || !Limit && Output.items.size() == Opts.Limit) + // Indexes may choose to impose their own limits even if we don't have one. + return Output; } SignatureHelp signatureHelp(const Context &Ctx, PathRef FileName, @@ -686,10 +861,10 @@ Options.IncludeMacros = false; Options.IncludeCodePatterns = false; Options.IncludeBriefComments = true; - invokeCodeComplete(Ctx, - llvm::make_unique(Options, Result), - Options, FileName, Command, Preamble, Contents, Pos, - std::move(VFS), std::move(PCHs)); + semaCodeComplete(Ctx, + llvm::make_unique(Options, Result), + Options, FileName, Command, Preamble, Contents, Pos, + std::move(VFS), std::move(PCHs)); return Result; } Index: unittests/clangd/CodeCompleteTests.cpp =================================================================== --- unittests/clangd/CodeCompleteTests.cpp +++ unittests/clangd/CodeCompleteTests.cpp @@ -17,7 +17,6 @@ #include "SourceCode.h" #include "TestFS.h" #include "index/MemIndex.h" -#include "index/Merge.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -82,7 +81,7 @@ return arg.insertTextFormat == clangd::InsertTextFormat::Snippet && arg.insertText == Text; } -MATCHER(FilterContainsName, "") { +MATCHER(NameContainsFilter, "") { if (arg.filterText.empty()) return true; return llvm::StringRef(arg.insertText).contains(arg.filterText); @@ -129,14 +128,27 @@ .get() .second.Value; // Sanity-check that filterText is valid. - EXPECT_THAT(CompletionList.items, Each(FilterContainsName())); + EXPECT_THAT(CompletionList.items, Each(NameContainsFilter())); return CompletionList; } +std::string replace(StringRef Haystack, StringRef Needle, StringRef Repl) { + std::string Result; + raw_string_ostream OS(Result); + std::pair Split; + for (Split = Haystack.split(Needle); !Split.second.empty(); + Split = Split.first.split(Needle)) + OS << Split.first << Repl; + Result += Split.first; + OS.flush(); + return Result; +} + // Helpers to produce fake index symbols for memIndex() or completions(). -Symbol sym(StringRef QName, index::SymbolKind Kind) { +// USRFormat is a regex replacement string for the unqualified part of the USR. +Symbol sym(StringRef QName, index::SymbolKind Kind, StringRef USRFormat) { Symbol Sym; - Sym.ID = SymbolID(QName); + std::string USR = "c:"; // We synthesize a few simple cases of USRs by hand! size_t Pos = QName.rfind("::"); if (Pos == llvm::StringRef::npos) { Sym.Name = QName; @@ -144,15 +156,25 @@ } else { Sym.Name = QName.substr(Pos + 2); Sym.Scope = QName.substr(0, Pos); + USR += "@N@" + replace(Sym.Scope, "::", "@N@"); // ns:: -> @N@ns } + USR += Regex("^.*$").sub(USRFormat, Sym.Name); // e.g. func -> @F@func# + Sym.ID = SymbolID(USR); Sym.CompletionPlainInsertText = Sym.Name; + Sym.CompletionSnippetInsertText = Sym.Name; Sym.CompletionLabel = Sym.Name; Sym.SymInfo.Kind = Kind; return Sym; } -Symbol func(StringRef Name) { return sym(Name, index::SymbolKind::Function); } -Symbol cls(StringRef Name) { return sym(Name, index::SymbolKind::Class); } -Symbol var(StringRef Name) { return sym(Name, index::SymbolKind::Variable); } +Symbol func(StringRef Name) { // Assumes the function has no args. + return sym(Name, index::SymbolKind::Function, "@F@\\0#"); // no args +} +Symbol cls(StringRef Name) { + return sym(Name, index::SymbolKind::Class, "@S@\\0@S@\\0"); +} +Symbol var(StringRef Name) { + return sym(Name, index::SymbolKind::Variable, "@\\0"); +} TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; @@ -226,7 +248,7 @@ ClassWithMembers().^ } )cpp", - /*IndexSymbols=*/{}, Opts); + {cls("IndexClass"), var("index_var"), func("index_func")}, Opts); // Class members. The only items that must be present in after-dot // completion. @@ -236,9 +258,11 @@ EXPECT_IFF(Opts.IncludeIneligibleResults, Results.items, Has("private_field")); // Global items. - EXPECT_THAT(Results.items, Not(AnyOf(Has("global_var"), Has("global_func"), - Has("global_func()"), Has("GlobalClass"), - Has("MACRO"), Has("LocalClass")))); + EXPECT_THAT( + Results.items, + Not(AnyOf(Has("global_var"), Has("index_var"), Has("global_func"), + Has("global_func()"), Has("index_func"), Has("GlobalClass"), + Has("IndexClass"), Has("MACRO"), Has("LocalClass")))); // There should be no code patterns (aka snippets) in after-dot // completion. At least there aren't any we're aware of. EXPECT_THAT(Results.items, Not(Contains(Kind(CompletionItemKind::Snippet)))); @@ -271,16 +295,17 @@ ^ } )cpp", - /*IndexSymbols=*/{}, Opts); + {cls("IndexClass"), var("index_var"), func("index_func")}, Opts); // Class members. Should never be present in global completions. EXPECT_THAT(Results.items, Not(AnyOf(Has("method"), Has("method()"), Has("field")))); // Global items. - EXPECT_IFF(Opts.IncludeGlobals, Results.items, - AllOf(Has("global_var"), - Has(Opts.EnableSnippets ? "global_func()" : "global_func"), - Has("GlobalClass"))); + EXPECT_THAT(Results.items, + AllOf(Has("global_var"), Has("index_var"), + Has(Opts.EnableSnippets ? "global_func()" : "global_func"), + Has("index_func" /* our fake symbol doesn't include () */), + Has("GlobalClass"), Has("IndexClass"))); // A macro. EXPECT_IFF(Opts.IncludeMacros, Results.items, Has("MACRO")); // Local items. Must be present always. @@ -300,7 +325,6 @@ // We used to test every combination of options, but that got too slow (2^N). auto Flags = { &clangd::CodeCompleteOptions::IncludeMacros, - &clangd::CodeCompleteOptions::IncludeGlobals, &clangd::CodeCompleteOptions::IncludeBriefComments, &clangd::CodeCompleteOptions::EnableSnippets, &clangd::CodeCompleteOptions::IncludeCodePatterns, @@ -400,83 +424,67 @@ } TEST(CompletionTest, Kinds) { - auto Results = completions(R"cpp( - #define MACRO X - int variable; - struct Struct {}; - int function(); - int X = ^ - )cpp"); - EXPECT_THAT(Results.items, Has("function", CompletionItemKind::Function)); - EXPECT_THAT(Results.items, Has("variable", CompletionItemKind::Variable)); - EXPECT_THAT(Results.items, Has("int", CompletionItemKind::Keyword)); - EXPECT_THAT(Results.items, Has("Struct", CompletionItemKind::Class)); - EXPECT_THAT(Results.items, Has("MACRO", CompletionItemKind::Text)); + auto Results = completions( + R"cpp( + #define MACRO X + int variable; + struct Struct {}; + int function(); + int X = ^ + )cpp", + {func("indexFunction"), var("indexVariable"), cls("indexClass")}); + EXPECT_THAT(Results.items, + AllOf(Has("function", CompletionItemKind::Function), + Has("variable", CompletionItemKind::Variable), + Has("int", CompletionItemKind::Keyword), + Has("Struct", CompletionItemKind::Class), + Has("MACRO", CompletionItemKind::Text), + Has("indexFunction", CompletionItemKind::Function), + Has("indexVariable", CompletionItemKind::Variable), + Has("indexClass", CompletionItemKind::Class))); Results = completions("nam^"); EXPECT_THAT(Results.items, Has("namespace", CompletionItemKind::Snippet)); } TEST(CompletionTest, NoDuplicates) { - auto Items = completions(R"cpp( -struct Adapter { - void method(); -}; + auto Results = completions( + R"cpp( + class Adapter { + void method(); + }; -void Adapter::method() { - Adapter^ -} - )cpp") - .items; + void Adapter::method() { + Adapter^ + } + )cpp", + {cls("Adapter")}); // Make sure there are no duplicate entries of 'Adapter'. - EXPECT_THAT(Items, ElementsAre(Named("Adapter"), Named("~Adapter"))); + EXPECT_THAT(Results.items, ElementsAre(Named("Adapter"), Named("~Adapter"))); } -TEST(CompletionTest, FuzzyRanking) { - auto Items = completions(R"cpp( - struct fake { int BigBang, Babble, Ball; }; - int main() { fake().bb^ }")cpp").items; - // BigBang is a better match than Babble. Ball doesn't match at all. - EXPECT_THAT(Items, ElementsAre(Named("BigBang"), Named("Babble"))); -} - -TEST(CompletionTest, NoIndex) { - auto Results = completions(R"cpp( - namespace ns { class Local {}; } - void f() { ns::^ } - )cpp"); - EXPECT_THAT(Results.items, Has("Local")); -} - -TEST(CompletionTest, StaticAndDynamicIndex) { - clangd::CodeCompleteOptions Opts; - auto StaticIdx = memIndex({cls("ns::XYZ")}); - auto DynamicIdx = memIndex({func("ns::foo")}); - auto Merge = mergeIndex(DynamicIdx.get(), StaticIdx.get()); - Opts.Index = Merge.get(); - +TEST(CompletionTest, ScopedNoIndex) { auto Results = completions( R"cpp( - void f() { ::ns::^ } - )cpp", - /*IndexSymbols=*/{}, Opts); - EXPECT_THAT(Results.items, Contains(Labeled("[I]XYZ"))); - EXPECT_THAT(Results.items, Contains(Labeled("[I]foo"))); + namespace fake { int BigBang, Babble, Ball; }; + int main() { fake::bb^ } + ")cpp"); + // BigBang is a better match than Babble. Ball doesn't match at all. + EXPECT_THAT(Results.items, ElementsAre(Named("BigBang"), Named("Babble"))); } -TEST(CompletionTest, IndexScope) { +TEST(CompletionTest, Scoped) { auto Results = completions( R"cpp( - namespace ns { int local; } - void f() { ns::^ } - )cpp", - {cls("ns::XYZ"), cls("nx::XYZ"), func("ns::foo")}); - EXPECT_THAT(Results.items, - UnorderedElementsAre(Named("XYZ"), Named("foo"), Named("local"))); + namespace fake { int Babble, Ball; }; + int main() { fake::bb^ } + ")cpp", + {var("fake::BigBang")}); + EXPECT_THAT(Results.items, ElementsAre(Named("BigBang"), Named("Babble"))); } -TEST(CompletionTest, IndexBasedWithFilter) { +TEST(CompletionTest, ScopedWithFilter) { auto Results = completions( R"cpp( void f() { ns::x^ } @@ -486,7 +494,7 @@ UnorderedElementsAre(AllOf(Named("XYZ"), Filter("XYZ")))); } -TEST(CompletionTest, IndexGlobalQualified) { +TEST(CompletionTest, GlobalQualified) { auto Results = completions( R"cpp( void f() { ::^ } @@ -496,13 +504,28 @@ Has("f", CompletionItemKind::Function))); } -TEST(CompletionTest, IndexFullyQualifiedScope) { +TEST(CompletionTest, FullyQualified) { auto Results = completions( R"cpp( + namespace ns { void bar(); } void f() { ::ns::^ } )cpp", {cls("ns::XYZ")}); - EXPECT_THAT(Results.items, Has("XYZ", CompletionItemKind::Class)); + EXPECT_THAT(Results.items, AllOf(Has("XYZ", CompletionItemKind::Class), + Has("bar", CompletionItemKind::Function))); +} + +TEST(CompletionTest, SemaIndexMerge) { + auto Results = completions( + R"cpp( + namespace ns { int local; void both(); } + void f() { ::ns::^ } + )cpp", + {func("ns::both"), cls("ns::Index")}); + // We get results from both index and sema, with no duplicates. + EXPECT_THAT( + Results.items, + UnorderedElementsAre(Named("local"), Named("Index"), Named("both"))); } TEST(CompletionTest, IndexSuppressesPreambleCompletions) {