Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -25,6 +25,7 @@ index/FileIndex.cpp index/Index.cpp index/MemIndex.cpp + index/Merge.cpp index/SymbolCollector.cpp index/SymbolYAML.cpp Index: clangd/ClangdServer.h =================================================================== --- clangd/ClangdServer.h +++ clangd/ClangdServer.h @@ -340,13 +340,16 @@ DiagnosticsConsumer &DiagConsumer; FileSystemProvider &FSProvider; DraftStore DraftMgr; - /// If set, this manages index for symbols in opened files. + // The index used to look up symbols. This could be: + // - null (all index functionality is optional) + // - the dynamic index owned by ClangdServer (FileIdx) + // - the static index passed to the constructor + // - a merged view of a static and dynamic index (MergedIndex) + SymbolIndex *Index; + // If present, an up-to-date of symbols in open files. Read via Index. std::unique_ptr FileIdx; - /// If set, this provides static index for project-wide global symbols. - /// clangd global code completion result will come from the static index and - /// the `FileIdx` above. - /// No owned, the life time is managed by clangd embedders. - SymbolIndex *StaticIdx; + // If present, a merged view of FileIdx and an external index. Read via Index. + std::unique_ptr MergedIndex; CppFileCollection Units; std::string ResourceDir; // If set, this represents the workspace path. Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -11,6 +11,7 @@ #include "CodeComplete.h" #include "SourceCode.h" #include "XRefs.h" +#include "index/Merge.h" #include "clang/Format/Format.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -138,7 +139,6 @@ llvm::Optional ResourceDir) : CDB(CDB), DiagConsumer(DiagConsumer), FSProvider(FSProvider), FileIdx(BuildDynamicSymbolIndex ? new FileIndex() : nullptr), - StaticIdx(StaticIdx), // Pass a callback into `Units` to extract symbols from a newly parsed // file and rebuild the file index synchronously each time an AST is // parsed. @@ -151,7 +151,17 @@ ResourceDir(ResourceDir ? ResourceDir->str() : getStandardResourceDir()), PCHs(std::make_shared()), StorePreamblesInMemory(StorePreamblesInMemory), - WorkScheduler(AsyncThreadsCount) {} + WorkScheduler(AsyncThreadsCount) { + if (FileIdx && StaticIdx) { + MergedIndex = mergeIndex(FileIdx.get(), StaticIdx); + Index = MergedIndex.get(); + } else if (FileIdx) + Index = FileIdx.get(); + else if (StaticIdx) + Index = StaticIdx; + else + Index = nullptr; +} void ClangdServer::setRootPath(PathRef RootPath) { std::string NewRootPath = llvm::sys::path::convert_to_slash( @@ -250,10 +260,8 @@ Resources->getPossiblyStalePreamble(); // Copy completion options for passing them to async task handler. auto CodeCompleteOpts = Opts; - if (FileIdx) - CodeCompleteOpts.Index = FileIdx.get(); - if (StaticIdx) - CodeCompleteOpts.StaticIndex = StaticIdx; + if (!CodeCompleteOpts.Index) // Respect overridden index. + CodeCompleteOpts.Index = Index; // Copy File, as it is a PathRef that will go out of scope before Task is // executed. Index: clangd/CodeComplete.h =================================================================== --- clangd/CodeComplete.h +++ clangd/CodeComplete.h @@ -67,10 +67,6 @@ /// FIXME(ioeric): we might want a better way to pass the index around inside /// clangd. const SymbolIndex *Index = nullptr; - - // Populated internally by clangd, do not set. - /// Static index for project-wide global symbols. - const SymbolIndex *StaticIndex = nullptr; }; /// Get code completions at a specified \p Pos in \p FileName. Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -667,17 +667,10 @@ // Got scope specifier (ns::f^) for code completion from sema, try to query // global symbols from indexes. - if (CompletedName.SSInfo) { - // FIXME: figure out a good algorithm to merge symbols from different - // sources (dynamic index, static index, AST symbols from clang's completion - // engine). - if (Opts.Index) - completeWithIndex(Ctx, *Opts.Index, Contents, *CompletedName.SSInfo, - CompletedName.Filter, &Results, /*DebuggingLabel=*/"D"); - if (Opts.StaticIndex) - completeWithIndex(Ctx, *Opts.StaticIndex, Contents, *CompletedName.SSInfo, - CompletedName.Filter, &Results, /*DebuggingLabel=*/"S"); - } + // FIXME: merge with Sema results, and respect limits. + if (CompletedName.SSInfo && Opts.Index) + completeWithIndex(Ctx, *Opts.Index, Contents, *CompletedName.SSInfo, + CompletedName.Filter, &Results, /*DebuggingLabel=*/"I"); return Results; } Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -106,8 +106,9 @@ // WARNING: Symbols do not own much of their underlying data - typically strings // are owned by a SymbolSlab. They should be treated as non-owning references. // Copies are shallow. -// When adding new unowned data fields to Symbol, remember to update -// SymbolSlab::Builder in Index.cpp to copy them to the slab's storage. +// When adding new unowned data fields to Symbol, remember to update: +// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. +// - mergeSymbol in Merge.cpp, to properly combine two Symbols. struct Symbol { // The ID of the symbol. SymbolID ID; @@ -155,7 +156,7 @@ }; // Optional details of the symbol. - Details *Detail = nullptr; + Details *Detail = nullptr; // FIXME: should be const // FIXME: add definition location of the symbol. // FIXME: add all occurrences support. @@ -227,7 +228,8 @@ /// A scope must be fully qualified without leading or trailing "::" e.g. /// "n1::n2". "" is interpreted as the global namespace, and "::" is invalid. std::vector Scopes; - /// \brief The maxinum number of candidates to return. + /// \brief The number of top candidates to return. The index may choose to + /// return more than this, e.g. if it doesn't know which candidates are best. size_t MaxCandidateCount = UINT_MAX; }; @@ -239,6 +241,7 @@ /// \brief Matches symbols in the index fuzzily and applies \p Callback on /// each matched symbol before returning. + /// If returned Symbols are used outside Callback, they must be deep-copied! /// /// Returns true if the result list is complete, false if it was truncated due /// to MaxCandidateCount Index: clangd/index/Merge.h =================================================================== --- /dev/null +++ clangd/index/Merge.h @@ -0,0 +1,29 @@ +//===--- Merge.h ------------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MERGE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MERGE_H +#include "Index.h" +namespace clang { +namespace clangd { + +// Merge symbols L and R, preferring data from L in case of conflict. +// The two symbols must have the same ID. +// Returned symbol may contain data owned by either source. +Symbol mergeSymbol(const Symbol &L, const Symbol &R, Symbol::Details *Scratch); + +// mergedIndex returns a composite index based on two provided Indexes: +// - the Dynamic index covers few files, but is relatively up-to-date. +// - the Static index covers a bigger set of files, but is relatively stale. +// The returned index attempts to combine results, and avoid duplicates. +std::unique_ptr mergeIndex(const SymbolIndex *Dynamic, + const SymbolIndex *Static); + +} // namespace clangd +} // namespace clang +#endif Index: clangd/index/Merge.cpp =================================================================== --- /dev/null +++ clangd/index/Merge.cpp @@ -0,0 +1,98 @@ +//===--- Merge.h ------------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +#include "Merge.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +namespace clang { +namespace clangd { +namespace { +using namespace llvm; + +class MergedIndex : public SymbolIndex { + public: + MergedIndex(const SymbolIndex *Dynamic, const SymbolIndex *Static) + : Dynamic(Dynamic), Static(Static) {} + + // FIXME: Deleted symbols in dirty files are still returned (from Static). + // To identify these eliminate these, we should: + // - find the generating file from each Symbol which is Static-only + // - ask Dynamic if it has that file (needs new SymbolIndex method) + // - if so, drop the Symbol. + bool fuzzyFind(const Context &Ctx, const FuzzyFindRequest &Req, + function_ref Callback) const override { + // We can't step through both sources in parallel. So: + // 1) query all dynamic symbols, slurping results into a slab + // 2) query the static symbols, for each one: + // a) if it's not in the dynamic slab, yield it directly + // b) if it's in the dynamic slab, merge it and yield the result + // 3) now yield all the dynamic symbols we haven't processed. + bool More = false; // We'll be incomplete if either source was. + SymbolSlab::Builder DynB; + More |= + Dynamic->fuzzyFind(Ctx, Req, [&](const Symbol &S) { DynB.insert(S); }); + SymbolSlab Dyn = std::move(DynB).build(); + + DenseSet SeenDynamicSymbols; + Symbol::Details Scratch; + More |= Static->fuzzyFind(Ctx, Req, [&](const Symbol &S) { + auto DynS = Dyn.find(S.ID); + if (DynS == Dyn.end()) + return Callback(S); + SeenDynamicSymbols.insert(S.ID); + Callback(mergeSymbol(*DynS, S, &Scratch)); + }); + for (const Symbol &S : Dyn) + if (!SeenDynamicSymbols.count(S.ID)) + Callback(S); + return More; + } + +private: + const SymbolIndex *Dynamic, *Static; +}; +} + +Symbol +mergeSymbol(const Symbol &L, const Symbol &R, Symbol::Details *Scratch) { + assert(L.ID == R.ID); + Symbol S = L; + // For each optional field, fill it from R if missing in L. + // (It might be missing in R too, but that's a no-op). + if (S.CanonicalDeclaration.FilePath == "") + S.CanonicalDeclaration = R.CanonicalDeclaration; + if (S.CompletionLabel == "") + S.CompletionLabel = R.CompletionLabel; + if (S.CompletionFilterText == "") + S.CompletionFilterText = R.CompletionFilterText; + if (S.CompletionPlainInsertText == "") + S.CompletionPlainInsertText = R.CompletionPlainInsertText; + if (S.CompletionSnippetInsertText == "") + S.CompletionSnippetInsertText = R.CompletionSnippetInsertText; + + if (L.Detail && R.Detail) { + // Copy into scratch space so we can merge. + *Scratch = *L.Detail; + if (Scratch->Documentation == "") + Scratch->Documentation = R.Detail->Documentation; + if (Scratch->CompletionDetail == "") + Scratch->CompletionDetail = R.Detail->CompletionDetail; + S.Detail = Scratch; + } else if (L.Detail) + S.Detail = L.Detail; + else if (R.Detail) + S.Detail = R.Detail; + return S; +} + +std::unique_ptr mergeIndex(const SymbolIndex *Dynamic, + const SymbolIndex *Static) { + return llvm::make_unique(Dynamic, Static); +} +} // namespace clangd +} // namespace clang Index: unittests/clangd/CodeCompleteTests.cpp =================================================================== --- unittests/clangd/CodeCompleteTests.cpp +++ unittests/clangd/CodeCompleteTests.cpp @@ -17,6 +17,7 @@ #include "SourceCode.h" #include "TestFS.h" #include "index/MemIndex.h" +#include "index/Merge.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -518,17 +519,17 @@ clangd::CodeCompleteOptions Opts; auto StaticIdx = simpleIndexFromSymbols({{"ns::XYZ", index::SymbolKind::Class}}); - Opts.StaticIndex = StaticIdx.get(); auto DynamicIdx = simpleIndexFromSymbols({{"ns::foo", index::SymbolKind::Function}}); - Opts.Index = DynamicIdx.get(); + auto Merge = mergeIndex(DynamicIdx.get(), StaticIdx.get()); + Opts.Index = Merge.get(); auto Results = completions(R"cpp( void f() { ::ns::^ } )cpp", Opts); - EXPECT_THAT(Results.items, Contains(Labeled("[S]XYZ"))); - EXPECT_THAT(Results.items, Contains(Labeled("[D]foo"))); + EXPECT_THAT(Results.items, Contains(Labeled("[I]XYZ"))); + EXPECT_THAT(Results.items, Contains(Labeled("[I]foo"))); } TEST(CompletionTest, SimpleIndexBased) { Index: unittests/clangd/IndexTests.cpp =================================================================== --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -9,6 +9,7 @@ #include "index/Index.h" #include "index/MemIndex.h" +#include "index/Merge.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -207,6 +208,42 @@ EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc")); } +TEST(MergeTest, MergeIndex) { + MemIndex I, J; + I.build(generateSymbols({"ns::A", "ns::B"})); + J.build(generateSymbols({"ns::B", "ns::C"})); + FuzzyFindRequest Req; + Req.Scopes = {"ns"}; + EXPECT_THAT(match(*mergeIndex(&I, &J), Req), + UnorderedElementsAre("ns::A", "ns::B", "ns::C")); +} + +TEST(MergeTest, Merge) { + Symbol L, R; + L.ID = R.ID = SymbolID("hello"); + L.Name = R.Name = "Foo"; // same in both + L.CanonicalDeclaration.FilePath = "left.h"; // differs + R.CanonicalDeclaration.FilePath = "right.h"; + L.CompletionPlainInsertText = "f00"; // present in left only + R.CompletionSnippetInsertText = "f0{$1:0}"; // present in right only + Symbol::Details DetL, DetR; + DetL.CompletionDetail = "DetL"; + DetR.CompletionDetail = "DetR"; + DetR.Documentation = "--doc--"; + L.Detail = &DetL; + R.Detail = &DetR; + + Symbol::Details Scratch; + Symbol M = mergeSymbol(L, R, &Scratch); + EXPECT_EQ(M.Name, "Foo"); + EXPECT_EQ(M.CanonicalDeclaration.FilePath, "left.h"); + EXPECT_EQ(M.CompletionPlainInsertText, "f00"); + EXPECT_EQ(M.CompletionSnippetInsertText, "f0{$1:0}"); + ASSERT_TRUE(M.Detail); + EXPECT_EQ(M.Detail->CompletionDetail, "DetL"); + EXPECT_EQ(M.Detail->Documentation, "--doc--"); +} + } // namespace } // namespace clangd } // namespace clang