Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -14,9 +14,11 @@ //===---------------------------------------------------------------------===// #include "index/Index.h" +#include "index/Merge.h" #include "index/SymbolCollector.h" #include "index/SymbolYAML.h" #include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/CompilerInstance.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexingAction.h" #include "clang/Tooling/CommonOptionsParser.h" @@ -34,6 +36,7 @@ namespace clang { namespace clangd { +namespace { static llvm::cl::opt AssumedHeaderDir( "assume-header-dir", @@ -60,6 +63,12 @@ index::createIndexingAction(C, Opts, nullptr)), Ctx(Ctx), Collector(C) {} + // XXX this is just to make running the tool fast during dev! + bool BeginInvocation(CompilerInstance &CI) override { + const auto &Inputs = CI.getInvocation().getFrontendOpts().Inputs; + return !Inputs.empty() && Inputs[0].getFile().contains("clangd/index"); + } + void EndSourceFileAction() override { WrapperFrontendAction::EndSourceFileAction(); @@ -91,6 +100,25 @@ tooling::ExecutionContext *Ctx; }; +// Combine occurrences of the same symbol across translation units. +SymbolSlab mergeSymbols(tooling::ToolResults *Results) { + SymbolSlab::Builder UniqueSymbols; + llvm::BumpPtrAllocator Arena; + Symbol::Details Scratch; + Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) { + Arena.Reset(); + auto Sym = clang::clangd::SymbolFromYAML(Value, Arena); + clang::clangd::SymbolID ID; + Key >> ID; + if (const auto *Existing = UniqueSymbols.find(ID)) + UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); + else + UniqueSymbols.insert(Sym); + }); + return std::move(UniqueSymbols).build(); +} + +} // namespace } // namespace clangd } // namespace clang @@ -115,6 +143,7 @@ return 1; } + // Map phase: emit symbols found in each translation unit. auto Err = Executor->get()->execute( llvm::make_unique( Executor->get()->getExecutionContext())); @@ -122,22 +151,12 @@ llvm::errs() << llvm::toString(std::move(Err)) << "\n"; } - // Deduplicate the result by key and keep the longest value. - // FIXME(ioeric): Merge occurrences, rather than just dropping all but one. - // Definitions and forward declarations have the same key and may both have - // information. Usage count will need to be aggregated across occurrences, - // too. - llvm::StringMap UniqueSymbols; - Executor->get()->getToolResults()->forEachResult( - [&UniqueSymbols](llvm::StringRef Key, llvm::StringRef Value) { - auto Ret = UniqueSymbols.try_emplace(Key, Value); - if (!Ret.second) { - // If key already exists, keep the longest value. - llvm::StringRef &V = Ret.first->second; - V = V.size() < Value.size() ? Value : V; - } - }); + // Reduce phase: combine symbols using the ID as a key. + auto UniqueSymbols = + clang::clangd::mergeSymbols(Executor->get()->getToolResults()); + + // Output phase: emit YAML for result symbols. for (const auto &Sym : UniqueSymbols) - llvm::outs() << Sym.second; + llvm::outs() << SymbolToYAML(Sym); return 0; } Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -27,11 +27,14 @@ llvm::StringRef FilePath; // The 0-based offset to the first character of the symbol from the beginning // of the source file. - unsigned StartOffset; + unsigned StartOffset = 0; // The 0-based offset to the last character of the symbol from the beginning // of the source file. - unsigned EndOffset; + unsigned EndOffset = 0; + + operator bool() const { return !FilePath.empty(); } }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); // The class identifies a particular C++ symbol (class, function, method, etc). // @@ -44,7 +47,7 @@ class SymbolID { public: SymbolID() = default; - SymbolID(llvm::StringRef USR); + explicit SymbolID(llvm::StringRef USR); bool operator==(const SymbolID &Sym) const { return HashValue == Sym.HashValue; @@ -125,6 +128,7 @@ // * For non-inline functions, the canonical declaration is a declaration // (not a definition), which is usually declared in ".h" file. SymbolLocation CanonicalDeclaration; + SymbolLocation Definition; /// A brief description of the symbol that can be displayed in the completion /// candidate list. For example, "Foo(X x, Y y) const" is a labal for a @@ -160,6 +164,7 @@ // FIXME: add all occurrences support. // FIXME: add extra fields for index scoring signals. }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); // An immutable symbol container that stores a set of symbols. // The container will maintain the lifetime of the symbols. Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -10,11 +10,18 @@ #include "Index.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/raw_ostream.h" namespace clang { namespace clangd { using namespace llvm; +raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) { + if (!L) + return OS << "(none)"; + return OS << L.FilePath << "[" << L.StartOffset << "-" << L.EndOffset << "]"; +} + SymbolID::SymbolID(StringRef USR) : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {} @@ -29,6 +36,10 @@ std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin()); } +raw_ostream &operator<<(raw_ostream &OS, const Symbol &S) { + return OS << S.Scope << S.Name; +} + SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const { auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID, [](const Symbol &S, const SymbolID &I) { @@ -55,6 +66,7 @@ Intern(S.Name); Intern(S.Scope); Intern(S.CanonicalDeclaration.FilePath); + Intern(S.Definition.FilePath); Intern(S.CompletionLabel); Intern(S.CompletionFilterText); Index: clangd/index/Merge.cpp =================================================================== --- clangd/index/Merge.cpp +++ clangd/index/Merge.cpp @@ -63,7 +63,11 @@ Symbol S = L; // For each optional field, fill it from R if missing in L. // (It might be missing in R too, but that's a no-op). - if (S.CanonicalDeclaration.FilePath == "") + if (!S.Definition) + S.Definition = R.Definition; + // Prefer the canonical declaration from TUs that saw the definition. + // Classes: this is the def itself. Functions: hopefully the header decl. + if (!S.CanonicalDeclaration || R.Definition) S.CanonicalDeclaration = R.CanonicalDeclaration; if (S.CompletionLabel == "") S.CompletionLabel = R.CompletionLabel; Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -53,6 +53,9 @@ SymbolSlab takeSymbols() { return std::move(Symbols).build(); } private: + const Symbol *addDeclaration(const NamedDecl &, SymbolID); + void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); + // All Symbols collected from the AST. SymbolSlab::Builder Symbols; ASTContext *ASTCtx; Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -117,30 +117,34 @@ // For symbols defined inside macros: // * use expansion location, if the symbol is formed via macro concatenation. // * use spelling location, otherwise. -SymbolLocation GetSymbolLocation(const NamedDecl *D, SourceManager &SM, +SymbolLocation getSymbolLocation(const NamedDecl &D, SourceManager &SM, StringRef FallbackDir, std::string &FilePathStorage) { - SymbolLocation Location; - - SourceLocation Loc = SM.getSpellingLoc(D->getLocation()); - if (D->getLocation().isMacroID()) { + SourceLocation Loc = SM.getSpellingLoc(D.getLocation()); + if (D.getLocation().isMacroID()) { // The symbol is formed via macro concatenation, the spelling location will // be "", which is not interesting to us, use the expansion // location instead. if (llvm::StringRef(Loc.printToString(SM)).startswith("getLocation())), + SM, SM.getFilename(SM.getExpansionLoc(D.getLocation())), FallbackDir); - return {FilePathStorage, - SM.getFileOffset(SM.getExpansionRange(D->getLocStart()).first), - SM.getFileOffset(SM.getExpansionRange(D->getLocEnd()).second)}; + SymbolLocation Result; + Result.FilePath = FilePathStorage; + Result.StartOffset = + SM.getFileOffset(SM.getExpansionRange(D.getLocStart()).first); + Result.EndOffset = + SM.getFileOffset(SM.getExpansionRange(D.getLocEnd()).second); + return Result; } } FilePathStorage = makeAbsolutePath(SM, SM.getFilename(Loc), FallbackDir); - return {FilePathStorage, - SM.getFileOffset(SM.getSpellingLoc(D->getLocStart())), - SM.getFileOffset(SM.getSpellingLoc(D->getLocEnd()))}; + SymbolLocation Result; + Result.FilePath = FilePathStorage; + Result.StartOffset = SM.getFileOffset(SM.getSpellingLoc(D.getLocStart())); + Result.EndOffset = SM.getFileOffset(SM.getSpellingLoc(D.getLocEnd())); + return Result; } } // namespace @@ -176,62 +180,85 @@ return true; auto ID = SymbolID(USR); - if (Symbols.find(ID) != nullptr) - return true; - - auto &SM = ND->getASTContext().getSourceManager(); - - std::string QName; - llvm::raw_string_ostream OS(QName); - PrintingPolicy Policy(ASTCtx->getLangOpts()); - // Note that inline namespaces are treated as transparent scopes. This - // reflects the way they're most commonly used for lookup. Ideally we'd - // include them, but at query time it's hard to find all the inline - // namespaces to query: the preamble doesn't have a dedicated list. - Policy.SuppressUnwrittenScope = true; - ND->printQualifiedName(OS, Policy); - OS.flush(); - - Symbol S; - S.ID = std::move(ID); - std::tie(S.Scope, S.Name) = splitQualifiedName(QName); - S.SymInfo = index::getSymbolInfo(D); - std::string FilePath; - S.CanonicalDeclaration = GetSymbolLocation(ND, SM, Opts.FallbackDir, FilePath); - - // Add completion info. - assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set."); - CodeCompletionResult SymbolCompletion(ND, 0); - const auto *CCS = SymbolCompletion.CreateCodeCompletionString( - *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator, - *CompletionTUInfo, - /*IncludeBriefComments*/ true); - std::string Label; - std::string SnippetInsertText; - std::string IgnoredLabel; - std::string PlainInsertText; - getLabelAndInsertText(*CCS, &Label, &SnippetInsertText, - /*EnableSnippets=*/true); - getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText, - /*EnableSnippets=*/false); - std::string FilterText = getFilterText(*CCS); - std::string Documentation = getDocumentation(*CCS); - std::string CompletionDetail = getDetail(*CCS); - - S.CompletionFilterText = FilterText; - S.CompletionLabel = Label; - S.CompletionPlainInsertText = PlainInsertText; - S.CompletionSnippetInsertText = SnippetInsertText; - Symbol::Details Detail; - Detail.Documentation = Documentation; - Detail.CompletionDetail = CompletionDetail; - S.Detail = &Detail; - - Symbols.insert(S); + const Symbol* BasicSymbol = Symbols.find(ID); + if (!BasicSymbol) // Regardless of role, ND is the canonical declaration. + BasicSymbol = addDeclaration(*ND, std::move(ID)); + if (Roles & static_cast(index::SymbolRole::Definition)) + addDefinition(*cast(ASTNode.OrigD), *BasicSymbol); } - return true; } +const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, + SymbolID ID) { + auto &SM = ND.getASTContext().getSourceManager(); + + std::string QName; + llvm::raw_string_ostream OS(QName); + PrintingPolicy Policy(ASTCtx->getLangOpts()); + // Note that inline namespaces are treated as transparent scopes. This + // reflects the way they're most commonly used for lookup. Ideally we'd + // include them, but at query time it's hard to find all the inline + // namespaces to query: the preamble doesn't have a dedicated list. + Policy.SuppressUnwrittenScope = true; + ND.printQualifiedName(OS, Policy); + OS.flush(); + + Symbol S; + S.ID = std::move(ID); + std::tie(S.Scope, S.Name) = splitQualifiedName(QName); + S.SymInfo = index::getSymbolInfo(&ND); + std::string FilePath; + // FIXME: we may want a different "canonical" heuristic than clang chooses. + S.CanonicalDeclaration = + getSymbolLocation(ND, SM, Opts.FallbackDir, FilePath); + + // Add completion info. + // FIXME: we may want to choose a different redecl, or combine from several. + assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set."); + CodeCompletionResult SymbolCompletion(&ND, 0); + const auto *CCS = SymbolCompletion.CreateCodeCompletionString( + *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator, + *CompletionTUInfo, + /*IncludeBriefComments*/ true); + std::string Label; + std::string SnippetInsertText; + std::string IgnoredLabel; + std::string PlainInsertText; + getLabelAndInsertText(*CCS, &Label, &SnippetInsertText, + /*EnableSnippets=*/true); + getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText, + /*EnableSnippets=*/false); + std::string FilterText = getFilterText(*CCS); + std::string Documentation = getDocumentation(*CCS); + std::string CompletionDetail = getDetail(*CCS); + + S.CompletionFilterText = FilterText; + S.CompletionLabel = Label; + S.CompletionPlainInsertText = PlainInsertText; + S.CompletionSnippetInsertText = SnippetInsertText; + Symbol::Details Detail; + Detail.Documentation = Documentation; + Detail.CompletionDetail = CompletionDetail; + S.Detail = &Detail; + + Symbols.insert(S); + return Symbols.find(S.ID); +} + +void SymbolCollector::addDefinition(const NamedDecl &ND, + const Symbol &DeclSym) { + if (DeclSym.Definition) + return; + // If we saw some forward declaration, we end up copying the symbol. + // This is not ideal, but avoids duplicating the "is this a definition" check + // in clang::index. We should only see one definition. + Symbol S = DeclSym; + std::string FilePath; + S.Definition = getSymbolLocation(ND, ND.getASTContext().getSourceManager(), + Opts.FallbackDir, FilePath); + Symbols.insert(S); +} + } // namespace clangd } // namespace clang Index: clangd/index/SymbolYAML.h =================================================================== --- clangd/index/SymbolYAML.h +++ clangd/index/SymbolYAML.h @@ -25,7 +25,11 @@ namespace clangd { // Read symbols from a YAML-format string. -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent); +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent); + +// Read one symbol from a YAML-format string, backed by the arena. +Symbol SymbolFromYAML(llvm::StringRef YAMLContent, + llvm::BumpPtrAllocator &Arena); // Convert a single symbol to YAML-format string. // The YAML result is safe to concatenate. Index: clangd/index/SymbolYAML.cpp =================================================================== --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -97,7 +97,9 @@ IO.mapRequired("Name", Sym.Name); IO.mapRequired("Scope", Sym.Scope); IO.mapRequired("SymInfo", Sym.SymInfo); - IO.mapRequired("CanonicalDeclaration", Sym.CanonicalDeclaration); + IO.mapOptional("CanonicalDeclaration", Sym.CanonicalDeclaration, + SymbolLocation()); + IO.mapOptional("Definition", Sym.Definition, SymbolLocation()); IO.mapRequired("CompletionLabel", Sym.CompletionLabel); IO.mapRequired("CompletionFilterText", Sym.CompletionFilterText); IO.mapRequired("CompletionPlainInsertText", Sym.CompletionPlainInsertText); @@ -160,7 +162,7 @@ namespace clang { namespace clangd { -SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent) { +SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent) { // Store data of pointer fields (excl. `StringRef`) like `Detail`. llvm::BumpPtrAllocator Arena; llvm::yaml::Input Yin(YAMLContent, &Arena); @@ -173,6 +175,14 @@ return std::move(Syms).build(); } +Symbol SymbolFromYAML(llvm::StringRef YAMLContent, + llvm::BumpPtrAllocator &Arena) { + llvm::yaml::Input Yin(YAMLContent, &Arena); + Symbol S; + Yin >> S; + return S; +} + std::string SymbolsToYAML(const SymbolSlab& Symbols) { std::string Str; llvm::raw_string_ostream OS(Str); Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -37,7 +37,7 @@ llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; return nullptr; } - auto Slab = SymbolFromYAML(Buffer.get()->getBuffer()); + auto Slab = SymbolsFromYAML(Buffer.get()->getBuffer()); SymbolSlab::Builder SymsBuilder; for (auto Sym : Slab) SymsBuilder.insert(Sym); Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -439,18 +439,18 @@ ... )"; - auto Symbols1 = SymbolFromYAML(YAML1); + auto Symbols1 = SymbolsFromYAML(YAML1); EXPECT_THAT(Symbols1, UnorderedElementsAre( AllOf(QName("clang::Foo1"), Labeled("Foo1-label"), Doc("Foo doc"), Detail("int")))); - auto Symbols2 = SymbolFromYAML(YAML2); + auto Symbols2 = SymbolsFromYAML(YAML2); EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(QName("clang::Foo2"), Labeled("Foo2-label"), Not(HasDetail())))); std::string ConcatenatedYAML = SymbolsToYAML(Symbols1) + SymbolsToYAML(Symbols2); - auto ConcatenatedSymbols = SymbolFromYAML(ConcatenatedYAML); + auto ConcatenatedSymbols = SymbolsFromYAML(ConcatenatedYAML); EXPECT_THAT(ConcatenatedSymbols, UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2")));