Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -40,6 +40,7 @@ index/CanonicalIncludes.cpp index/FileIndex.cpp index/Index.cpp + index/IndexAction.cpp index/MemIndex.cpp index/Merge.cpp index/Serialization.cpp Index: clangd/index/IndexAction.h =================================================================== --- /dev/null +++ clangd/index/IndexAction.h @@ -0,0 +1,32 @@ +//===--- IndexAction.h - Run the indexer as a frontend action ----*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H +#include "SymbolCollector.h" +#include "clang/Frontend/FrontendActions.h" + +namespace clang { +namespace clangd { + +// Creates an action that indexes translation units and delivers the results +// for SymbolsCallback (each slab corresponds to one TU). +// +// Only a subset of SymbolCollector::Options are respected: +// - include paths are always collected, and canonicalized appropriately +// - references are always counted +// - the symbol origin is always Static +std::unique_ptr +createStaticIndexingAction(SymbolCollector::Options Opts, + std::function SymbolsCallback); + +} // namespace clangd +} // namespace clang + +#endif Index: clangd/index/IndexAction.cpp =================================================================== --- /dev/null +++ clangd/index/IndexAction.cpp @@ -0,0 +1,73 @@ +#include "IndexAction.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Index/IndexDataConsumer.h" +#include "clang/Index/IndexingAction.h" +#include "clang/Tooling/Tooling.h" +namespace clang { +namespace clangd { +namespace { + +// Wraps the index action and reports index data after each translation unit. +class IndexAction : public WrapperFrontendAction { +public: + IndexAction(std::shared_ptr C, + std::unique_ptr Includes, + const index::IndexingOptions &Opts, + std::function &SymbolsCallback) + : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)), + SymbolsCallback(SymbolsCallback), Collector(C), + Includes(std::move(Includes)), + PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} + + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override { + CI.getPreprocessor().addCommentHandler(PragmaHandler.get()); + return WrapperFrontendAction::CreateASTConsumer(CI, InFile); + } + + bool BeginInvocation(CompilerInstance &CI) override { + // We want all comments, not just the doxygen ones. + CI.getLangOpts().CommentOpts.ParseAllComments = true; + return WrapperFrontendAction::BeginInvocation(CI); + } + + void EndSourceFileAction() override { + WrapperFrontendAction::EndSourceFileAction(); + + const auto &CI = getCompilerInstance(); + if (CI.hasDiagnostics() && + CI.getDiagnostics().hasUncompilableErrorOccurred()) { + llvm::errs() << "Skipping TU due to uncompilable errors\n"; + return; + } + SymbolsCallback(Collector->takeSymbols()); + } + +private: + std::function SymbolsCallback; + std::shared_ptr Collector; + std::unique_ptr Includes; + std::unique_ptr PragmaHandler; +}; + +} // namespace + +std::unique_ptr +createStaticIndexingAction(SymbolCollector::Options Opts, + std::function SymbolsCallback) { + index::IndexingOptions IndexOpts; + IndexOpts.SystemSymbolFilter = + index::IndexingOptions::SystemSymbolFilterKind::All; + Opts.CollectIncludePath = true; + Opts.CountReferences = true; + Opts.Origin = SymbolOrigin::Static; + auto Includes = llvm::make_unique(); + addSystemHeadersMapping(Includes.get()); + Opts.Includes = Includes.get(); + return llvm::make_unique( + std::make_shared(std::move(Opts)), std::move(Includes), + IndexOpts, SymbolsCallback); +}; + +} // namespace clangd +} // namespace clang Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -40,23 +40,26 @@ YAML, // Human-readable format, suitable for experiments and debugging. }; +// Holds the contents of an index file that was read. +struct IndexFileIn { + llvm::Optional Symbols; +}; +// Parse an index file. The input must be a RIFF container chunk. +llvm::Expected readIndexFile(llvm::StringRef); + // Specifies the contents of an index file to be written. struct IndexFileOut { const SymbolSlab *Symbols; // TODO: Support serializing symbol occurrences. // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; -}; -// Serializes an index file. (This is a RIFF container chunk). -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O); -// Holds the contents of an index file that was read. -struct IndexFileIn { - llvm::Optional Symbols; - IndexFileFormat Format; + IndexFileOut() = default; + IndexFileOut(const IndexFileIn &I) + : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {} }; -// Parse an index file. The input must be a RIFF container chunk. -llvm::Expected readIndexFile(llvm::StringRef); +// Serializes an index file. +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O); std::string toYAML(const Symbol &); // Returned symbol is backed by the YAML input. Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -6,6 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H #include "CanonicalIncludes.h" #include "Index.h" @@ -122,3 +124,4 @@ } // namespace clangd } // namespace clang +#endif Index: clangd/indexer/IndexerMain.cpp =================================================================== --- clangd/indexer/IndexerMain.cpp +++ clangd/indexer/IndexerMain.cpp @@ -15,6 +15,7 @@ #include "RIFF.h" #include "index/CanonicalIncludes.h" #include "index/Index.h" +#include "index/IndexAction.h" #include "index/Merge.h" #include "index/Serialization.h" #include "index/SymbolCollector.h" @@ -49,16 +50,6 @@ "not given, such headers will have relative paths."), llvm::cl::init("")); -static llvm::cl::opt MergeOnTheFly( - "merge-on-the-fly", - llvm::cl::desc( - "Merges symbols for each processed translation unit as soon " - "they become available. This results in a smaller memory " - "usage and an almost instant reduce stage. Optimal for running as a " - "standalone tool, but cannot be used with multi-process executors like " - "MapReduce."), - llvm::cl::init(true), llvm::cl::Hidden); - static llvm::cl::opt Format("format", llvm::cl::desc("Format of the index to be written"), llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml", @@ -67,145 +58,36 @@ "binary RIFF format")), llvm::cl::init(IndexFileFormat::YAML)); -/// Responsible for aggregating symbols from each processed file and producing -/// the final results. All methods in this class must be thread-safe, -/// 'consumeSymbols' may be called from multiple threads. -class SymbolsConsumer { -public: - virtual ~SymbolsConsumer() = default; - - /// Consume a SymbolSlab build for a file. - virtual void consumeSymbols(SymbolSlab Symbols) = 0; - /// Produce a resulting symbol slab, by combining occurrences of the same - /// symbols across translation units. - virtual SymbolSlab mergeResults() = 0; -}; - -class SymbolIndexActionFactory : public tooling::FrontendActionFactory { +class IndexActionFactory : public tooling::FrontendActionFactory { public: - SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {} + IndexActionFactory(IndexFileIn &Result) : Result(Result) {} clang::FrontendAction *create() override { - // Wraps the index action and reports collected symbols to the execution - // context at the end of each translation unit. - class WrappedIndexAction : public WrapperFrontendAction { - public: - WrappedIndexAction(std::shared_ptr C, - std::unique_ptr Includes, - const index::IndexingOptions &Opts, - SymbolsConsumer &Consumer) - : WrapperFrontendAction( - index::createIndexingAction(C, Opts, nullptr)), - Consumer(Consumer), Collector(C), Includes(std::move(Includes)), - PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} - - std::unique_ptr - CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { - CI.getPreprocessor().addCommentHandler(PragmaHandler.get()); - return WrapperFrontendAction::CreateASTConsumer(CI, InFile); - } - - bool BeginInvocation(CompilerInstance &CI) override { - // We want all comments, not just the doxygen ones. - CI.getLangOpts().CommentOpts.ParseAllComments = true; - return WrapperFrontendAction::BeginInvocation(CI); - } - - void EndSourceFileAction() override { - WrapperFrontendAction::EndSourceFileAction(); - - const auto &CI = getCompilerInstance(); - if (CI.hasDiagnostics() && - CI.getDiagnostics().hasUncompilableErrorOccurred()) { - llvm::errs() - << "Found uncompilable errors in the translation unit. Igoring " - "collected symbols...\n"; - return; - } - - Consumer.consumeSymbols(Collector->takeSymbols()); - } - - private: - SymbolsConsumer &Consumer; - std::shared_ptr Collector; - std::unique_ptr Includes; - std::unique_ptr PragmaHandler; - }; - - index::IndexingOptions IndexOpts; - IndexOpts.SystemSymbolFilter = - index::IndexingOptions::SystemSymbolFilterKind::All; - IndexOpts.IndexFunctionLocals = false; - auto CollectorOpts = SymbolCollector::Options(); - CollectorOpts.FallbackDir = AssumedHeaderDir; - CollectorOpts.CollectIncludePath = true; - CollectorOpts.CountReferences = true; - CollectorOpts.Origin = SymbolOrigin::Static; - auto Includes = llvm::make_unique(); - addSystemHeadersMapping(Includes.get()); - CollectorOpts.Includes = Includes.get(); - return new WrappedIndexAction( - std::make_shared(std::move(CollectorOpts)), - std::move(Includes), IndexOpts, Consumer); + SymbolCollector::Options Opts; + Opts.FallbackDir = AssumedHeaderDir; + return createStaticIndexingAction( + Opts, + [&](SymbolSlab S) { + // Merge as we go. + std::lock_guard Lock(SymbolsMu); + for (const auto &Sym : S) { + if (const auto *Existing = Symbols.find(Sym.ID)) + Symbols.insert(mergeSymbol(*Existing, Sym)); + else + Symbols.insert(Sym); + } + }) + .release(); } - SymbolsConsumer &Consumer; -}; - -/// Stashes per-file results inside ExecutionContext, merges all of them at the -/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols -/// from multiple files in memory. -class ToolExecutorConsumer : public SymbolsConsumer { -public: - ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {} - - void consumeSymbols(SymbolSlab Symbols) override { - for (const auto &Sym : Symbols) - Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym)); - } - - SymbolSlab mergeResults() override { - SymbolSlab::Builder UniqueSymbols; - Executor.getToolResults()->forEachResult( - [&](llvm::StringRef Key, llvm::StringRef Value) { - llvm::yaml::Input Yin(Value); - auto Sym = cantFail(clang::clangd::symbolFromYAML(Yin)); - auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key)); - if (const auto *Existing = UniqueSymbols.find(ID)) - UniqueSymbols.insert(mergeSymbol(*Existing, Sym)); - else - UniqueSymbols.insert(Sym); - }); - return std::move(UniqueSymbols).build(); - } + // Awkward: we write the result in the destructor, because the executor + // takes ownership so it's the easiest way to get our data back out. + ~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); } private: - ToolExecutor &Executor; -}; - -/// Merges symbols for each translation unit as soon as the file is processed. -/// Optimal choice for standalone tools. -class OnTheFlyConsumer : public SymbolsConsumer { -public: - void consumeSymbols(SymbolSlab Symbols) override { - std::lock_guard Lock(Mut); - for (auto &&Sym : Symbols) { - if (const auto *Existing = Result.find(Sym.ID)) - Result.insert(mergeSymbol(*Existing, Sym)); - else - Result.insert(Sym); - } - } - - SymbolSlab mergeResults() override { - std::lock_guard Lock(Mut); - return std::move(Result).build(); - } - -private: - std::mutex Mut; - SymbolSlab::Builder Result; + IndexFileIn &Result; + std::mutex SymbolsMu; + SymbolSlab::Builder Symbols; }; } // namespace @@ -216,12 +98,10 @@ llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); const char *Overview = R"( - This is an **experimental** tool to extract symbols from a whole project - for clangd (global code completion). It will be changed and deprecated - eventually. Don't use it in production code! + Creates an index of symbol information etc in a whole project. + This is **experimental** and not production-ready! - Example usage for building index for the whole project using CMake compile - commands: + Example usage for a project using CMake compile commands: $ clangd-indexer --executor=all-TUs compile_commands.json > index.yaml @@ -229,7 +109,7 @@ $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > index.yaml - Note: only symbols from header files will be collected. + Note: only symbols from header files will be indexed. )"; auto Executor = clang::tooling::createExecutorFromCommandLineArgs( @@ -246,31 +126,16 @@ return 1; } - if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) { - llvm::errs() - << "Found multi-process executor, forcing the use of intermediate YAML " - "serialization instead of the on-the-fly merge.\n"; - clang::clangd::MergeOnTheFly = false; - } - - std::unique_ptr Consumer; - if (clang::clangd::MergeOnTheFly) - Consumer = llvm::make_unique(); - else - Consumer = - llvm::make_unique(**Executor); - // Map phase: emit symbols found in each translation unit. + clang::clangd::IndexFileIn Data; auto Err = Executor->get()->execute( - llvm::make_unique(*Consumer)); + llvm::make_unique(Data)); if (Err) { llvm::errs() << llvm::toString(std::move(Err)) << "\n"; } - // Reduce phase: combine symbols with the same IDs. - auto UniqueSymbols = Consumer->mergeResults(); + // Output phase: emit result symbols. - clang::clangd::IndexFileOut Out; - Out.Symbols = &UniqueSymbols; + clang::clangd::IndexFileOut Out(Data); Out.Format = clang::clangd::Format; llvm::outs() << Out; return 0;