Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -49,9 +49,33 @@ "not given, such headers will have relative paths."), llvm::cl::init("")); +static llvm::cl::opt MergeOnTheFly( + "merge-on-the-fly", + llvm::cl::desc( + "Merges symbols for each processed translation unit as soon " + "they become available. This results in a smaller memory " + "usage and an almost instant reduce stage. Optimal for running as a " + "standalone tool, but cannot be used with multi-process executors like " + "MapReduce."), + llvm::cl::init(true)); + +/// Responsible for aggregating symbols from each processed file and producing +/// the final results. All methods in this class must be thread-safe, +/// 'consumeSymbols' may be called from multiple threads. +class SymbolsConsumer { +public: + virtual ~SymbolsConsumer() = default; + + /// Consume a SymbolSlab build for a file. + virtual void consumeSymbols(SymbolSlab Symbols) = 0; + /// Produce a resulting symbol slab, by combining occurrences of the same + /// symbols across translation units. + virtual SymbolSlab mergeResults() = 0; +}; + class SymbolIndexActionFactory : public tooling::FrontendActionFactory { public: - SymbolIndexActionFactory(tooling::ExecutionContext *Ctx) : Ctx(Ctx) {} + SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {} clang::FrontendAction *create() override { // Wraps the index action and reports collected symbols to the execution @@ -61,10 +85,10 @@ WrappedIndexAction(std::shared_ptr C, std::unique_ptr Includes, const index::IndexingOptions &Opts, - tooling::ExecutionContext *Ctx) + SymbolsConsumer &Consumer) : WrapperFrontendAction( index::createIndexingAction(C, Opts, nullptr)), - Ctx(Ctx), Collector(C), Includes(std::move(Includes)), + Consumer(Consumer), Collector(C), Includes(std::move(Includes)), PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} std::unique_ptr @@ -91,14 +115,11 @@ return; } - auto Symbols = Collector->takeSymbols(); - for (const auto &Sym : Symbols) { - Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym)); - } + Consumer.consumeSymbols(Collector->takeSymbols()); } private: - tooling::ExecutionContext *Ctx; + SymbolsConsumer &Consumer; std::shared_ptr Collector; std::unique_ptr Includes; std::unique_ptr PragmaHandler; @@ -118,30 +139,72 @@ CollectorOpts.Includes = Includes.get(); return new WrappedIndexAction( std::make_shared(std::move(CollectorOpts)), - std::move(Includes), IndexOpts, Ctx); + std::move(Includes), IndexOpts, Consumer); } - tooling::ExecutionContext *Ctx; + SymbolsConsumer &Consumer; }; -// Combine occurrences of the same symbol across translation units. -SymbolSlab mergeSymbols(tooling::ToolResults *Results) { - SymbolSlab::Builder UniqueSymbols; - llvm::BumpPtrAllocator Arena; - Symbol::Details Scratch; - Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) { - Arena.Reset(); - llvm::yaml::Input Yin(Value, &Arena); - auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); - clang::clangd::SymbolID ID; - Key >> ID; - if (const auto *Existing = UniqueSymbols.find(ID)) - UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); - else - UniqueSymbols.insert(Sym); - }); - return std::move(UniqueSymbols).build(); -} +/// Stashes per-file results inside ExecutionContext, merges all of them at the +/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols +/// from multiple files in memory. +class ToolExecutorConsumer : public SymbolsConsumer { +public: + ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {} + + void consumeSymbols(SymbolSlab Symbols) override { + for (const auto &Sym : Symbols) + Executor.getExecutionContext()->reportResult(Sym.ID.str(), + SymbolToYAML(Sym)); + } + + SymbolSlab mergeResults() override { + SymbolSlab::Builder UniqueSymbols; + llvm::BumpPtrAllocator Arena; + Symbol::Details Scratch; + Executor.getToolResults()->forEachResult( + [&](llvm::StringRef Key, llvm::StringRef Value) { + Arena.Reset(); + llvm::yaml::Input Yin(Value, &Arena); + auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); + clang::clangd::SymbolID ID; + Key >> ID; + if (const auto *Existing = UniqueSymbols.find(ID)) + UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); + else + UniqueSymbols.insert(Sym); + }); + return std::move(UniqueSymbols).build(); + } + +private: + ToolExecutor &Executor; +}; + +/// Merges symbols for each translation unit as soon as the file is processed. +/// Optimal choice for standalone tools. +class OnTheFlyConsumer : public SymbolsConsumer { +public: + void consumeSymbols(SymbolSlab Symbols) override { + std::lock_guard Lock(Mut); + for (auto &&Sym : Symbols) { + Symbol::Details Scratch; + if (const auto *Existing = Result.find(Sym.ID)) + Result.insert(mergeSymbol(*Existing, Sym, &Scratch)); + else + Result.insert(Sym); + } + } + + SymbolSlab mergeResults() override { + std::lock_guard Lock(Mut); + return std::move(Result).build(); + } + +private: + std::mutex Mut; + SymbolSlab::Builder Result; +}; } // namespace } // namespace clangd @@ -181,18 +244,28 @@ return 1; } + if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) { + llvm::errs() + << "Found multi-process executor, forcing the use of intermediate YAML " + "serialization instead of the on-the-fly merge.\n"; + clang::clangd::MergeOnTheFly = false; + } + + std::unique_ptr Consumer; + if (clang::clangd::MergeOnTheFly) + Consumer = llvm::make_unique(); + else + Consumer = + llvm::make_unique(**Executor); + // Map phase: emit symbols found in each translation unit. auto Err = Executor->get()->execute( - llvm::make_unique( - Executor->get()->getExecutionContext())); + llvm::make_unique(*Consumer)); if (Err) { llvm::errs() << llvm::toString(std::move(Err)) << "\n"; } - - // Reduce phase: combine symbols using the ID as a key. - auto UniqueSymbols = - clang::clangd::mergeSymbols(Executor->get()->getToolResults()); - + // Reduce phase: combine symbols with the same IDs. + auto UniqueSymbols = Consumer->mergeResults(); // Output phase: emit YAML for result symbols. SymbolsToYAML(UniqueSymbols, llvm::outs()); return 0;