Index: clang-tools-extra/trunk/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/CMakeLists.txt @@ -73,7 +73,7 @@ add_subdirectory(fuzzer) endif() add_subdirectory(tool) -add_subdirectory(global-symbol-builder) +add_subdirectory(indexer) add_subdirectory(index/dex/dexp) if (LLVM_INCLUDE_BENCHMARKS) Index: clang-tools-extra/trunk/clangd/global-symbol-builder/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/global-symbol-builder/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/global-symbol-builder/CMakeLists.txt @@ -1,20 +0,0 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) - -set(LLVM_LINK_COMPONENTS - Support - ) - -add_clang_executable(global-symbol-builder - GlobalSymbolBuilderMain.cpp - ) - -target_link_libraries(global-symbol-builder - PRIVATE - clangAST - clangIndex - clangDaemon - clangBasic - clangFrontend - clangLex - clangTooling -) Index: clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -1,285 +0,0 @@ -//===--- GlobalSymbolBuilderMain.cpp -----------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// GlobalSymbolBuilder is a tool to extract symbols from a whole project. -// This tool is **experimental** only. Don't use it in production code. 
-// -//===----------------------------------------------------------------------===// - -#include "RIFF.h" -#include "index/CanonicalIncludes.h" -#include "index/Index.h" -#include "index/Merge.h" -#include "index/Serialization.h" -#include "index/SymbolCollector.h" -#include "index/SymbolYAML.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Index/IndexDataConsumer.h" -#include "clang/Index/IndexingAction.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Execution.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/ThreadPool.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm; -using namespace clang::tooling; -using clang::clangd::SymbolSlab; - -namespace clang { -namespace clangd { -namespace { - -static llvm::cl::opt<std::string> AssumedHeaderDir( - "assume-header-dir", - llvm::cl::desc("The index includes header that a symbol is defined in. " - "If the absolute path cannot be determined (e.g. an " - "in-memory VFS) then the relative path is resolved against " - "this directory, which must be absolute. If this flag is " - "not given, such headers will have relative paths."), - llvm::cl::init("")); - -static llvm::cl::opt<bool> MergeOnTheFly( - "merge-on-the-fly", - llvm::cl::desc( - "Merges symbols for each processed translation unit as soon " - "they become available. This results in a smaller memory " - "usage and an almost instant reduce stage.
Optimal for running as a " - "standalone tool, but cannot be used with multi-process executors like " - "MapReduce."), - llvm::cl::init(true), llvm::cl::Hidden); - -enum IndexFormat { YAML, Binary }; -static llvm::cl::opt<IndexFormat> Format( - "format", llvm::cl::desc("Format of the index to be written"), - llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"), - clEnumValN(Binary, "binary", "binary RIFF format")), - llvm::cl::init(YAML)); - -/// Responsible for aggregating symbols from each processed file and producing -/// the final results. All methods in this class must be thread-safe, -/// 'consumeSymbols' may be called from multiple threads. -class SymbolsConsumer { -public: - virtual ~SymbolsConsumer() = default; - - /// Consume a SymbolSlab build for a file. - virtual void consumeSymbols(SymbolSlab Symbols) = 0; - /// Produce a resulting symbol slab, by combining occurrences of the same - /// symbols across translation units. - virtual SymbolSlab mergeResults() = 0; -}; - -class SymbolIndexActionFactory : public tooling::FrontendActionFactory { -public: - SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {} - - clang::FrontendAction *create() override { - // Wraps the index action and reports collected symbols to the execution - // context at the end of each translation unit.
- class WrappedIndexAction : public WrapperFrontendAction { - public: - WrappedIndexAction(std::shared_ptr<SymbolCollector> C, - std::unique_ptr<CanonicalIncludes> Includes, - const index::IndexingOptions &Opts, - SymbolsConsumer &Consumer) - : WrapperFrontendAction( - index::createIndexingAction(C, Opts, nullptr)), - Consumer(Consumer), Collector(C), Includes(std::move(Includes)), - PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} - - std::unique_ptr<ASTConsumer> - CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { - CI.getPreprocessor().addCommentHandler(PragmaHandler.get()); - return WrapperFrontendAction::CreateASTConsumer(CI, InFile); - } - - bool BeginInvocation(CompilerInstance &CI) override { - // We want all comments, not just the doxygen ones. - CI.getLangOpts().CommentOpts.ParseAllComments = true; - return WrapperFrontendAction::BeginInvocation(CI); - } - - void EndSourceFileAction() override { - WrapperFrontendAction::EndSourceFileAction(); - - const auto &CI = getCompilerInstance(); - if (CI.hasDiagnostics() && - CI.getDiagnostics().hasUncompilableErrorOccurred()) { - llvm::errs() - << "Found uncompilable errors in the translation unit.
Igoring " - "collected symbols...\n"; - return; - } - - Consumer.consumeSymbols(Collector->takeSymbols()); - } - - private: - SymbolsConsumer &Consumer; - std::shared_ptr<SymbolCollector> Collector; - std::unique_ptr<CanonicalIncludes> Includes; - std::unique_ptr<CommentHandler> PragmaHandler; - }; - - index::IndexingOptions IndexOpts; - IndexOpts.SystemSymbolFilter = - index::IndexingOptions::SystemSymbolFilterKind::All; - IndexOpts.IndexFunctionLocals = false; - auto CollectorOpts = SymbolCollector::Options(); - CollectorOpts.FallbackDir = AssumedHeaderDir; - CollectorOpts.CollectIncludePath = true; - CollectorOpts.CountReferences = true; - CollectorOpts.Origin = SymbolOrigin::Static; - auto Includes = llvm::make_unique<CanonicalIncludes>(); - addSystemHeadersMapping(Includes.get()); - CollectorOpts.Includes = Includes.get(); - return new WrappedIndexAction( - std::make_shared<SymbolCollector>(std::move(CollectorOpts)), - std::move(Includes), IndexOpts, Consumer); - } - - SymbolsConsumer &Consumer; -}; - -/// Stashes per-file results inside ExecutionContext, merges all of them at the -/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols -/// from multiple files in memory.
-class ToolExecutorConsumer : public SymbolsConsumer { -public: - ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {} - - void consumeSymbols(SymbolSlab Symbols) override { - for (const auto &Sym : Symbols) - Executor.getExecutionContext()->reportResult(Sym.ID.str(), - SymbolToYAML(Sym)); - } - - SymbolSlab mergeResults() override { - SymbolSlab::Builder UniqueSymbols; - Executor.getToolResults()->forEachResult( - [&](llvm::StringRef Key, llvm::StringRef Value) { - llvm::yaml::Input Yin(Value); - auto Sym = clang::clangd::SymbolFromYAML(Yin); - clang::clangd::SymbolID ID; - Key >> ID; - if (const auto *Existing = UniqueSymbols.find(ID)) - UniqueSymbols.insert(mergeSymbol(*Existing, Sym)); - else - UniqueSymbols.insert(Sym); - }); - return std::move(UniqueSymbols).build(); - } - -private: - ToolExecutor &Executor; -}; - -/// Merges symbols for each translation unit as soon as the file is processed. -/// Optimal choice for standalone tools. -class OnTheFlyConsumer : public SymbolsConsumer { -public: - void consumeSymbols(SymbolSlab Symbols) override { - std::lock_guard<std::mutex> Lock(Mut); - for (auto &&Sym : Symbols) { - if (const auto *Existing = Result.find(Sym.ID)) - Result.insert(mergeSymbol(*Existing, Sym)); - else - Result.insert(Sym); - } - } - - SymbolSlab mergeResults() override { - std::lock_guard<std::mutex> Lock(Mut); - return std::move(Result).build(); - } - -private: - std::mutex Mut; - SymbolSlab::Builder Result; -}; - -} // namespace -} // namespace clangd -} // namespace clang - -int main(int argc, const char **argv) { - llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); - - const char *Overview = R"( - This is an **experimental** tool to extract symbols from a whole project - for clangd (global code completion). It will be changed and deprecated - eventually. Don't use it in production code!
- - Example usage for building index for the whole project using CMake compile - commands: - - $ global-symbol-builder --executor=all-TUs compile_commands.json > index.yaml - - Example usage for file sequence index without flags: - - $ global-symbol-builder File1.cpp File2.cpp ... FileN.cpp > index.yaml - - Note: only symbols from header files will be collected. - )"; - - auto Executor = clang::tooling::createExecutorFromCommandLineArgs( - argc, argv, cl::GeneralCategory, Overview); - - if (!Executor) { - llvm::errs() << llvm::toString(Executor.takeError()) << "\n"; - return 1; - } - - if (!clang::clangd::AssumedHeaderDir.empty() && - !llvm::sys::path::is_absolute(clang::clangd::AssumedHeaderDir)) { - llvm::errs() << "--assume-header-dir must be an absolute path.\n"; - return 1; - } - - if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) { - llvm::errs() - << "Found multi-process executor, forcing the use of intermediate YAML " - "serialization instead of the on-the-fly merge.\n"; - clang::clangd::MergeOnTheFly = false; - } - - std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer; - if (clang::clangd::MergeOnTheFly) - Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>(); - else - Consumer = - llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor); - - // Map phase: emit symbols found in each translation unit. - auto Err = Executor->get()->execute( - llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer)); - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - } - // Reduce phase: combine symbols with the same IDs. - auto UniqueSymbols = Consumer->mergeResults(); - // Output phase: emit result symbols.
- switch (clang::clangd::Format) { - case clang::clangd::IndexFormat::YAML: - SymbolsToYAML(UniqueSymbols, llvm::outs()); - break; - case clang::clangd::IndexFormat::Binary: { - clang::clangd::IndexFileOut Out; - Out.Symbols = &UniqueSymbols; - llvm::outs() << Out; - } - } - return 0; -} Index: clang-tools-extra/trunk/clangd/indexer/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/indexer/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/indexer/CMakeLists.txt @@ -0,0 +1,20 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) + +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_executable(clangd-indexer + IndexerMain.cpp + ) + +target_link_libraries(clangd-indexer + PRIVATE + clangAST + clangIndex + clangDaemon + clangBasic + clangFrontend + clangLex + clangTooling +) Index: clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp =================================================================== --- clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp +++ clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp @@ -0,0 +1,285 @@ +//===--- IndexerMain.cpp -----------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// clangd-indexer is a tool to extract symbols from a whole project. +// This tool is **experimental** only. Don't use it in production code.
+// +//===----------------------------------------------------------------------===// + +#include "RIFF.h" +#include "index/CanonicalIncludes.h" +#include "index/Index.h" +#include "index/Merge.h" +#include "index/Serialization.h" +#include "index/SymbolCollector.h" +#include "index/SymbolYAML.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Index/IndexDataConsumer.h" +#include "clang/Index/IndexingAction.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm; +using namespace clang::tooling; +using clang::clangd::SymbolSlab; + +namespace clang { +namespace clangd { +namespace { + +static llvm::cl::opt<std::string> AssumedHeaderDir( + "assume-header-dir", + llvm::cl::desc("The index includes header that a symbol is defined in. " + "If the absolute path cannot be determined (e.g. an " + "in-memory VFS) then the relative path is resolved against " + "this directory, which must be absolute. If this flag is " + "not given, such headers will have relative paths."), + llvm::cl::init("")); + +static llvm::cl::opt<bool> MergeOnTheFly( + "merge-on-the-fly", + llvm::cl::desc( + "Merges symbols for each processed translation unit as soon " + "they become available. This results in a smaller memory " + "usage and an almost instant reduce stage.
Optimal for running as a " + "standalone tool, but cannot be used with multi-process executors like " + "MapReduce."), + llvm::cl::init(true), llvm::cl::Hidden); + +enum IndexFormat { YAML, Binary }; +static llvm::cl::opt<IndexFormat> Format( + "format", llvm::cl::desc("Format of the index to be written"), + llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"), + clEnumValN(Binary, "binary", "binary RIFF format")), + llvm::cl::init(YAML)); + +/// Responsible for aggregating symbols from each processed file and producing +/// the final results. All methods in this class must be thread-safe, +/// 'consumeSymbols' may be called from multiple threads. +class SymbolsConsumer { +public: + virtual ~SymbolsConsumer() = default; + + /// Consume a SymbolSlab build for a file. + virtual void consumeSymbols(SymbolSlab Symbols) = 0; + /// Produce a resulting symbol slab, by combining occurrences of the same + /// symbols across translation units. + virtual SymbolSlab mergeResults() = 0; +}; + +class SymbolIndexActionFactory : public tooling::FrontendActionFactory { +public: + SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {} + + clang::FrontendAction *create() override { + // Wraps the index action and reports collected symbols to the execution + // context at the end of each translation unit.
+ class WrappedIndexAction : public WrapperFrontendAction { + public: + WrappedIndexAction(std::shared_ptr<SymbolCollector> C, + std::unique_ptr<CanonicalIncludes> Includes, + const index::IndexingOptions &Opts, + SymbolsConsumer &Consumer) + : WrapperFrontendAction( + index::createIndexingAction(C, Opts, nullptr)), + Consumer(Consumer), Collector(C), Includes(std::move(Includes)), + PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} + + std::unique_ptr<ASTConsumer> + CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { + CI.getPreprocessor().addCommentHandler(PragmaHandler.get()); + return WrapperFrontendAction::CreateASTConsumer(CI, InFile); + } + + bool BeginInvocation(CompilerInstance &CI) override { + // We want all comments, not just the doxygen ones. + CI.getLangOpts().CommentOpts.ParseAllComments = true; + return WrapperFrontendAction::BeginInvocation(CI); + } + + void EndSourceFileAction() override { + WrapperFrontendAction::EndSourceFileAction(); + + const auto &CI = getCompilerInstance(); + if (CI.hasDiagnostics() && + CI.getDiagnostics().hasUncompilableErrorOccurred()) { + llvm::errs() + << "Found uncompilable errors in the translation unit.
Igoring " + "collected symbols...\n"; + return; + } + + Consumer.consumeSymbols(Collector->takeSymbols()); + } + + private: + SymbolsConsumer &Consumer; + std::shared_ptr<SymbolCollector> Collector; + std::unique_ptr<CanonicalIncludes> Includes; + std::unique_ptr<CommentHandler> PragmaHandler; + }; + + index::IndexingOptions IndexOpts; + IndexOpts.SystemSymbolFilter = + index::IndexingOptions::SystemSymbolFilterKind::All; + IndexOpts.IndexFunctionLocals = false; + auto CollectorOpts = SymbolCollector::Options(); + CollectorOpts.FallbackDir = AssumedHeaderDir; + CollectorOpts.CollectIncludePath = true; + CollectorOpts.CountReferences = true; + CollectorOpts.Origin = SymbolOrigin::Static; + auto Includes = llvm::make_unique<CanonicalIncludes>(); + addSystemHeadersMapping(Includes.get()); + CollectorOpts.Includes = Includes.get(); + return new WrappedIndexAction( + std::make_shared<SymbolCollector>(std::move(CollectorOpts)), + std::move(Includes), IndexOpts, Consumer); + } + + SymbolsConsumer &Consumer; +}; + +/// Stashes per-file results inside ExecutionContext, merges all of them at the +/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols +/// from multiple files in memory.
+class ToolExecutorConsumer : public SymbolsConsumer { +public: + ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {} + + void consumeSymbols(SymbolSlab Symbols) override { + for (const auto &Sym : Symbols) + Executor.getExecutionContext()->reportResult(Sym.ID.str(), + SymbolToYAML(Sym)); + } + + SymbolSlab mergeResults() override { + SymbolSlab::Builder UniqueSymbols; + Executor.getToolResults()->forEachResult( + [&](llvm::StringRef Key, llvm::StringRef Value) { + llvm::yaml::Input Yin(Value); + auto Sym = clang::clangd::SymbolFromYAML(Yin); + clang::clangd::SymbolID ID; + Key >> ID; + if (const auto *Existing = UniqueSymbols.find(ID)) + UniqueSymbols.insert(mergeSymbol(*Existing, Sym)); + else + UniqueSymbols.insert(Sym); + }); + return std::move(UniqueSymbols).build(); + } + +private: + ToolExecutor &Executor; +}; + +/// Merges symbols for each translation unit as soon as the file is processed. +/// Optimal choice for standalone tools. +class OnTheFlyConsumer : public SymbolsConsumer { +public: + void consumeSymbols(SymbolSlab Symbols) override { + std::lock_guard<std::mutex> Lock(Mut); + for (auto &&Sym : Symbols) { + if (const auto *Existing = Result.find(Sym.ID)) + Result.insert(mergeSymbol(*Existing, Sym)); + else + Result.insert(Sym); + } + } + + SymbolSlab mergeResults() override { + std::lock_guard<std::mutex> Lock(Mut); + return std::move(Result).build(); + } + +private: + std::mutex Mut; + SymbolSlab::Builder Result; +}; + +} // namespace +} // namespace clangd +} // namespace clang + +int main(int argc, const char **argv) { + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + + const char *Overview = R"( + This is an **experimental** tool to extract symbols from a whole project + for clangd (global code completion). It will be changed and deprecated + eventually. Don't use it in production code!
+ + Example usage for building index for the whole project using CMake compile + commands: + + $ clangd-indexer --executor=all-TUs compile_commands.json > index.yaml + + Example usage for file sequence index without flags: + + $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > index.yaml + + Note: only symbols from header files will be collected. + )"; + + auto Executor = clang::tooling::createExecutorFromCommandLineArgs( + argc, argv, cl::GeneralCategory, Overview); + + if (!Executor) { + llvm::errs() << llvm::toString(Executor.takeError()) << "\n"; + return 1; + } + + if (!clang::clangd::AssumedHeaderDir.empty() && + !llvm::sys::path::is_absolute(clang::clangd::AssumedHeaderDir)) { + llvm::errs() << "--assume-header-dir must be an absolute path.\n"; + return 1; + } + + if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) { + llvm::errs() + << "Found multi-process executor, forcing the use of intermediate YAML " + "serialization instead of the on-the-fly merge.\n"; + clang::clangd::MergeOnTheFly = false; + } + + std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer; + if (clang::clangd::MergeOnTheFly) + Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>(); + else + Consumer = + llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor); + + // Map phase: emit symbols found in each translation unit. + auto Err = Executor->get()->execute( + llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer)); + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + } + // Reduce phase: combine symbols with the same IDs. + auto UniqueSymbols = Consumer->mergeResults(); + // Output phase: emit result symbols.
+ switch (clang::clangd::Format) { + case clang::clangd::IndexFormat::YAML: + SymbolsToYAML(UniqueSymbols, llvm::outs()); + break; + case clang::clangd::IndexFormat::Binary: { + clang::clangd::IndexFileOut Out; + Out.Symbols = &UniqueSymbols; + llvm::outs() << Out; + } + } + return 0; +} Index: clang-tools-extra/trunk/test/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/test/CMakeLists.txt +++ clang-tools-extra/trunk/test/CMakeLists.txt @@ -55,7 +55,7 @@ # These individual tools have no tests, add them here to make them compile # together with check-clang-tools, so that we won't break them in the future. - global-symbol-builder + clangd-indexer # Unit tests ExtraToolsUnitTests Index: clang-tools-extra/trunk/test/clangd/index-tools.test =================================================================== --- clang-tools-extra/trunk/test/clangd/index-tools.test +++ clang-tools-extra/trunk/test/clangd/index-tools.test @@ -1,3 +1,3 @@ -# RUN: global-symbol-builder %p/Inputs/BenchmarkSource.cpp -- -I%p/Inputs > %t.index +# RUN: clangd-indexer %p/Inputs/BenchmarkSource.cpp -- -I%p/Inputs > %t.index # FIXME: By default, benchmarks are excluded from the list of default targets hence not built. Find a way to depend on benchmarks to run the next command. # RUN: if [ -f %clangd-benchmark-dir/IndexBenchmark ]; then %clangd-benchmark-dir/IndexBenchmark %t.index %p/Inputs/requests.log --benchmark_min_time=0.01 ; fi