Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -72,3 +72,4 @@
 endif()
 add_subdirectory(tool)
 add_subdirectory(global-symbol-builder)
+add_subdirectory(dexplorer)
Index: clang-tools-extra/clangd/dexplorer/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/dexplorer/CMakeLists.txt
@@ -0,0 +1,22 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
+# FIXME(kbobyrev): Do I need all of these dependencies?
+
+set(LLVM_LINK_COMPONENTS
+  LineEditor
+  Support
+  )
+
+add_clang_executable(dexplorer
+  Dexplorer.cpp
+  )
+
+target_link_libraries(dexplorer
+  PRIVATE
+  clangAST
+  clangIndex
+  clangDaemon
+  clangBasic
+  clangFrontend
+  clangLex
+  clangTooling
+)
Index: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
@@ -0,0 +1,197 @@
+//===--- Dexplorer.cpp - Helper Index Exploration tool ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../index/SymbolYAML.h"
+#include "../index/dex/DexIndex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <chrono>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace {
+
+llvm::cl::opt<std::string> YAMLSymbolCollection(
+    "symbol-collection-file",
+    llvm::cl::desc("Path to the file with YAML symbol collection"),
+    llvm::cl::Positional, llvm::cl::Required);
+
+const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+)";
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * fuzzy find symbol given a set of properties
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (or dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+
+/// Splits \p Line at commas and returns the whitespace-trimmed pieces. A blank
+/// \p Line yields an empty list. Empty pieces of a non-blank line are kept only
+/// if \p KeepEmpty is set.
+std::vector<std::string> splitCommaSeparated(llvm::StringRef Line,
+                                             bool KeepEmpty) {
+  std::vector<std::string> Items;
+  if (Line.trim().empty())
+    return Items;
+  llvm::SmallVector<llvm::StringRef, 8> Pieces;
+  Line.split(Pieces, ',', /*MaxSplit=*/-1, KeepEmpty);
+  for (llvm::StringRef Piece : Pieces) {
+    Piece = Piece.trim();
+    if (KeepEmpty || !Piece.empty())
+      Items.push_back(Piece.str());
+  }
+  return Items;
+}
+
+/// Prints `  <Name> = ["a", "b"]` followed by a newline to stdout.
+void printList(llvm::StringRef Name, const std::vector<std::string> &Items) {
+  llvm::outs() << "  " << Name << " = [";
+  for (size_t I = 0; I < Items.size(); ++I) {
+    if (I != 0)
+      llvm::outs() << ", ";
+    llvm::outs() << '"' << Items[I] << '"';
+  }
+  llvm::outs() << "]\n";
+}
+
+/// Interactively reads one FuzzyFindRequest from \p LE and echoes it to stdout.
+/// Returns llvm::None if the input ends before a query could be read, so that
+/// the caller's read loop terminates instead of spinning on empty requests.
+llvm::Optional<clang::clangd::FuzzyFindRequest>
+readRequest(llvm::LineEditor &LE) {
+  clang::clangd::FuzzyFindRequest Result;
+
+  // By default, show 10 results. Otherwise, stdout might be polluted.
+  Result.MaxCandidateCount = 10;
+
+  llvm::outs() << "Query:\n";
+  llvm::Optional<std::string> Line = LE.readLine();
+  if (!Line)
+    return llvm::None;
+  Result.Query = *Line;
+
+  llvm::outs() << "Scopes (comma-separated list):\n";
+  // NOTE(review): empty entries are kept because an empty scope presumably
+  // denotes the global namespace -- confirm against FuzzyFindRequest.
+  if ((Line = LE.readLine()))
+    Result.Scopes = splitCommaSeparated(*Line, /*KeepEmpty=*/true);
+
+  llvm::outs() << "Numbers of symbol to return (default: 10):\n";
+  if ((Line = LE.readLine())) {
+    // Keep the default on blank, malformed or zero input. getAsInteger()
+    // returns true on failure.
+    unsigned MaxCandidateCount = 0;
+    if (!llvm::StringRef(*Line).trim().getAsInteger(10, MaxCandidateCount) &&
+        MaxCandidateCount != 0)
+      Result.MaxCandidateCount = MaxCandidateCount;
+  }
+
+  llvm::outs() << "Proximity paths (comma-separated list):\n";
+  if ((Line = LE.readLine()))
+    Result.ProximityPaths = splitCommaSeparated(*Line, /*KeepEmpty=*/false);
+
+  llvm::outs() << "FuzzyFindRequest {\n";
+  llvm::outs() << "  Query = " << Result.Query << '\n';
+  printList("Scopes", Result.Scopes);
+  llvm::outs() << "  MaxCandidateCount = " << Result.MaxCandidateCount << '\n';
+  printList("ProximityPaths", Result.ProximityPaths);
+  llvm::outs() << "}\n";
+
+  return Result;
+}
+
+/// Runs \p Request against \p Index, reports how long the lookup took and
+/// prints the names of the retrieved symbols in the order they were reported.
+void processRequest(const std::unique_ptr<clang::clangd::SymbolIndex> &Index,
+                    clang::clangd::FuzzyFindRequest &Request) {
+  std::vector<clang::clangd::Symbol> Symbols;
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  Index->fuzzyFind(
+      Request, [&](const clang::clangd::Symbol &S) { Symbols.push_back(S); });
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  llvm::outs() << "Query took "
+               << std::chrono::duration_cast<std::chrono::milliseconds>(
+                      TimerStop - TimerStart)
+                      .count()
+               << " ms.\n";
+  // FIXME(kbobyrev): Allow specifying which fields the user wants to see: e.g.
+  // origin of the symbol (CanonicalDeclaration path), #References, etc.
+  // FIXME(kbobyrev): Use formatting if it's available. E.g. something similar
+  // to std::setw(int); for consistent width. Allowing format specification
+  // would be great. Possible output format:
+  //
+  // Rank | Symbol Name | Scope        | # of References | File with definition
+  // ---------------------------------------------------------------------------
+  // 0    | SymbolIndex | ...clangd... | 10              | .../SymbolIndex.h
+  //
+  // For functions or variables, it might be worth to print out arguments, type,
+  // etc.
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  llvm::outs() << "\nRetrievedSymbols\n";
+  for (size_t Rank = 0; Rank < Symbols.size(); ++Rank)
+    llvm::outs() << Rank << ". " << Symbols[Rank].Name << '\n';
+  llvm::outs() << '\n';
+}
+
+} // namespace
+
+/// Builds a Dex index from the YAML symbol collection given on the command line
+/// and serves interactive fuzzy-find requests until the input is exhausted.
+int main(int argc, const char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  // FIXME(kbobyrev): Wrap time measurements into something like
+  // measureTime(Function, Arguments...).
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  const auto Index =
+      clang::clangd::buildStaticIndex(YAMLSymbolCollection, /*UseDex=*/true);
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  // Bail out before reporting timings: there is nothing to query without an
+  // index.
+  if (!Index) {
+    llvm::errs() << "Please provide a valid YAML symbol collection.\n";
+    return -1;
+  }
+  llvm::outs() << "Build stage took "
+               << std::chrono::duration_cast<std::chrono::seconds>(TimerStop -
+                                                                   TimerStart)
+                      .count()
+               << " s.\n";
+
+  llvm::LineEditor LE("dexplorer");
+
+  // readRequest() yields llvm::None at end of input, which ends the session.
+  while (llvm::Optional<clang::clangd::FuzzyFindRequest> Request =
+             readRequest(LE))
+    processRequest(Index, Request.getValue());
+
+  return 0;
+}