Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -41,6 +41,7 @@ index/MemIndex.cpp index/Merge.cpp index/SymbolCollector.cpp + index/SymbolOccurrenceCollector.cpp index/SymbolYAML.cpp index/dex/Iterator.cpp Index: clangd/ClangdLSPServer.h =================================================================== --- clangd/ClangdLSPServer.h +++ clangd/ClangdLSPServer.h @@ -75,6 +75,7 @@ void onRename(RenameParams &Parames) override; void onHover(TextDocumentPositionParams &Params) override; void onChangeConfiguration(DidChangeConfigurationParams &Params) override; + void onReference(ReferenceParams &Params) override; std::vector getFixes(StringRef File, const clangd::Diagnostic &D); Index: clangd/ClangdLSPServer.cpp =================================================================== --- clangd/ClangdLSPServer.cpp +++ clangd/ClangdLSPServer.cpp @@ -101,8 +101,7 @@ {"documentRangeFormattingProvider", true}, {"documentOnTypeFormattingProvider", json::Object{ - {"firstTriggerCharacter", "}"}, - {"moreTriggerCharacter", {}}, + {"firstTriggerCharacter", "}"}, {"moreTriggerCharacter", {}}, }}, {"codeActionProvider", true}, {"completionProvider", @@ -120,6 +119,7 @@ {"renameProvider", true}, {"documentSymbolProvider", true}, {"workspaceSymbolProvider", true}, + {"referencesProvider", true}, {"executeCommandProvider", json::Object{ {"commands", {ExecuteCommandParams::CLANGD_APPLY_FIX_COMMAND}}, @@ -436,6 +436,18 @@ applyConfiguration(Params.settings); } +void ClangdLSPServer::onReference(ReferenceParams &Params) { + Server.references(Params.textDocument.uri.file(), Params.position, + Params.context.includeDeclaration, + [](llvm::Expected> Locations) { + if (!Locations) + return replyError( + ErrorCode::InternalError, + llvm::toString(Locations.takeError())); + reply(llvm::json::Array(*Locations)); + }); +} + ClangdLSPServer::ClangdLSPServer(JSONOutput &Out, const 
clangd::CodeCompleteOptions &CCOpts, llvm::Optional CompileCommandsDir, Index: clangd/ClangdServer.h =================================================================== --- clangd/ClangdServer.h +++ clangd/ClangdServer.h @@ -154,6 +154,10 @@ void documentSymbols(StringRef File, Callback> CB); + /// Retrieve locations for symbol references. + void references(PathRef File, Position Pos, bool includeDeclaration, + Callback> CB); + /// Run formatting for \p Rng inside \p File with content \p Code. llvm::Expected formatRange(StringRef Code, PathRef File, Range Rng); @@ -207,6 +211,27 @@ tooling::CompileCommand getCompileCommand(PathRef File); + /// Manages dynamic index for open files. Each file might contribute two sets + /// of symbols to the dynamic index: symbols from the preamble and symbols + /// from the file itself. Those have different lifetimes; results of both are merged. + class DynamicIndex : public ParsingCallbacks { + public: + DynamicIndex(std::vector URISchemes); + + SymbolIndex &index() const; + + void onPreambleAST(PathRef Path, ASTContext &Ctx, + std::shared_ptr PP) override; + void onMainAST(PathRef Path, ParsedAST &AST) override; + + private: + FileIndex PreambleIdx; + FileIndex MainFileIdx; + /// Merged view into both indexes. Merges are performed in a similar manner + /// to the merges of dynamic and static index. + std::unique_ptr MergedIndex; + }; + GlobalCompilationDatabase &CDB; DiagnosticsConsumer &DiagConsumer; FileSystemProvider &FSProvider; @@ -221,8 +246,8 @@ // - the static index passed to the constructor // - a merged view of a static and dynamic index (MergedIndex) SymbolIndex *Index; - // If present, an up-to-date of symbols in open files. Read via Index. - std::unique_ptr FileIdx; + /// If present, an up-to-date index of symbols in open files. Read via Index. + std::unique_ptr FileIdx; // If present, a merged view of FileIdx and an external index. Read via Index. 
std::unique_ptr MergedIndex; // If set, this represents the workspace path. Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -66,6 +66,19 @@ Optional> Result; }; +// FIXME(ibiryukov): this should be a generic helper instead. +class NoopCallbacks : public ParsingCallbacks { +public: + static ParsingCallbacks &instance() { + static ParsingCallbacks *Instance = new NoopCallbacks; + return *Instance; + } + + void onPreambleAST(PathRef Path, ASTContext &Ctx, + std::shared_ptr PP) override {} + void onMainAST(PathRef Path, ParsedAST &AST) override {} +}; + } // namespace ClangdServer::Options ClangdServer::optsForTest() { @@ -83,7 +96,7 @@ : CDB(CDB), DiagConsumer(DiagConsumer), FSProvider(FSProvider), ResourceDir(Opts.ResourceDir ? Opts.ResourceDir->str() : getStandardResourceDir()), - FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.URISchemes) + FileIdx(Opts.BuildDynamicSymbolIndex ? new DynamicIndex(Opts.URISchemes) : nullptr), PCHs(std::make_shared()), // Pass a callback into `WorkScheduler` to extract symbols from a newly @@ -91,19 +104,14 @@ // is parsed. // FIXME(ioeric): this can be slow and we may be able to index on less // critical paths. - WorkScheduler( - Opts.AsyncThreadsCount, Opts.StorePreamblesInMemory, - FileIdx - ? [this](PathRef Path, ASTContext &AST, - std::shared_ptr - PP) { FileIdx->update(Path, &AST, std::move(PP)); } - : PreambleParsedCallback(), - Opts.UpdateDebounce, Opts.RetentionPolicy) { + WorkScheduler(Opts.AsyncThreadsCount, Opts.StorePreamblesInMemory, + FileIdx ? 
*FileIdx : NoopCallbacks::instance(), + Opts.UpdateDebounce, Opts.RetentionPolicy) { if (FileIdx && Opts.StaticIndex) { - MergedIndex = mergeIndex(FileIdx.get(), Opts.StaticIndex); + MergedIndex = mergeIndex(&FileIdx->index(), Opts.StaticIndex); Index = MergedIndex.get(); } else if (FileIdx) - Index = FileIdx.get(); + Index = &FileIdx->index(); else if (Opts.StaticIndex) Index = Opts.StaticIndex; else @@ -465,6 +473,19 @@ Bind(Action, std::move(CB))); } +void ClangdServer::references(PathRef File, Position Pos, + bool includeDeclaration, + Callback> CB) { + auto Action = [Pos, includeDeclaration, this]( + Callback> CB, llvm::Expected InpAST) { + if (!InpAST) + return CB(InpAST.takeError()); + CB(clangd::references(InpAST->AST, Pos, includeDeclaration, Index)); + }; + + WorkScheduler.runWithAST("References", File, Bind(Action, std::move(CB))); +} + std::vector> ClangdServer::getUsedBytesPerFile() const { return WorkScheduler.getUsedBytesPerFile(); @@ -474,3 +495,19 @@ ClangdServer::blockUntilIdleForTest(llvm::Optional TimeoutSeconds) { return WorkScheduler.blockUntilIdle(timeoutSeconds(TimeoutSeconds)); } + +ClangdServer::DynamicIndex::DynamicIndex(std::vector URISchemes) + : PreambleIdx(URISchemes), MainFileIdx(URISchemes), + MergedIndex(mergeIndex(&MainFileIdx, &PreambleIdx)) {} + +SymbolIndex &ClangdServer::DynamicIndex::index() const { return *MergedIndex; } + +void ClangdServer::DynamicIndex::onPreambleAST( + PathRef Path, ASTContext &Ctx, std::shared_ptr PP) { + PreambleIdx.update(Path, &Ctx, PP, /*TopLevelDecls=*/llvm::None); +} + +void ClangdServer::DynamicIndex::onMainAST(PathRef Path, ParsedAST &AST) { + MainFileIdx.update(Path, &AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getLocalTopLevelDecls()); +} Index: clangd/Protocol.h =================================================================== --- clangd/Protocol.h +++ clangd/Protocol.h @@ -867,6 +867,17 @@ llvm::json::Value toJSON(const DocumentHighlight &DH); llvm::raw_ostream 
&operator<<(llvm::raw_ostream &, const DocumentHighlight &); +struct ReferenceContext { + // Include the declaration of the current symbol. + bool includeDeclaration = false; +}; +bool fromJSON(const llvm::json::Value &, ReferenceContext &); + +struct ReferenceParams : public TextDocumentPositionParams { + ReferenceContext context; +}; +bool fromJSON(const llvm::json::Value &, ReferenceParams &); + } // namespace clangd } // namespace clang Index: clangd/Protocol.cpp =================================================================== --- clangd/Protocol.cpp +++ clangd/Protocol.cpp @@ -615,5 +615,16 @@ O.map("compilationDatabaseChanges", CCPC.compilationDatabaseChanges); } +bool fromJSON(const json::Value &Params, ReferenceContext &RC) { + json::ObjectMapper O(Params); + return O && O.map("includeDeclaration", RC.includeDeclaration); +} + +bool fromJSON(const json::Value &Params, ReferenceParams &R) { + json::ObjectMapper O(Params); + return O && O.map("context", R.context) && + O.map("textDocument", R.textDocument) && O.map("position", R.position); +} + } // namespace clangd } // namespace clang Index: clangd/ProtocolHandlers.h =================================================================== --- clangd/ProtocolHandlers.h +++ clangd/ProtocolHandlers.h @@ -55,6 +55,7 @@ virtual void onDocumentHighlight(TextDocumentPositionParams &Params) = 0; virtual void onHover(TextDocumentPositionParams &Params) = 0; virtual void onChangeConfiguration(DidChangeConfigurationParams &Params) = 0; + virtual void onReference(ReferenceParams &Params) = 0; }; void registerCallbackHandlers(JSONRPCDispatcher &Dispatcher, Index: clangd/ProtocolHandlers.cpp =================================================================== --- clangd/ProtocolHandlers.cpp +++ clangd/ProtocolHandlers.cpp @@ -75,4 +75,5 @@ Register("workspace/didChangeConfiguration", &ProtocolCallbacks::onChangeConfiguration); Register("workspace/symbol", &ProtocolCallbacks::onWorkspaceSymbol); + Register("textDocument/references", 
&ProtocolCallbacks::onReference); } Index: clangd/TUScheduler.h =================================================================== --- clangd/TUScheduler.h +++ clangd/TUScheduler.h @@ -51,6 +51,21 @@ unsigned MaxRetainedASTs = 3; }; +class ParsingCallbacks { +public: + virtual ~ParsingCallbacks() = default; + + /// Called on the AST that was built for emitting the preamble. The built AST + /// contains only AST nodes from the #include directives at the start of the + /// file. AST node in the current file should be observed on onMainAST call. + virtual void onPreambleAST(PathRef Path, ASTContext &Ctx, + std::shared_ptr PP) = 0; + /// Called on the AST built for the file itself. Note that preamble AST nodes + /// are not deserialized and should be processed in the onPreambleAST call + /// instead. + virtual void onMainAST(PathRef Path, ParsedAST &AST) = 0; +}; + /// Handles running tasks for ClangdServer and managing the resources (e.g., /// preambles and ASTs) for opened files. /// TUScheduler is not thread-safe, only one thread should be providing updates @@ -61,7 +76,7 @@ class TUScheduler { public: TUScheduler(unsigned AsyncThreadsCount, bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback, + ParsingCallbacks& ASTCallbacks, std::chrono::steady_clock::duration UpdateDebounce, ASTRetentionPolicy RetentionPolicy); ~TUScheduler(); @@ -132,7 +147,7 @@ private: const bool StorePreamblesInMemory; const std::shared_ptr PCHOps; - const PreambleParsedCallback PreambleCallback; + ParsingCallbacks& Callbacks; Semaphore Barrier; llvm::StringMap> Files; std::unique_ptr IdleASTs; Index: clangd/TUScheduler.cpp =================================================================== --- clangd/TUScheduler.cpp +++ clangd/TUScheduler.cpp @@ -158,8 +158,7 @@ Semaphore &Barrier, bool RunSync, steady_clock::duration UpdateDebounce, std::shared_ptr PCHs, - bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback); + bool StorePreamblesInMemory, 
ParsingCallbacks &Callbacks); public: /// Create a new ASTWorker and return a handle to it. @@ -173,7 +172,7 @@ steady_clock::duration UpdateDebounce, std::shared_ptr PCHs, bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback); + ParsingCallbacks& Callbacks); ~ASTWorker(); void update(ParseInputs Inputs, WantDiagnostics, @@ -228,8 +227,8 @@ const Path FileName; /// Whether to keep the built preambles in memory or on disk. const bool StorePreambleInMemory; - /// Callback, passed to the preamble builder. - const PreambleParsedCallback PreambleCallback; + /// Callback, invoked when preamble or main file AST is built. + ParsingCallbacks& Callbacks; /// Helper class required to build the ASTs. const std::shared_ptr PCHs; @@ -299,10 +298,10 @@ steady_clock::duration UpdateDebounce, std::shared_ptr PCHs, bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback) { + ParsingCallbacks& Callbacks) { std::shared_ptr Worker(new ASTWorker( FileName, IdleASTs, Barrier, /*RunSync=*/!Tasks, UpdateDebounce, - std::move(PCHs), StorePreamblesInMemory, std::move(PreambleCallback))); + std::move(PCHs), StorePreamblesInMemory, Callbacks)); if (Tasks) Tasks->runAsync("worker:" + llvm::sys::path::filename(FileName), [Worker]() { Worker->run(); }); @@ -315,10 +314,10 @@ steady_clock::duration UpdateDebounce, std::shared_ptr PCHs, bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback) + ParsingCallbacks& Callbacks) : IdleASTs(LRUCache), RunSync(RunSync), UpdateDebounce(UpdateDebounce), FileName(FileName), StorePreambleInMemory(StorePreamblesInMemory), - PreambleCallback(std::move(PreambleCallback)), PCHs(std::move(PCHs)), + Callbacks(Callbacks), PCHs(std::move(PCHs)), Barrier(Barrier), Done(false) {} ASTWorker::~ASTWorker() { @@ -365,7 +364,11 @@ getPossiblyStalePreamble(); std::shared_ptr NewPreamble = buildPreamble(FileName, *Invocation, OldPreamble, OldCommand, Inputs, - PCHs, StorePreambleInMemory, PreambleCallback); + PCHs, 
StorePreambleInMemory, + [this](PathRef Path, ASTContext &Ctx, + std::shared_ptr PP) { + Callbacks.onPreambleAST(FileName, Ctx, std::move(PP)); + }); bool CanReuseAST = InputsAreTheSame && (OldPreamble == NewPreamble); { @@ -415,6 +418,7 @@ // Note *AST can be still be null if buildAST fails. if (*AST) { OnUpdated((*AST)->getDiagnostics()); + Callbacks.onMainAST(FileName, **AST); DiagsWereReported = true; } // Stash the AST in the cache for further use. @@ -627,12 +631,12 @@ TUScheduler::TUScheduler(unsigned AsyncThreadsCount, bool StorePreamblesInMemory, - PreambleParsedCallback PreambleCallback, + ParsingCallbacks &Callbacks, std::chrono::steady_clock::duration UpdateDebounce, ASTRetentionPolicy RetentionPolicy) : StorePreamblesInMemory(StorePreamblesInMemory), - PCHOps(std::make_shared()), - PreambleCallback(std::move(PreambleCallback)), Barrier(AsyncThreadsCount), + PCHOps(std::make_shared()), Callbacks(Callbacks), + Barrier(AsyncThreadsCount), IdleASTs(llvm::make_unique(RetentionPolicy.MaxRetainedASTs)), UpdateDebounce(UpdateDebounce) { if (0 < AsyncThreadsCount) { @@ -670,8 +674,7 @@ // Create a new worker to process the AST-related tasks. ASTWorkerHandle Worker = ASTWorker::create( File, *IdleASTs, WorkerThreads ? WorkerThreads.getPointer() : nullptr, - Barrier, UpdateDebounce, PCHOps, StorePreamblesInMemory, - PreambleCallback); + Barrier, UpdateDebounce, PCHOps, StorePreamblesInMemory, Callbacks); FD = std::unique_ptr(new FileData{ Inputs.Contents, Inputs.CompileCommand, std::move(Worker)}); } else { Index: clangd/XRefs.h =================================================================== --- clangd/XRefs.h +++ clangd/XRefs.h @@ -34,6 +34,11 @@ /// Get the hover information when hovering at \p Pos. llvm::Optional getHover(ParsedAST &AST, Position Pos); +/// Get references of symbol at a \p Pos. 
+std::vector references(ParsedAST &AST, Position Pos, + bool includeDeclaration, + const SymbolIndex *Index = nullptr); + } // namespace clangd } // namespace clang Index: clangd/XRefs.cpp =================================================================== --- clangd/XRefs.cpp +++ clangd/XRefs.cpp @@ -11,12 +11,14 @@ #include "Logger.h" #include "SourceCode.h" #include "URI.h" +#include "index/SymbolCollector.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexingAction.h" #include "clang/Index/USRGeneration.h" #include "llvm/Support/Path.h" + namespace clang { namespace clangd { using namespace llvm; @@ -660,5 +662,70 @@ return None; } +std::vector references(ParsedAST &AST, Position Pos, + bool IncludeDeclaration, + const SymbolIndex *Index) { + const SourceManager &SourceMgr = AST.getASTContext().getSourceManager(); + SourceLocation SourceLocationBeg = + getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID()); + // Identified symbols at a specific position. + auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg); + // FIXME: support macros. + llvm::DenseSet IDs; + llvm::DenseSet NonLocalIDs; + for (const auto *D : Symbols.Decls) { + if (auto ID = getSymbolID(D)) { + llvm::errs() << "ID: " << *ID << ": " + << (index::isFunctionLocalSymbol(D) ? 
"Local\n" + : "Non-local\n"); + if (!clang::index::isFunctionLocalSymbol(D)) + NonLocalIDs.insert(*ID); + IDs.insert(*ID); + } + } + SymbolOccurrenceKind Filter = + SymbolOccurrenceKind::Reference | SymbolOccurrenceKind::Definition; + if (IncludeDeclaration) + Filter |= SymbolOccurrenceKind::Declaration; + SymbolCollector::Options::CollectOccurrenceOptions Opts; + Opts.Filter = Filter; + Opts.IDs = std::move(IDs); + + SymbolCollector Collector({nullptr, &Opts}, {}); + index::IndexingOptions IndexOpts; + IndexOpts.SystemSymbolFilter = + index::IndexingOptions::SystemSymbolFilterKind::All; + IndexOpts.IndexFunctionLocals = true; + // Only find references for the current main file. + indexTopLevelDecls(AST.getASTContext(), AST.getLocalTopLevelDecls(), + Collector, IndexOpts); + + std::vector Results; + llvm::DenseSet SeenURIs; + auto Storage = Collector.takeSymbols(); + for (auto &O : Storage.takeAllOccurences()) { + if (auto LSPLoc = toLSPLocation(O.Location, "")) { + Results.push_back(*LSPLoc); + SeenURIs.insert(LSPLoc->uri.uri()); + } + } + + // Query index for non-local symbols. + if (Index && !NonLocalIDs.empty()) { + log("query index for references"); + OccurrencesRequest R; + R.IDs = std::move(NonLocalIDs); + R.Filter = Filter; + Index->findOccurrences(R, [&](const SymbolOccurrence &O) { + log("index: find occ: {0}", O.Location); + if (auto LSPLoc = toLSPLocation(O.Location, "")) { + if (!llvm::is_contained(SeenURIs, O.Location.FileURI)) + Results.push_back(*LSPLoc); + } + }); + } + return Results; +} + } // namespace clangd } // namespace clang Index: clangd/index/FileIndex.h =================================================================== --- clangd/index/FileIndex.h +++ clangd/index/FileIndex.h @@ -46,6 +46,8 @@ // The shared_ptr keeps the symbols alive std::shared_ptr> allSymbols(); + std::vector> allSlabs() const; + private: mutable std::mutex Mutex; @@ -64,7 +66,11 @@ /// nullptr, this removes all symbols in the file. 
/// If \p AST is not null, \p PP cannot be null and it should be the /// preprocessor that was used to build \p AST. - void update(PathRef Path, ASTContext *AST, std::shared_ptr PP); + /// If \p TopLevelDecls is set, only these decls are indexed. Otherwise, all + /// top level decls obtained from \p AST are indexed. + void + update(PathRef Path, ASTContext *AST, std::shared_ptr PP, + llvm::Optional> TopLevelDecls = llvm::None); bool fuzzyFind(const FuzzyFindRequest &Req, @@ -86,8 +92,12 @@ /// Retrieves namespace and class level symbols in \p AST. /// Exposed to assist in unit tests. /// If URISchemes is empty, the default schemes in SymbolCollector will be used. -SymbolSlab indexAST(ASTContext &AST, std::shared_ptr PP, - llvm::ArrayRef URISchemes = {}); +/// If \p TopLevelDecls is set, only these decls are indexed. Otherwise, all top +/// level decls obtained from \p AST are indexed. +SymbolSlab +indexAST(ASTContext &AST, std::shared_ptr PP, + llvm::Optional> TopLevelDecls = llvm::None, + llvm::ArrayRef URISchemes = {}); } // namespace clangd } // namespace clang Index: clangd/index/FileIndex.cpp =================================================================== --- clangd/index/FileIndex.cpp +++ clangd/index/FileIndex.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "FileIndex.h" -#include "SymbolCollector.h" #include "../Logger.h" +#include "SymbolCollector.h" #include "clang/Index/IndexingAction.h" #include "clang/Lex/Preprocessor.h" @@ -17,8 +17,31 @@ namespace clangd { SymbolSlab indexAST(ASTContext &AST, std::shared_ptr PP, + llvm::Optional> TopLevelDecls, llvm::ArrayRef URISchemes) { - SymbolCollector::Options CollectorOpts; + log("index ast for {0}", TopLevelDecls ? "Main AST" : "preamble AST"); + if (TopLevelDecls) { // Index main AST. + // Collect references. 
+ SymbolCollector::Options::CollectOccurrenceOptions Opts; + Opts.Filter = SymbolOccurrenceKind::Declaration | + SymbolOccurrenceKind::Definition | + SymbolOccurrenceKind::Reference; + SymbolCollector Collector({nullptr, &Opts}, URISchemes); + + Collector.setPreprocessor(PP); + index::IndexingOptions IndexOpts; + // Occurrences are collected here; system symbols and function locals are skipped. + IndexOpts.SystemSymbolFilter = + index::IndexingOptions::SystemSymbolFilterKind::None; + IndexOpts.IndexFunctionLocals = false; + + index::indexTopLevelDecls(AST, *TopLevelDecls, Collector, IndexOpts); + auto S = Collector.takeSymbols(); + log("references slab size: {0} bytes", S.bytes()); + return S; + } + + SymbolCollector::Options::CollectSymbolOptions CollectorOpts; // FIXME(ioeric): we might also want to collect include headers. We would need // to make sure all includes are canonicalized (with CanonicalIncludes), which // is not trivial given the current way of collecting symbols: we only have @@ -26,11 +49,10 @@ // CommentHandler for IWYU pragma) to canonicalize includes. CollectorOpts.CollectIncludePath = false; CollectorOpts.CountReferences = false; - if (!URISchemes.empty()) - CollectorOpts.URISchemes = URISchemes; CollectorOpts.Origin = SymbolOrigin::Dynamic; - SymbolCollector Collector(std::move(CollectorOpts)); + SymbolCollector::Options Opts = { &CollectorOpts, nullptr}; + SymbolCollector Collector(std::move(Opts), URISchemes); Collector.setPreprocessor(PP); index::IndexingOptions IndexOpts; // We only need declarations, because we don't count references. 
@@ -38,12 +60,17 @@ index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly; IndexOpts.IndexFunctionLocals = false; - std::vector TopLevelDecls( - AST.getTranslationUnitDecl()->decls().begin(), - AST.getTranslationUnitDecl()->decls().end()); - index::indexTopLevelDecls(AST, TopLevelDecls, Collector, IndexOpts); + std::vector Storage; + if (!TopLevelDecls) { + Storage.assign(AST.getTranslationUnitDecl()->decls().begin(), + AST.getTranslationUnitDecl()->decls().end()); + TopLevelDecls = Storage; + } + index::indexTopLevelDecls(AST, *TopLevelDecls, Collector, IndexOpts); - return Collector.takeSymbols(); + auto S = Collector.takeSymbols(); + log("symbol slab size: {0} bytes", S.bytes()); + return S; } FileIndex::FileIndex(std::vector URISchemes) @@ -80,14 +107,25 @@ return {std::move(Snap), Pointers}; } +std::vector> FileSymbols::allSlabs() const { + std::lock_guard Lock(Mutex); + + std::vector> Slabs; + for (const auto &FileAndSlab : FileToSlabs) { + Slabs.push_back(FileAndSlab.second); + } + return Slabs; +} + void FileIndex::update(PathRef Path, ASTContext *AST, - std::shared_ptr PP) { + std::shared_ptr PP, + llvm::Optional> TopLevelDecls) { if (!AST) { FSymbols.update(Path, nullptr); } else { assert(PP); auto Slab = llvm::make_unique(); - *Slab = indexAST(*AST, PP, URISchemes); + *Slab = indexAST(*AST, PP, TopLevelDecls, URISchemes); FSymbols.update(Path, std::move(Slab)); } auto Symbols = FSymbols.allSymbols(); @@ -109,7 +147,19 @@ void FileIndex::findOccurrences( const OccurrencesRequest &Req, llvm::function_ref Callback) const { - log("findOccurrences is not implemented."); + auto Snapshot = FSymbols.allSlabs(); + + log("fileIndex: findOccurrences: {0}", Snapshot.size()); + for (const auto &Slab : Snapshot) { + for (const auto &ID : Req.IDs) { + for (const auto &Occurrence : Slab->findOccurrences(ID)) { + log("fileIndex: find occ: {0}", Occurrence); + if (static_cast(Req.Filter & Occurrence.Kind)) { + Callback(Occurrence); + } + } + } + } } } // namespace 
clangd Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -17,6 +17,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/StringSaver.h" #include #include @@ -30,9 +31,6 @@ uint32_t Line = 0; // 0-based // Using UTF-16 code units. uint32_t Column = 0; // 0-based - bool operator==(const Position& P) const { - return Line == P.Line && Column == P.Column; - } }; // The URI of the source file where a symbol occurs. @@ -43,11 +41,25 @@ Position End; explicit operator bool() const { return !FileURI.empty(); } - bool operator==(const SymbolLocation& Loc) const { - return std::tie(FileURI, Start, End) == - std::tie(Loc.FileURI, Loc.Start, Loc.End); - } }; +inline bool operator==(const SymbolLocation::Position &L, + const SymbolLocation::Position &R){ + return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column); +} +inline bool operator<(const SymbolLocation::Position &L, + const SymbolLocation::Position &R){ + return std::tie(L.Line, L.Column) < std::tie(R.Line, R.Column); +} +inline bool operator==(const SymbolLocation&L, + const SymbolLocation&R){ + return std::tie(L.FileURI, L.Start, L.End) == + std::tie(R.FileURI, R.Start, R.End); +} +inline bool operator<(const SymbolLocation&L, + const SymbolLocation&R){ + return std::tie(L.FileURI, L.Start, L.End) < + std::tie(R.FileURI, R.Start, R.End); +} llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); // The class identifies a particular C++ symbol (class, function, method, etc). @@ -233,6 +245,49 @@ // and signals -> score, so it can be reused for Sema completions. double quality(const Symbol &S); +// Describes the kind of a symbol occurrence. +// +// This is a bitfield which can be combined from different kinds. 
+enum class SymbolOccurrenceKind : uint8_t { + Unknown = 0, + Declaration = static_cast(index::SymbolRole::Declaration), + Definition = static_cast(index::SymbolRole::Definition), + Reference = static_cast(index::SymbolRole::Reference), +}; +inline SymbolOccurrenceKind operator|(SymbolOccurrenceKind L, + SymbolOccurrenceKind R) { + return static_cast(static_cast(L) | + static_cast(R)); +} +inline SymbolOccurrenceKind &operator|=(SymbolOccurrenceKind &L, + SymbolOccurrenceKind R) { + return L = L | R; +} +inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A, + SymbolOccurrenceKind B) { + return static_cast(static_cast(A) & + static_cast(B)); +} +raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K); + +// Represents a symbol occurrence in the source file. It could be a +// declaration/definition/reference occurrence. +// +// WARNING: Location does not own the underlying data - Copies are shallow. +struct SymbolOccurrence { + // The location of the occurrence. + SymbolLocation Location; + SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown; +}; +inline bool operator<(const SymbolOccurrence &L, const SymbolOccurrence &R) { + return std::tie(L.Location, L.Kind) < std::tie(R.Location, R.Kind); +} +inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R) { + return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind); +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolOccurrence &Occurrence); + // An immutable symbol container that stores a set of symbols. // The container will maintain the lifetime of the symbols. class SymbolSlab { @@ -250,7 +305,24 @@ // Estimates the total memory usage. 
size_t bytes() const { return sizeof(*this) + Arena.getTotalMemory() + - Symbols.capacity() * sizeof(Symbol); + Symbols.capacity() * sizeof(Symbol) + + SymbolOccurrences.getMemorySize(); + } + + llvm::ArrayRef findOccurrences(const SymbolID &ID) const { + auto It = SymbolOccurrences.find(ID); + if (It == SymbolOccurrences.end()) + return {}; + return It->second; + } + + std::vector takeAllOccurences() { + std::vector R; + for (auto &It : SymbolOccurrences) { + R.insert(R.end(), make_move_iterator(It.second.begin()), + make_move_iterator(It.second.end())); + } + return R; } // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab. @@ -267,6 +339,8 @@ return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; } + void insert(const SymbolID& ID, SymbolOccurrence Occurrence); + // Consumes the builder to finalize the slab. SymbolSlab build() &&; @@ -277,48 +351,55 @@ std::vector Symbols; // Values are indices into Symbols vector. llvm::DenseMap SymbolIndex; + // FIXME: we can do more optimization to reduce memory usage, group by + // FileName, instead of storing a StringRef for each occurrence object. + llvm::DenseMap> SymbolOccurrences; }; private: - SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) - : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} + SymbolSlab( + llvm::BumpPtrAllocator Arena, std::vector Symbols, + llvm::DenseMap> SymbolOccurrences) + : Arena(std::move(Arena)), Symbols(std::move(Symbols)), + SymbolOccurrences(std::move(SymbolOccurrences)) {} llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. std::vector Symbols; // Sorted by SymbolID to allow lookup. + llvm::DenseMap> SymbolOccurrences; }; -// Describes the kind of a symbol occurrence. -// -// This is a bitfield which can be combined from different kinds. 
-enum class SymbolOccurrenceKind : uint8_t { - Unknown = 0, - Declaration = static_cast(index::SymbolRole::Declaration), - Definition = static_cast(index::SymbolRole::Definition), - Reference = static_cast(index::SymbolRole::Reference), -}; -inline SymbolOccurrenceKind operator|(SymbolOccurrenceKind L, - SymbolOccurrenceKind R) { - return static_cast(static_cast(L) | - static_cast(R)); -} -inline SymbolOccurrenceKind &operator|=(SymbolOccurrenceKind &L, - SymbolOccurrenceKind R) { - return L = L | R; -} -inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A, - SymbolOccurrenceKind B) { - return static_cast(static_cast(A) & - static_cast(B)); -} -// Represents a symbol occurrence in the source file. It could be a -// declaration/definition/reference occurrence. -// -// WARNING: Location does not own the underlying data - Copies are shallow. -struct SymbolOccurrence { - // The location of the occurrence. - SymbolLocation Location; - SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown; +// An efficient structure of storing large set of symbol occurrences in memory. +// Filenames are deduplicated. +class SymbolOccurrenceSlab { + public: + using const_iterator = + llvm::DenseMap>::const_iterator; + using iterator = const_iterator; + + SymbolOccurrenceSlab() : UniqueStrings(Arena) {} + + const_iterator begin() const { return Occurrences.begin(); } + const_iterator end() const { return Occurrences.end(); } + + // Adds a symbol occurrence. + // This is a deep copy: underlying FileURI will be owned by the slab. 
+ void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence); + + llvm::ArrayRef find(const SymbolID &ID) const { + auto It = Occurrences.find(ID); + if (It == Occurrences.end()) + return {}; + return It->second; + } + + void freeze(); + +private: + bool Frozen = false; + llvm::BumpPtrAllocator Arena; + llvm::UniqueStringSaver UniqueStrings; + llvm::DenseMap> Occurrences; }; struct FuzzyFindRequest { Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" +#include "../Logger.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" @@ -76,6 +77,19 @@ return Symbols.end(); } +static void own(SymbolOccurrence &O, DenseSet& Strings, + BumpPtrAllocator &Arena) { + // Intern replaces V with a reference to the same string owned by the arena. + auto Intern = [&](StringRef &V) { + auto R = Strings.insert(V); + if (R.second) { // New entry added to the table, copy the string. + *R.first = V.copy(Arena); + } + V = *R.first; + }; + Intern(O.Location.FileURI); +} + // Copy the underlying data of the symbol into the owned arena. static void own(Symbol &S, DenseSet &Strings, BumpPtrAllocator &Arena) { @@ -121,6 +135,11 @@ } } +void SymbolSlab::Builder::insert(const SymbolID &ID, SymbolOccurrence Occurrence) { + own(Occurrence, Strings, Arena); + SymbolOccurrences[ID].push_back(std::move(Occurrence)); +} + SymbolSlab SymbolSlab::Builder::build() && { Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. // Sort symbols so the slab can binary search over them. @@ -131,7 +150,71 @@ DenseSet Strings; for (auto &S : Symbols) own(S, Strings, NewArena); - return SymbolSlab(std::move(NewArena), std::move(Symbols)); + + int count = 0; + // We may have duplicated symbol occurrences. Deduplicate them. 
+ for (auto &IDAndOccurrence : SymbolOccurrences) { + auto &Occurrence = IDAndOccurrence.getSecond(); + std::sort(Occurrence.begin(), Occurrence.end(), + [](const SymbolOccurrence &L, const SymbolOccurrence &R) { + return L < R; + }); + Occurrence.erase( + std::unique(Occurrence.begin(), Occurrence.end(), + [](const SymbolOccurrence &L, const SymbolOccurrence &R) { + return L == R; + }), + Occurrence.end()); + + for (auto &O : Occurrence) + own(O, Strings, NewArena); + count += Occurrence.size(); + } + log("ReferencesInfo: {0} different symbols, {1} total occurrences.", + SymbolOccurrences.size(), count); + log("SymbolInfo: {0} differenct symbols.", Symbols.size()); + + return SymbolSlab(std::move(NewArena), std::move(Symbols), + std::move(SymbolOccurrences)); +} + +raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) { + OS << static_cast(K); + return OS; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolOccurrence &Occurrence) { + OS << Occurrence.Location << ":" << Occurrence.Kind; + return OS; +} + +void SymbolOccurrenceSlab::insert(const SymbolID &SymID, + const SymbolOccurrence &Occurrence) { + assert(!Frozen && + "Can't insert a symbol occurrence after the slab has been frozen!"); + auto& SymOccurrences = Occurrences[SymID]; + SymOccurrences.push_back(Occurrence); + SymOccurrences.back().Location.FileURI = + UniqueStrings.save(Occurrence.Location.FileURI); +} + +void SymbolOccurrenceSlab::freeze() { + // We may have duplicated symbol occurrences. Deduplicate them. 
+ for (auto &IDAndOccurrence : Occurrences) { + auto &Occurrence = IDAndOccurrence.getSecond(); + std::sort(Occurrence.begin(), Occurrence.end(), + [](const SymbolOccurrence &L, const SymbolOccurrence &R) { + return L < R; + }); + Occurrence.erase( + std::unique(Occurrence.begin(), Occurrence.end(), + [](const SymbolOccurrence &L, const SymbolOccurrence &R) { + return L == R; + }), + Occurrence.end()); + } + Frozen = true; } } // namespace clangd Index: clangd/index/MemIndex.cpp =================================================================== --- clangd/index/MemIndex.cpp +++ clangd/index/MemIndex.cpp @@ -90,7 +90,8 @@ void MemIndex::findOccurrences( const OccurrencesRequest &Req, llvm::function_ref Callback) const { - log("findOccurrences is not implemented."); + assert( + "find Occurrences is not support in the index, and should not be called"); } } // namespace clangd Index: clangd/index/Merge.cpp =================================================================== --- clangd/index/Merge.cpp +++ clangd/index/Merge.cpp @@ -81,7 +81,15 @@ void findOccurrences(const OccurrencesRequest &Req, llvm::function_ref Callback) const override { - log("findOccurrences is not implemented."); + llvm::DenseSet SeenURIs; + Dynamic->findOccurrences(Req, [&](const SymbolOccurrence &O) { + SeenURIs.insert(O.Location.FileURI); + Callback(O); + }); + Static->findOccurrences(Req, [&](const SymbolOccurrence &O) { + if (!llvm::is_contained(SeenURIs, O.Location.FileURI)) + Callback(O); + }); } private: Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -18,6 +18,7 @@ namespace clang { namespace clangd { + /// \brief Collect declarations (symbols) from an AST. 
/// It collects most declarations except: /// - Implicit declarations @@ -37,42 +38,48 @@ class SymbolCollector : public index::IndexDataConsumer { public: struct Options { - /// When symbol paths cannot be resolved to absolute paths (e.g. files in - /// VFS that does not have absolute path), combine the fallback directory - /// with symbols' paths to get absolute paths. This must be an absolute - /// path. - std::string FallbackDir; - /// Specifies URI schemes that can be used to generate URIs for file paths - /// in symbols. The list of schemes will be tried in order until a working - /// scheme is found. If no scheme works, symbol location will be dropped. - std::vector URISchemes = {"file"}; - bool CollectIncludePath = false; - /// If set, this is used to map symbol #include path to a potentially - /// different #include path. - const CanonicalIncludes *Includes = nullptr; - // Populate the Symbol.References field. - bool CountReferences = false; - // Every symbol collected will be stamped with this origin. - SymbolOrigin Origin = SymbolOrigin::Unknown; - /// Collect macros. - /// Note that SymbolCollector must be run with preprocessor in order to - /// collect macros. For example, `indexTopLevelDecls` will not index any - /// macro even if this is true. - bool CollectMacro = false; + struct CollectSymbolOptions { + /// When symbol paths cannot be resolved to absolute paths (e.g. files in + /// VFS that does not have absolute path), combine the fallback directory + /// with symbols' paths to get absolute paths. This must be an absolute + /// path. + std::string FallbackDir; + bool CollectIncludePath = false; + /// If set, this is used to map symbol #include path to a potentially + /// different #include path. + const CanonicalIncludes *Includes = nullptr; + // Populate the Symbol.References field. + bool CountReferences = false; + // Every symbol collected will be stamped with this origin. + SymbolOrigin Origin = SymbolOrigin::Unknown; + /// Collect macros. 
+ /// Note that SymbolCollector must be run with preprocessor in order to + /// collect macros. For example, `indexTopLevelDecls` will not index any + /// macro even if this is true. + bool CollectMacro = false; + }; + struct CollectOccurrenceOptions { + // The symbol occurrence kind that will be collected. + SymbolOccurrenceKind Filter; + // A whitelist symbols which will be collected. + // If none, all symbol occurrences will be collected. + llvm::Optional> IDs = llvm::None; + }; + + const CollectSymbolOptions* SymbolOpts; + const CollectOccurrenceOptions* OccurrenceOpts; }; - SymbolCollector(Options Opts); + SymbolCollector(Options Opts, std::vector URISchemes); + ~SymbolCollector(); /// Returns true is \p ND should be collected. /// AST matchers require non-const ASTContext. - static bool shouldCollectSymbol(const NamedDecl &ND, ASTContext &ASTCtx, - const Options &Opts); + static bool shouldCollectSymbol(const NamedDecl &ND, ASTContext &ASTCtx); void initialize(ASTContext &Ctx) override; - void setPreprocessor(std::shared_ptr PP) override { - this->PP = std::move(PP); - } + void setPreprocessor(std::shared_ptr PP) override; bool handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles, @@ -89,25 +96,22 @@ void finish() override; private: - const Symbol *addDeclaration(const NamedDecl &, SymbolID); - void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); + Options Opts; + + /// Specifies URI schemes that can be used to generate URIs for file paths + /// in symbols. The list of schemes will be tried in order until a working + /// scheme is found. If no scheme works, symbol location will be dropped. + std::vector URISchemes = {"file"}; - // All Symbols collected from the AST. - SymbolSlab::Builder Symbols; - ASTContext *ASTCtx; std::shared_ptr PP; - std::shared_ptr CompletionAllocator; - std::unique_ptr CompletionTUInfo; - Options Opts; - // Symbols referenced from the current TU, flushed on finish(). 
- llvm::DenseSet ReferencedDecls; - llvm::DenseSet ReferencedMacros; - // Maps canonical declaration provided by clang to canonical declaration for - // an index symbol, if clangd prefers a different declaration than that - // provided by clang. For example, friend declaration might be considered - // canonical by clang but should not be considered canonical in the index - // unless it's a definition. - llvm::DenseMap CanonicalDecls; + + ASTContext *ASTCtx; + + class CollectSymbol; + class CollectOccurrence; + std::unique_ptr CollectSym; + std::unique_ptr CollectOccu; + SymbolSlab::Builder Symbols; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -46,8 +46,10 @@ // // The Path can be a path relative to the build directory, or retrieved from // the SourceManager. -llvm::Optional toURI(const SourceManager &SM, StringRef Path, - const SymbolCollector::Options &Opts) { +llvm::Optional +toURI(const SourceManager &SM, StringRef Path, + ArrayRef URISchemes, + llvm::StringRef FallbackDir) { llvm::SmallString<128> AbsolutePath(Path); if (std::error_code EC = SM.getFileManager().getVirtualFileSystem()->makeAbsolute( @@ -73,14 +75,14 @@ llvm::sys::path::filename(AbsolutePath.str())); AbsolutePath = AbsoluteFilename; } - } else if (!Opts.FallbackDir.empty()) { - llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath); + } else if (!FallbackDir.empty()) { + llvm::sys::fs::make_absolute(FallbackDir, AbsolutePath); } llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true); std::string ErrMsg; - for (const auto &Scheme : Opts.URISchemes) { + for (const auto &Scheme : URISchemes) { auto U = URI::create(AbsolutePath, Scheme); if (U) return U->toString(); @@ -157,7 +159,9 @@ /// Returns None if fails to get include header for \p Loc. 
llvm::Optional getIncludeHeader(llvm::StringRef QName, const SourceManager &SM, - SourceLocation Loc, const SymbolCollector::Options &Opts) { + SourceLocation Loc, + ArrayRef URISchemes, + const SymbolCollector::Options::CollectSymbolOptions &Opts) { std::vector Headers; // Collect the #include stack. while (true) { @@ -179,16 +183,17 @@ if (Header.startswith("<") || Header.startswith("\"")) return Header.str(); } - return toURI(SM, Header, Opts); + return toURI(SM, Header, URISchemes, Opts.FallbackDir); } // Return the symbol location of the token at \p Loc. llvm::Optional getTokenLocation(SourceLocation TokLoc, const SourceManager &SM, - const SymbolCollector::Options &Opts, + ArrayRef URISchemes, + llvm::StringRef FallbackDir, const clang::LangOptions &LangOpts, std::string &FileURIStorage) { - auto U = toURI(SM, SM.getFilename(TokLoc), Opts); + auto U = toURI(SM, SM.getFilename(TokLoc), URISchemes, FallbackDir); if (!U) return llvm::None; FileURIStorage = std::move(*U); @@ -224,20 +229,155 @@ match(decl(isExpansionInMainFile()), ND, ND.getASTContext()).empty(); } +SymbolOccurrenceKind ToOccurrenceKind(index::SymbolRoleSet Roles) { + SymbolOccurrenceKind Kind; + for (auto Mask : {SymbolOccurrenceKind::Declaration, + SymbolOccurrenceKind::Definition, + SymbolOccurrenceKind::Reference}) { + if (Roles & static_cast(Mask)) + Kind |= Mask; + } + return Kind; +} + } // namespace -SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {} +class SymbolCollector::CollectSymbol { + public: + CollectSymbol(const SymbolCollector::Options::CollectSymbolOptions &Opts, + ArrayRef URISchemes, + SymbolSlab::Builder* Builder) + : Opts(Opts), URISchemes(URISchemes), Symbols(Builder) { + } + + void collectDecl(const Decl *D, index::SymbolRoleSet Roles, + ArrayRef Relations, + SourceLocation Loc, + index::IndexDataConsumer::ASTNodeInfo ASTNode); + + void collectMacro(const IdentifierInfo *Name, const MacroInfo *MI, + index::SymbolRoleSet Roles, SourceLocation Loc); + 
+ void initialize(ASTContext &Ctx) { + ASTCtx = &Ctx; + CompletionAllocator = std::make_shared(); + CompletionTUInfo = + llvm::make_unique(CompletionAllocator); + } + + void setPreprocessor(std::shared_ptr PP) { + this->PP = std::move(PP); + } + + void finish(); + + private: + const Symbol *addDeclaration(const NamedDecl &, SymbolID); + void addDefinition(const NamedDecl & ND, const Symbol &DeclSym); + + const SymbolCollector::Options::CollectSymbolOptions& Opts; + ArrayRef URISchemes; + + SymbolSlab::Builder* Symbols; + ASTContext *ASTCtx; + + std::shared_ptr PP; + std::shared_ptr CompletionAllocator; + std::unique_ptr CompletionTUInfo; + // Symbols referenced from the current TU, flushed on finish(). + llvm::DenseSet ReferencedDecls; + llvm::DenseSet ReferencedMacros; + // Maps canonical declaration provided by clang to canonical declaration for + // an index symbol, if clangd prefers a different declaration than that + // provided by clang. For example, friend declaration might be considered + // canonical by clang but should not be considered canonical in the index + // unless it's a definition. 
+ llvm::DenseMap CanonicalDecls; +}; + +SymbolCollector::SymbolCollector(Options Opts, + std::vector URISchemes) + : Opts(std::move(Opts)) { + if (!URISchemes.empty()) + this->URISchemes = std::move(URISchemes); + if (Opts.SymbolOpts) { + CollectSym = llvm::make_unique(*Opts.SymbolOpts, + this->URISchemes, &Symbols); + } + if (Opts.OccurrenceOpts) { + CollectOccu = llvm::make_unique( + *Opts.OccurrenceOpts, this->URISchemes, &Symbols); + } +} + +SymbolCollector::~SymbolCollector() {} + +class SymbolCollector::CollectOccurrence { +public: + CollectOccurrence( + const SymbolCollector::Options::CollectOccurrenceOptions &Opts, + ArrayRef URISchemes, SymbolSlab::Builder *Builder) + : Opts(Opts), URISchemes(URISchemes), Builder(Builder) {} + + void initialize(ASTContext &Ctx) { + ASTCtx = &Ctx; + } + + void collectDecl(const Decl *D, index::SymbolRoleSet Roles, + ArrayRef Relations, + SourceLocation Loc, + index::IndexDataConsumer::ASTNodeInfo ASTNode) { + assert(ASTCtx && "ASTContext must be set."); + if (D->isImplicit()) + return; + + if (!ASTCtx->getSourceManager().isInMainFile(Loc)) + return; + std::string FileURI; + auto AddOccurrence = [&](SourceLocation L, const SymbolID &ID) { + if (auto Location = + getTokenLocation(Loc, ASTCtx->getSourceManager(), URISchemes, "", + ASTCtx->getLangOpts(), FileURI)) { + log("add occurrences: {0}", *Location); + SymbolOccurrence Occurrence; + Occurrence.Location = *Location; + Occurrence.Kind = ToOccurrenceKind(Roles); + Builder->insert(ID, Occurrence); + } + }; + if (static_cast(Opts.Filter) & Roles) { + if (auto ID = getSymbolID(D)) { + if (!Opts.IDs || llvm::is_contained(*Opts.IDs, *ID)) { + AddOccurrence(Loc, *ID); + } + } + } + } + +private: + const SymbolCollector::Options::CollectOccurrenceOptions& Opts; + ArrayRef URISchemes; + + SymbolSlab::Builder* Builder; + ASTContext *ASTCtx; +}; + void SymbolCollector::initialize(ASTContext &Ctx) { - ASTCtx = &Ctx; - CompletionAllocator = std::make_shared(); - CompletionTUInfo = - 
llvm::make_unique(CompletionAllocator); + if (CollectSym) + CollectSym->initialize(Ctx); + if (CollectOccu) + CollectOccu->initialize(Ctx); +} + +void SymbolCollector::setPreprocessor(std::shared_ptr PP) { + if (CollectSym) { + CollectSym->setPreprocessor(PP); + } } bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND, - ASTContext &ASTCtx, - const Options &Opts) { + ASTContext &ASTCtx) { using namespace clang::ast_matchers; if (ND.isImplicit()) return false; @@ -282,7 +422,7 @@ } // Always return true to continue indexing. -bool SymbolCollector::handleDeclOccurence( +void SymbolCollector::CollectSymbol::collectDecl( const Decl *D, index::SymbolRoleSet Roles, ArrayRef Relations, SourceLocation Loc, index::IndexDataConsumer::ASTNodeInfo ASTNode) { @@ -295,7 +435,7 @@ if ((ASTNode.OrigD->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) && !(Roles & static_cast(index::SymbolRole::Definition))) - return true; + return; // A declaration created for a friend declaration should not be used as the // canonical declaration in the index. Use OrigD instead, unless we've already // picked a replacement for D @@ -303,7 +443,7 @@ D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second; const NamedDecl *ND = llvm::dyn_cast(D); if (!ND) - return true; + return; // Mark D as referenced if this is a reference coming from the main file. // D may not be an interesting symbol, but it's cheaper to check at the end. @@ -316,16 +456,16 @@ // Don't continue indexing if this is a mere reference. 
if (!(Roles & static_cast(index::SymbolRole::Declaration) || Roles & static_cast(index::SymbolRole::Definition))) - return true; - if (!shouldCollectSymbol(*ND, *ASTCtx, Opts)) - return true; + return; + if (!shouldCollectSymbol(*ND, *ASTCtx)) + return; auto ID = getSymbolID(ND); if (!ID) - return true; + return; const NamedDecl &OriginalDecl = *cast(ASTNode.OrigD); - const Symbol *BasicSymbol = Symbols.find(*ID); + const Symbol *BasicSymbol = Symbols->find(*ID); if (!BasicSymbol) // Regardless of role, ND is the canonical declaration. BasicSymbol = addDeclaration(*ND, std::move(*ID)); else if (isPreferredDeclaration(OriginalDecl, Roles)) @@ -337,24 +477,23 @@ if (Roles & static_cast(index::SymbolRole::Definition)) addDefinition(OriginalDecl, *BasicSymbol); - return true; } -bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name, - const MacroInfo *MI, - index::SymbolRoleSet Roles, - SourceLocation Loc) { +void SymbolCollector::CollectSymbol::collectMacro(const IdentifierInfo *Name, + const MacroInfo *MI, + index::SymbolRoleSet Roles, + SourceLocation Loc) { if (!Opts.CollectMacro) - return true; + return ; assert(PP.get()); const auto &SM = PP->getSourceManager(); if (SM.isInMainFile(SM.getExpansionLoc(MI->getDefinitionLoc()))) - return true; + return ; // Header guards are not interesting in index. Builtin macros don't have // useful locations and are not needed for code completions. if (MI->isUsedForHeaderGuard() || MI->isBuiltinMacro()) - return true; + return ; // Mark the macro as referenced if this is a reference coming from the main // file. The macro may not be an interesting symbol, but it's cheaper to check @@ -367,17 +506,17 @@ // FIXME: remove macro with ID if it is undefined. 
if (!(Roles & static_cast(index::SymbolRole::Declaration) || Roles & static_cast(index::SymbolRole::Definition))) - return true; + return; llvm::SmallString<128> USR; if (index::generateUSRForMacro(Name->getName(), MI->getDefinitionLoc(), SM, USR)) - return true; + return; SymbolID ID(USR); // Only collect one instance in case there are multiple. - if (Symbols.find(ID) != nullptr) - return true; + if (Symbols->find(ID) != nullptr) + return; Symbol S; S.ID = std::move(ID); @@ -385,8 +524,10 @@ S.IsIndexedForCodeCompletion = true; S.SymInfo = index::getSymbolInfoForMacro(*MI); std::string FileURI; - if (auto DeclLoc = getTokenLocation(MI->getDefinitionLoc(), SM, Opts, - PP->getLangOpts(), FileURI)) + + if (auto DeclLoc = + getTokenLocation(MI->getDefinitionLoc(), SM, URISchemes, + Opts.FallbackDir, PP->getLangOpts(), FileURI)) S.CanonicalDeclaration = *DeclLoc; CodeCompletionResult SymbolCompletion(Name); @@ -398,9 +539,9 @@ std::string Include; if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - if (auto Header = - getIncludeHeader(Name->getName(), SM, - SM.getExpansionLoc(MI->getDefinitionLoc()), Opts)) + if (auto Header = getIncludeHeader( + Name->getName(), SM, SM.getExpansionLoc(MI->getDefinitionLoc()), + URISchemes, Opts)) Include = std::move(*Header); } S.Signature = Signature; @@ -408,17 +549,21 @@ Symbol::Details Detail; Detail.IncludeHeader = Include; S.Detail = &Detail; - Symbols.insert(S); - return true; + Symbols->insert(S); } void SymbolCollector::finish() { + if (CollectSym) + CollectSym->finish(); +} + +void SymbolCollector::CollectSymbol::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. 
auto IncRef = [this](const SymbolID &ID) { - if (const auto *S = Symbols.find(ID)) { + if (const auto *S = Symbols->find(ID)) { Symbol Inc = *S; ++Inc.References; - Symbols.insert(Inc); + Symbols->insert(Inc); } }; for (const NamedDecl *ND : ReferencedDecls) { @@ -440,7 +585,7 @@ ReferencedMacros.clear(); } -const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, +const Symbol *SymbolCollector::CollectSymbol::addDeclaration(const NamedDecl &ND, SymbolID ID) { auto &Ctx = ND.getASTContext(); auto &SM = Ctx.getSourceManager(); @@ -455,8 +600,9 @@ S.IsIndexedForCodeCompletion = isIndexedForCodeCompletion(ND, Ctx); S.SymInfo = index::getSymbolInfo(&ND); std::string FileURI; - if (auto DeclLoc = getTokenLocation(findNameLoc(&ND), SM, Opts, - ASTCtx->getLangOpts(), FileURI)) + if (auto DeclLoc = + getTokenLocation(findNameLoc(&ND), SM, URISchemes, Opts.FallbackDir, + ASTCtx->getLangOpts(), FileURI)) S.CanonicalDeclaration = *DeclLoc; // Add completion info. @@ -481,7 +627,7 @@ // Use the expansion location to get the #include header since this is // where the symbol is exposed. if (auto Header = getIncludeHeader( - QName, SM, SM.getExpansionLoc(ND.getLocation()), Opts)) + QName, SM, SM.getExpansionLoc(ND.getLocation()), URISchemes, Opts)) Include = std::move(*Header); } S.Signature = Signature; @@ -493,12 +639,12 @@ S.Detail = &Detail; S.Origin = Opts.Origin; - Symbols.insert(S); - return Symbols.find(S.ID); + Symbols->insert(S); + return Symbols->find(S.ID); } -void SymbolCollector::addDefinition(const NamedDecl &ND, - const Symbol &DeclSym) { +void SymbolCollector::CollectSymbol::addDefinition(const NamedDecl &ND, + const Symbol &DeclSym) { if (DeclSym.Definition) return; // If we saw some forward declaration, we end up copying the symbol. @@ -506,11 +652,32 @@ // in clang::index. We should only see one definition. 
Symbol S = DeclSym; std::string FileURI; - if (auto DefLoc = getTokenLocation(findNameLoc(&ND), - ND.getASTContext().getSourceManager(), - Opts, ASTCtx->getLangOpts(), FileURI)) + if (auto DefLoc = getTokenLocation( + findNameLoc(&ND), ND.getASTContext().getSourceManager(), URISchemes, + Opts.FallbackDir, ASTCtx->getLangOpts(), FileURI)) S.Definition = *DefLoc; - Symbols.insert(S); + Symbols->insert(S); +} + + +bool SymbolCollector::handleDeclOccurence( + const Decl *D, index::SymbolRoleSet Roles, + ArrayRef Relations, SourceLocation Loc, + index::IndexDataConsumer::ASTNodeInfo ASTNode) { + if (CollectSym) + CollectSym->collectDecl(D, Roles, Relations, Loc, ASTNode); + if (CollectOccu) + CollectOccu->collectDecl(D, Roles, Relations, Loc, ASTNode); + return true; +} + +bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name, + const MacroInfo *MI, + index::SymbolRoleSet Roles, + SourceLocation Loc) { + if (CollectSym) + CollectSym->collectMacro(Name, MI, Roles, Loc); + return true; } } // namespace clangd Index: test/clangd/initialize-params-invalid.test =================================================================== --- test/clangd/initialize-params-invalid.test +++ test/clangd/initialize-params-invalid.test @@ -29,6 +29,7 @@ # CHECK-NEXT: ] # CHECK-NEXT: }, # CHECK-NEXT: "hoverProvider": true, +# CHECK-NEXT: "referencesProvider": false, # CHECK-NEXT: "renameProvider": true, # CHECK-NEXT: "signatureHelpProvider": { # CHECK-NEXT: "triggerCharacters": [ Index: test/clangd/initialize-params.test =================================================================== --- test/clangd/initialize-params.test +++ test/clangd/initialize-params.test @@ -29,6 +29,7 @@ # CHECK-NEXT: ] # CHECK-NEXT: }, # CHECK-NEXT: "hoverProvider": true, +# CHECK-NEXT: "referencesProvider": false, # CHECK-NEXT: "renameProvider": true, # CHECK-NEXT: "signatureHelpProvider": { # CHECK-NEXT: "triggerCharacters": [ Index: unittests/clangd/CMakeLists.txt 
=================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -27,6 +27,7 @@ QualityTests.cpp SourceCodeTests.cpp SymbolCollectorTests.cpp + SymbolOccurrenceCollectorTests.cpp SyncAPI.cpp TUSchedulerTests.cpp TestFS.cpp Index: unittests/clangd/TUSchedulerTests.cpp =================================================================== --- unittests/clangd/TUSchedulerTests.cpp +++ unittests/clangd/TUSchedulerTests.cpp @@ -17,10 +17,11 @@ namespace clang { namespace clangd { +namespace { using ::testing::_; -using ::testing::Each; using ::testing::AnyOf; +using ::testing::Each; using ::testing::Pair; using ::testing::Pointee; using ::testing::UnorderedElementsAre; @@ -30,6 +31,18 @@ handleAllErrors(std::move(Err), [](const llvm::ErrorInfoBase &) {}); } +class NoopParsingCallbacks : public ParsingCallbacks { +public: + static NoopParsingCallbacks& instance() { + static NoopParsingCallbacks* Instance = new NoopParsingCallbacks; + return *Instance; + } + + void onPreambleAST(PathRef Path, ASTContext &Ctx, + std::shared_ptr PP) override {} + void onMainAST(PathRef Path, ParsedAST &AST) override {} +}; + class TUSchedulerTests : public ::testing::Test { protected: ParseInputs getInputs(PathRef File, std::string Contents) { @@ -45,7 +58,7 @@ TEST_F(TUSchedulerTests, MissingFiles) { TUScheduler S(getDefaultAsyncThreadsCount(), /*StorePreamblesInMemory=*/true, - /*PreambleParsedCallback=*/nullptr, + NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), ASTRetentionPolicy()); @@ -102,7 +115,7 @@ TUScheduler S( getDefaultAsyncThreadsCount(), /*StorePreamblesInMemory=*/true, - /*PreambleParsedCallback=*/nullptr, + NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), ASTRetentionPolicy()); auto Path = testPath("foo.cpp"); @@ -129,11 +142,10 @@ TEST_F(TUSchedulerTests, Debounce) { std::atomic CallbackCount(0); { - 
TUScheduler S(getDefaultAsyncThreadsCount(), - /*StorePreamblesInMemory=*/true, - /*PreambleParsedCallback=*/nullptr, - /*UpdateDebounce=*/std::chrono::seconds(1), - ASTRetentionPolicy()); + TUScheduler S( + getDefaultAsyncThreadsCount(), + /*StorePreamblesInMemory=*/true, NoopParsingCallbacks::instance(), + /*UpdateDebounce=*/std::chrono::seconds(1), ASTRetentionPolicy()); // FIXME: we could probably use timeouts lower than 1 second here. auto Path = testPath("foo.cpp"); S.update(Path, getInputs(Path, "auto (debounced)"), WantDiagnostics::Auto, @@ -163,7 +175,7 @@ { TUScheduler S(getDefaultAsyncThreadsCount(), /*StorePreamblesInMemory=*/true, - /*PreambleParsedCallback=*/nullptr, + NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::milliseconds(50), ASTRetentionPolicy()); @@ -261,7 +273,7 @@ Policy.MaxRetainedASTs = 2; TUScheduler S( /*AsyncThreadsCount=*/1, /*StorePreambleInMemory=*/true, - PreambleParsedCallback(), + NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), Policy); llvm::StringLiteral SourceContents = R"cpp( @@ -358,7 +370,7 @@ // the same time. All reads should get the same non-null preamble. 
TUScheduler S( /*AsyncThreadsCount=*/4, /*StorePreambleInMemory=*/true, - PreambleParsedCallback(), + NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), ASTRetentionPolicy()); auto Foo = testPath("foo.cpp"); @@ -391,7 +403,7 @@ TEST_F(TUSchedulerTests, NoopOnEmptyChanges) { TUScheduler S( /*AsyncThreadsCount=*/getDefaultAsyncThreadsCount(), - /*StorePreambleInMemory=*/true, PreambleParsedCallback(), + /*StorePreambleInMemory=*/true, NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), ASTRetentionPolicy()); @@ -444,7 +456,7 @@ TEST_F(TUSchedulerTests, NoChangeDiags) { TUScheduler S( /*AsyncThreadsCount=*/getDefaultAsyncThreadsCount(), - /*StorePreambleInMemory=*/true, PreambleParsedCallback(), + /*StorePreambleInMemory=*/true, NoopParsingCallbacks::instance(), /*UpdateDebounce=*/std::chrono::steady_clock::duration::zero(), ASTRetentionPolicy()); @@ -475,5 +487,6 @@ ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(1))); } +} // namespace } // namespace clangd } // namespace clang