diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -73,6 +73,7 @@ XRefs.cpp index/Background.cpp + index/BackgroundIndexLoader.cpp index/BackgroundIndexStorage.cpp index/BackgroundQueue.cpp index/BackgroundRebuild.cpp diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -12,6 +12,7 @@ #include "Context.h" #include "FSProvider.h" #include "GlobalCompilationDatabase.h" +#include "Path.h" #include "SourceCode.h" #include "Threading.h" #include "index/BackgroundRebuild.h" @@ -165,20 +166,9 @@ std::mutex ShardVersionsMu; BackgroundIndexStorage::Factory IndexStorageFactory; - struct Source { - std::string Path; - bool NeedsReIndexing; - Source(llvm::StringRef Path, bool NeedsReIndexing) - : Path(Path), NeedsReIndexing(NeedsReIndexing) {} - }; - // Loads the shards for a single TU and all of its dependencies. Returns the - // list of sources and whether they need to be re-indexed. - std::vector loadShard(const tooling::CompileCommand &Cmd, - BackgroundIndexStorage *IndexStorage, - llvm::StringSet<> &LoadedShards); - // Tries to load shards for the ChangedFiles. + // Tries to load shards for the MainFiles. std::vector> - loadShards(std::vector ChangedFiles); + loadProject(std::vector MainFiles); BackgroundQueue::Task changedFilesTask(const std::vector &ChangedFiles); diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -10,6 +10,7 @@ #include "ClangdUnit.h" #include "Compiler.h" #include "Context.h" +#include "FSProvider.h" #include "Headers.h" #include "Logger.h" #include "Path.h" @@ -18,6 +19,7 @@ #include "Threading.h" #include "Trace.h" #include "URI.h" +#include "index/BackgroundIndexLoader.h" #include "index/FileIndex.h" #include "index/IndexAction.h" #include "index/MemIndex.h" @@ -41,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +51,8 @@ #include #include #include +#include +#include namespace clang { namespace clangd { @@ -118,6 +123,7 @@ } return AbsolutePath; } + } // namespace BackgroundIndex::BackgroundIndex( @@ -155,7 +161,7 @@ log("Enqueueing {0} commands for indexing", ChangedFiles.size()); SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); - auto NeedsReIndexing = loadShards(std::move(ChangedFiles)); + auto NeedsReIndexing = loadProject(std::move(ChangedFiles)); // Run indexing for files that need to be updated. std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), std::mt19937(std::random_device{}())); @@ -415,142 +421,18 @@ return llvm::Error::success(); } -std::vector -BackgroundIndex::loadShard(const tooling::CompileCommand &Cmd, - BackgroundIndexStorage *IndexStorage, - llvm::StringSet<> &LoadedShards) { - struct ShardInfo { - std::string AbsolutePath; - std::unique_ptr Shard; - FileDigest Digest = {}; - bool CountReferences = false; - bool HadErrors = false; - }; - std::vector IntermediateSymbols; - // Make sure we don't have duplicate elements in the queue. Keys are absolute - // paths. - llvm::StringSet<> InQueue; - auto FS = FSProvider.getFileSystem(); - // Dependencies of this TU, paired with the information about whether they - // need to be re-indexed or not. - std::vector Dependencies; - std::queue ToVisit; - std::string AbsolutePath = getAbsolutePath(Cmd).str(); - // Up until we load the shard related to a dependency it needs to be - // re-indexed. - ToVisit.emplace(AbsolutePath, true); - InQueue.insert(AbsolutePath); - // Goes over each dependency. - while (!ToVisit.empty()) { - Dependencies.push_back(std::move(ToVisit.front())); - // Dependencies is not modified during the rest of the loop, so it is safe - // to keep the reference. - auto &CurDependency = Dependencies.back(); - ToVisit.pop(); - // If we have already seen this shard before(either loaded or failed) don't - // re-try again. Since the information in the shard won't change from one TU - // to another. - if (!LoadedShards.try_emplace(CurDependency.Path).second) { - // If the dependency needs to be re-indexed, first occurence would already - // have detected that, so we don't need to issue it again. - CurDependency.NeedsReIndexing = false; - continue; - } - - auto Shard = IndexStorage->loadShard(CurDependency.Path); - if (!Shard || !Shard->Sources) { - // File will be returned as requiring re-indexing to caller. - vlog("Failed to load shard: {0}", CurDependency.Path); - continue; - } - // These are the edges in the include graph for current dependency. - for (const auto &I : *Shard->Sources) { - auto U = URI::parse(I.getKey()); - if (!U) - continue; - auto AbsolutePath = URI::resolve(*U, CurDependency.Path); - if (!AbsolutePath) - continue; - // Add file as dependency if haven't seen before. - if (InQueue.try_emplace(*AbsolutePath).second) - ToVisit.emplace(*AbsolutePath, true); - // The node contains symbol information only for current file, the rest is - // just edges. - if (*AbsolutePath != CurDependency.Path) - continue; - - // We found source file info for current dependency. - assert(I.getValue().Digest != FileDigest{{0}} && "Digest is empty?"); - ShardInfo SI; - SI.AbsolutePath = CurDependency.Path; - SI.Shard = std::move(Shard); - SI.Digest = I.getValue().Digest; - SI.CountReferences = - I.getValue().Flags & IncludeGraphNode::SourceFlag::IsTU; - SI.HadErrors = - I.getValue().Flags & IncludeGraphNode::SourceFlag::HadErrors; - IntermediateSymbols.push_back(std::move(SI)); - // Check if the source needs re-indexing. - // Get the digest, skip it if file doesn't exist. - auto Buf = FS->getBufferForFile(CurDependency.Path); - if (!Buf) { - elog("Couldn't get buffer for file: {0}: {1}", CurDependency.Path, - Buf.getError().message()); - continue; - } - // If digests match then dependency doesn't need re-indexing. - // FIXME: Also check for dependencies(sources) of this shard and compile - // commands for cache invalidation. - CurDependency.NeedsReIndexing = - digest(Buf->get()->getBuffer()) != I.getValue().Digest; - } - } - // Load shard information into background-index. - { - std::lock_guard Lock(ShardVersionsMu); - // This can override a newer version that is added in another thread, - // if this thread sees the older version but finishes later. This - // should be rare in practice. - for (const ShardInfo &SI : IntermediateSymbols) { - auto SS = - SI.Shard->Symbols - ? llvm::make_unique(std::move(*SI.Shard->Symbols)) - : nullptr; - auto RS = SI.Shard->Refs - ? llvm::make_unique(std::move(*SI.Shard->Refs)) - : nullptr; - auto RelS = - SI.Shard->Relations - ? llvm::make_unique(std::move(*SI.Shard->Relations)) - : nullptr; - ShardVersion &SV = ShardVersions[SI.AbsolutePath]; - SV.Digest = SI.Digest; - SV.HadErrors = SI.HadErrors; - - IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS), - std::move(RelS), SI.CountReferences); - } - } - if (!IntermediateSymbols.empty()) - Rebuilder.loadedTU(); - - return Dependencies; -} - // Goes over each changed file and loads them from index. Returns the list of // TUs that had out-of-date/no shards. std::vector> -BackgroundIndex::loadShards(std::vector ChangedFiles) { +BackgroundIndex::loadProject(std::vector MainFiles) { std::vector> NeedsReIndexing; - // Keeps track of the files that will be reindexed, to make sure we won't - // re-index same dependencies more than once. Keys are AbsolutePaths. - llvm::StringSet<> FilesToIndex; - // Keeps track of the loaded shards to make sure we don't perform redundant - // disk IO. Keys are absolute paths. - llvm::StringSet<> LoadedShards; + + auto FS = FSProvider.getFileSystem(); + BackgroundIndexLoader Loader(FS.get()); + Rebuilder.startLoading(); - for (const auto &File : ChangedFiles) { + for (const auto &File : MainFiles) { auto Cmd = CDB.getCompileCommand(File); if (!Cmd) continue; @@ -560,23 +442,38 @@ ProjectRoot = std::move(PI->SourceRoot); BackgroundIndexStorage *IndexStorage = IndexStorageFactory(ProjectRoot); - auto Dependencies = loadShard(*Cmd, IndexStorage, LoadedShards); - for (const auto &Dependency : Dependencies) { - if (!Dependency.NeedsReIndexing || FilesToIndex.count(Dependency.Path)) - continue; - // FIXME: Currently, we simply schedule indexing on a TU whenever any of - // its dependencies needs re-indexing. We might do it smarter by figuring - // out a minimal set of TUs that will cover all the stale dependencies. - vlog("Enqueueing TU {0} because its dependency {1} needs re-indexing.", - Cmd->Filename, Dependency.Path); + Path AbsPath = getAbsolutePath(*Cmd).str().str(); + + if (!Loader.load(AbsPath, IndexStorage)) NeedsReIndexing.push_back({std::move(*Cmd), IndexStorage}); - // Mark all of this TU's dependencies as to-be-indexed so that we won't - // try to re-index those. - for (const auto &Dependency : Dependencies) - FilesToIndex.insert(Dependency.Path); - break; - } } + + auto Shards = std::move(Loader).takeShards(); + std::lock_guard Lock(ShardVersionsMu); + for (auto &SI : Shards) { + if (!SI.Shard) + continue; + auto SS = SI.Shard->Symbols + ? llvm::make_unique(std::move(*SI.Shard->Symbols)) + : nullptr; + auto RS = SI.Shard->Refs + ? llvm::make_unique(std::move(*SI.Shard->Refs)) + : nullptr; + auto RelS = + SI.Shard->Relations + ? llvm::make_unique(std::move(*SI.Shard->Relations)) + : nullptr; + ShardVersion &SV = ShardVersions[SI.AbsolutePath]; + SV.Digest = SI.Digest; + SV.HadErrors = SI.HadErrors; + + IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS), + std::move(RelS), SI.CountReferences); + } + // FIXME: Should we call it for each TU? + if (!Shards.empty()) + Rebuilder.loadedTU(); + Rebuilder.doneLoading(); return NeedsReIndexing; } diff --git a/clang-tools-extra/clangd/index/BackgroundIndexLoader.h b/clang-tools-extra/clangd/index/BackgroundIndexLoader.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/index/BackgroundIndexLoader.h @@ -0,0 +1,59 @@ +//===--- Background.h - Build an index in a background thread ----*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H + +#include "Path.h" +#include "index/Background.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/VirtualFileSystem.h" +#include +#include + +namespace clang { +namespace clangd { + +struct ShardInfo { + Path AbsolutePath; + FileDigest Digest = {}; + bool CountReferences = false; + bool HadErrors = false; + std::unique_ptr Shard; +}; + +class BackgroundIndexLoader { +public: + BackgroundIndexLoader(llvm::vfs::FileSystem *FS) : FS(FS) {} + /// Loads all shards for the TU \p MainFile from \p Storage. Returns false if + /// any shards that are first seen for this TU are stale. + bool load(PathRef MainFile, BackgroundIndexStorage *Storage); + + /// Consumes the loader and returns all shards. + std::vector takeShards() &&; + +private: + struct CachedShard { + ShardInfo SI; + // Strings points to ShardInfo. + std::vector Edges; + bool IsStale = false; + }; + std::pair + loadShard(PathRef ShardIdentifier, BackgroundIndexStorage *Storage); + + // Cache for Storage lookups. + llvm::StringMap LoadedShards; + + llvm::vfs::FileSystem *FS; +}; + +} // namespace clangd +} // namespace clang + +#endif diff --git a/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp b/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp @@ -0,0 +1,124 @@ +//===-- BackgroundIndexLoader.cpp - ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "index/BackgroundIndexLoader.h" +#include "Logger.h" +#include "Path.h" +#include "index/Background.h" +#include "llvm/ADT/DenseSet.h" +#include + +namespace clang { +namespace clangd { +namespace { + +llvm::Optional uriToAbsolutePath(llvm::StringRef URI, PathRef HintPath) { + auto U = URI::parse(URI); + if (!U) + return llvm::None; + auto AbsolutePath = URI::resolve(*U, HintPath); + if (!AbsolutePath) + return llvm::None; + return *AbsolutePath; +} + +bool hasChanged(llvm::vfs::FileSystem *FS, const ShardInfo &SI) { + auto Buf = FS->getBufferForFile(SI.AbsolutePath); + if (!Buf) { + elog("Couldn't get buffer for file: {0}: {1}", SI.AbsolutePath, + Buf.getError().message()); + // There is no point in indexing an unreadable file. + return false; + } + return digest(Buf->get()->getBuffer()) != SI.Digest; +} + +} // namespace + +std::pair +BackgroundIndexLoader::loadShard(PathRef ShardIdentifier, + BackgroundIndexStorage *Storage) { + auto It = LoadedShards.try_emplace(ShardIdentifier); + CachedShard &CS = It.first->getValue(); + if (!It.second) + return {CS, true}; + + ShardInfo &SI = CS.SI; + SI.AbsolutePath = ShardIdentifier; + auto Shard = Storage->loadShard(ShardIdentifier); + if (!Shard || !Shard->Sources) { + vlog("Failed to load shard: {0}", ShardIdentifier); + SI.HadErrors = true; + CS.IsStale = true; + return {CS, false}; + } + + SI.Shard = std::move(Shard); + SI.AbsolutePath = ShardIdentifier; + for (const auto &It : *SI.Shard->Sources) { + auto AbsPath = uriToAbsolutePath(It.getKey(), ShardIdentifier); + if (!AbsPath || *AbsPath != ShardIdentifier) { + CS.Edges.push_back(*AbsPath); + continue; + } + + const IncludeGraphNode &IGN = It.getValue(); + SI.Digest = IGN.Digest; + SI.CountReferences = IGN.Flags & IncludeGraphNode::SourceFlag::IsTU; + SI.HadErrors = IGN.Flags & IncludeGraphNode::SourceFlag::HadErrors; + } + assert(SI.Digest != FileDigest{{0}} && "Digest is empty?"); + CS.IsStale = hasChanged(FS, SI); + return {CS, false}; +} + +bool BackgroundIndexLoader::load(PathRef MainFile, + BackgroundIndexStorage *Storage) { + bool IsStale = false; + // Following containers points to strings inside LoadedShards. + llvm::DenseSet InQueue; + std::queue ToVisit; + auto LoadResult = loadShard(MainFile, Storage); + + const CachedShard &MainShard = LoadResult.first; + if (!LoadResult.second && MainShard.IsStale) + IsStale = true; + + for (PathRef Edge : MainShard.Edges) { + InQueue.insert(Edge); + ToVisit.push(Edge); + } + while (!ToVisit.empty()) { + PathRef ShardIdentifier = ToVisit.front(); + ToVisit.pop(); + + auto LoadResult = loadShard(ShardIdentifier, Storage); + const CachedShard &CS = LoadResult.first; + // Report the file as stale if it is the first time we see it. + // FIXME: We should still update staleness for main file, if it had errors. + if (!LoadResult.second && CS.IsStale) + IsStale = true; + for (PathRef Edge : CS.Edges) { + if (InQueue.insert(Edge).second) + ToVisit.push(Edge); + } + } + + return !IsStale; +} + +std::vector BackgroundIndexLoader::takeShards() && { + std::vector Shards; + Shards.reserve(LoadedShards.size()); + for (auto &It : LoadedShards) + Shards.push_back(std::move(It.getValue().SI)); + return Shards; +} + +} // namespace clangd +} // namespace clang