Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -14,6 +14,7 @@ #include "FSProvider.h" #include "index/FileIndex.h" #include "index/Index.h" +#include "index/Serialization.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/SHA1.h" @@ -27,6 +28,37 @@ namespace clang { namespace clangd { +// Handles storage and retrieval of index shards. +class BackgroundIndexStorage { +private: + static std::function(llvm::StringRef)> + Factory; + +public: + using FileDigest = decltype(llvm::SHA1::hash({})); + + // Stores given shard associationg with ShardIdentifier, which can be + // retrieved later on with the same identifier. + virtual bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const = 0; + + // Retrieves the shard if found, also returns hash for the source file that + // generated this shard. + virtual llvm::Expected + retrieveShard(llvm::StringRef ShardIdentifier) const = 0; + + template static void setStorageFactory(T Factory) { + BackgroundIndexStorage::Factory = Factory; + } + + static std::shared_ptr + getForDirectory(llvm::StringRef Directory) { + if (!Factory) + return nullptr; + return Factory(Directory); + } +}; + // Builds an in-memory index by by running the static indexer action over // all commands in a compilation database. Indexing happens in the background. // FIXME: it should also persist its state on disk for fast start. @@ -34,7 +66,7 @@ class BackgroundIndex : public SwapIndex { public: // FIXME: resource-dir injection should be hoisted somewhere common. - BackgroundIndex(Context BackgroundContext, StringRef ResourceDir, + BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir, const FileSystemProvider &, ArrayRef URISchemes, size_t ThreadPoolSize = llvm::hardware_concurrency()); ~BackgroundIndex(); // Blocks while the current task finishes. @@ -59,7 +91,12 @@ private: /// Given index results from a TU, only update files in \p FilesToUpdate. void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const llvm::StringMap &FilesToUpdate); + const llvm::StringMap &FilesToUpdate, + BackgroundIndexStorage *IndexStorage); + void loadShard(BackgroundIndexStorage *IndexStorage, + const tooling::CompileCommand &Cmd); + void loadShards(BackgroundIndexStorage *IndexStorage, + const std::vector &Cmds); // configuration std::string ResourceDir; @@ -68,7 +105,8 @@ std::vector URISchemes; // index state - llvm::Error index(tooling::CompileCommand); + llvm::Error index(tooling::CompileCommand, + BackgroundIndexStorage *IndexStorage); FileSymbols IndexedSymbols; llvm::StringMap IndexedFileDigests; // Key is absolute file path. @@ -77,7 +115,8 @@ // queue management using Task = std::function; void run(); // Main loop executed by Thread. Runs tasks from Queue. - void enqueueLocked(tooling::CompileCommand Cmd); + void enqueueLocked(tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage); std::mutex QueueMu; unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks. std::condition_variable QueueCV; Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" +#include +#include #include #include @@ -31,6 +33,92 @@ namespace clang { namespace clangd { +namespace { + +static BackgroundIndex::FileDigest digest(StringRef Content) { + return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); +} + +static Optional digestFile(const SourceManager &SM, + FileID FID) { + bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (Invalid) + return None; + return digest(Content); +} + +llvm::SmallString<128> +getShardPathFromFilePath(llvm::SmallString<128> ShardRoot, + llvm::StringRef FilePath) { + sys::path::append(ShardRoot, sys::path::filename(FilePath) + + toHex(digest(FilePath)) + ".idx"); + return ShardRoot; +} + +// Uses disk as a storage for index shards. Creates a directory called +// ".clangd-index/" under the path provided during initialize. +// Note: Requires initialize to be called before storing or retrieval. +// This class is thread-safe. +class DiskBackedIndexStorage : public BackgroundIndexStorage { + llvm::SmallString<128> DiskShardRoot; + +public: + llvm::Expected + retrieveShard(llvm::StringRef ShardIdentifier) const override { + auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + auto Buffer = MemoryBuffer::getFile(ShardPath); + if (!Buffer) { + elog("Couldn't retrieve {0}: {1}", ShardPath, + Buffer.getError().message()); + return llvm::make_error(Buffer.getError()); + } + // FIXME: Change readIndexFile to also look at Hash of the source that + // generated index and skip if there is a mismatch. + return readIndexFile(Buffer->get()->getBuffer()); + } + + bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { + auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + std::error_code EC; + llvm::raw_fd_ostream OS(ShardPath, EC); + if (EC) { + elog("Failed to open {0} for writing: {1}", ShardPath, EC.message()); + return false; + } + OS << Shard; + return true; + } + + // Initializes DiskShardRoot to (Directory + ".clangd-index/") which is the + // base directory for all shard files. After the initialization succeeds all + // subsequent calls are no-op. + DiskBackedIndexStorage(llvm::StringRef Directory) : DiskShardRoot(Directory) { + sys::path::append(DiskShardRoot, ".clangd-index/"); + if (!llvm::sys::fs::exists(DiskShardRoot)) { + std::error_code OK; + std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot); + if (EC != OK) { + elog("Failed to create {0}: {1}", DiskShardRoot, EC.message()); + } + } + } +}; + +SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { + SmallString<128> AbsolutePath; + if (sys::path::is_absolute(Cmd.Filename)) { + AbsolutePath = Cmd.Filename; + } else { + AbsolutePath = Cmd.Directory; + sys::path::append(AbsolutePath, Cmd.Filename); + } + return AbsolutePath; +} + +} // namespace + BackgroundIndex::BackgroundIndex(Context BackgroundContext, StringRef ResourceDir, const FileSystemProvider &FSProvider, @@ -45,7 +133,7 @@ // Set priority to low, since background indexing is a long running task we // do not want to eat up cpu when there are any other high priority threads. // FIXME: In the future we might want a more general way of handling this to - // support a tasks with various priorities. + // support tasks with various priorities. setThreadPriority(ThreadPool.back(), ThreadPriority::Low); } } @@ -97,19 +185,58 @@ void BackgroundIndex::enqueue(StringRef Directory, tooling::CompileCommand Cmd) { + auto IndexStorage = BackgroundIndexStorage::getForDirectory(Directory); + if (IndexStorage) + loadShard(IndexStorage.get(), Cmd); + else + elog("No index storage for: {0}", Directory); { std::lock_guard Lock(QueueMu); - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), std::move(IndexStorage)); } QueueCV.notify_all(); } +void BackgroundIndex::loadShard(BackgroundIndexStorage *IndexStorage, + const tooling::CompileCommand &Cmd) { + assert(IndexStorage && "No index storage to load from?"); + auto AbsolutePath = getAbsolutePath(Cmd); + auto Shard = IndexStorage->retrieveShard(AbsolutePath); + if (Shard) { + // FIXME: Updated hashes once we have them in serialized format. + // IndexedFileDigests[AbsolutePath] = Hash; + IndexedSymbols.update(AbsolutePath, + make_unique(std::move(*Shard->Symbols)), + make_unique(std::move(*Shard->Refs))); + + vlog("Loaded {0} from storage", AbsolutePath); + } +} + +void BackgroundIndex::loadShards( + BackgroundIndexStorage *IndexStorage, + const std::vector &Cmds) { + assert(IndexStorage && "No index storage to load from?"); + for (const auto &Cmd : Cmds) + loadShard(IndexStorage, Cmd); + // FIXME: Maybe we should get rid of this one once we start building index + // periodically? Especially if we also offload this task onto the queue. + vlog("Rebuilding automatic index"); + reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge, + URISchemes)); +} + void BackgroundIndex::enqueueAll(StringRef Directory, const tooling::CompilationDatabase &CDB) { trace::Span Tracer("BackgroundIndexEnqueueCDB"); // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB // from disk and enqueue the commands asynchronously? auto Cmds = CDB.getAllCompileCommands(); + auto IndexStorage = BackgroundIndexStorage::getForDirectory(Directory); + if (IndexStorage) + loadShards(IndexStorage.get(), Cmds); + else + elog("No index storage for: {0}", Directory); SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size())); std::mt19937 Generator(std::random_device{}()); std::shuffle(Cmds.begin(), Cmds.end(), Generator); @@ -117,33 +244,23 @@ { std::lock_guard Lock(QueueMu); for (auto &Cmd : Cmds) - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), IndexStorage); } QueueCV.notify_all(); } -void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) { +void BackgroundIndex::enqueueLocked( + tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage) { Queue.push_back(Bind( - [this](tooling::CompileCommand Cmd) { + [this](tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage) { std::string Filename = Cmd.Filename; Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); - if (auto Error = index(std::move(Cmd))) + if (auto Error = index(std::move(Cmd), IndexStorage.get())) log("Indexing {0} failed: {1}", Filename, std::move(Error)); }, - std::move(Cmd))); -} - -static BackgroundIndex::FileDigest digest(StringRef Content) { - return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); -} - -static Optional digestFile(const SourceManager &SM, - FileID FID) { - bool Invalid = false; - StringRef Content = SM.getBufferData(FID, &Invalid); - if (Invalid) - return None; - return digest(Content); + std::move(Cmd), std::move(IndexStorage))); } // Resolves URI to file paths with cache. @@ -179,7 +296,8 @@ /// Given index results from a TU, only update files in \p FilesToUpdate. void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const StringMap &FilesToUpdate) { + const StringMap &FilesToUpdate, + BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. struct File { DenseSet Symbols; @@ -227,20 +345,33 @@ for (const auto *R : F.second.Refs) Refs.insert(RefToIDs[R], *R); + auto SS = llvm::make_unique(std::move(Syms).build()); + auto RS = llvm::make_unique(std::move(Refs).build()); + + auto Hash = FilesToUpdate.lookup(Path); + // We need to store shards before updating the index, since the latter + // consumes slabs. + // FIXME: Store Hash in the Shard. + if (IndexStorage) { + IndexFileOut Shard; + Shard.Symbols = SS.get(); + Shard.Refs = RS.get(); + IndexStorage->storeShard(Path, Shard); + } + std::lock_guard Lock(DigestsMu); // This can override a newer version that is added in another thread, // if this thread sees the older version but finishes later. This should be // rare in practice. - IndexedFileDigests[Path] = FilesToUpdate.lookup(Path); - IndexedSymbols.update(Path, - make_unique(std::move(Syms).build()), - make_unique(std::move(Refs).build())); + IndexedFileDigests[Path] = Hash; + IndexedSymbols.update(Path, std::move(SS), std::move(RS)); } } // Creates a filter to not collect index results from files with unchanged // digests. -// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate. +// \p FileDigests contains file digests for the current indexed files, and all +// changed files will be added to \p FilesToUpdate. decltype(SymbolCollector::Options::FileFilter) createFileFilter( const llvm::StringMap &FileDigests, llvm::StringMap &FilesToUpdate) { @@ -269,16 +400,11 @@ }; } -Error BackgroundIndex::index(tooling::CompileCommand Cmd) { +Error BackgroundIndex::index(tooling::CompileCommand Cmd, + BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); SPAN_ATTACH(Tracer, "file", Cmd.Filename); - SmallString<128> AbsolutePath; - if (sys::path::is_absolute(Cmd.Filename)) { - AbsolutePath = Cmd.Filename; - } else { - AbsolutePath = Cmd.Directory; - sys::path::append(AbsolutePath, Cmd.Filename); - } + SmallString<128> AbsolutePath = getAbsolutePath(Cmd); auto FS = FSProvider.getFileSystem(); auto Buf = FS->getBufferForFile(AbsolutePath); @@ -342,7 +468,8 @@ Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate); + update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, + IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. @@ -359,5 +486,8 @@ return Error::success(); } +std::function(llvm::StringRef)> + BackgroundIndexStorage::Factory = nullptr; + } // namespace clangd } // namespace clang Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -76,5 +76,81 @@ FileURI("unittest:///root/B.cc")})); } +TEST(BackgroundIndexTest, ShardStorageTest) { + class MemoryShardStorage : public BackgroundIndexStorage { + mutable std::mutex StorageMu; + llvm::StringMap &Storage; + size_t &CacheHits; + + public: + MemoryShardStorage(llvm::StringMap &Storage, size_t &CacheHits) + : Storage(Storage), CacheHits(CacheHits) {} + + bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { + std::lock_guard Lock(StorageMu); + std::string &str = Storage[ShardIdentifier]; + llvm::raw_string_ostream OS(str); + OS << Shard; + OS.flush(); + return true; + } + llvm::Expected + retrieveShard(llvm::StringRef ShardIdentifier) const override { + std::lock_guard Lock(StorageMu); + if (Storage.find(ShardIdentifier) == Storage.end()) + return llvm::make_error( + "Shard not found.", llvm::inconvertibleErrorCode()); + auto IndexFile = readIndexFile(Storage[ShardIdentifier]); + if (!IndexFile) + return IndexFile; + CacheHits++; + return IndexFile; + } + bool initialize(llvm::StringRef Directory) { return true; } + }; + MockFSProvider FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }"; + + llvm::StringMap Storage; + size_t CacheHits = 0; + BackgroundIndexStorage::setStorageFactory( + [&Storage, &CacheHits](llvm::StringRef) { + return std::make_shared(Storage, CacheHits); + }); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + // Check nothing is loaded from Storage, but A.cc and A.h has been stored. + { + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + EXPECT_EQ(CacheHits, 0U); + EXPECT_EQ(Storage.size(), 2U); + EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); + EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); + + // Check A.cc has been loaded from cache. + { + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + EXPECT_EQ(CacheHits, 1U); + EXPECT_EQ(Storage.size(), 2U); + EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); + EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); +} + } // namespace clangd } // namespace clang