Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -81,7 +81,6 @@ // The indexing happens in a background thread, so the symbols will be // available sometime later. void enqueue(const std::vector &ChangedFiles); - void enqueue(const std::string &File); // Cause background threads to stop after ther current task, any remaining // tasks will be discarded. @@ -118,6 +117,21 @@ std::mutex DigestsMu; BackgroundIndexStorage::Factory IndexStorageFactory; + struct Source { + std::string Path; + bool NeedsReIndexing; + Source(llvm::StringRef Path, bool NeedsReIndexing) + : Path(Path), NeedsReIndexing(NeedsReIndexing) {} + }; + // Loads the shards for a single TU and all of its dependencies. Returns the + // list of sources and whether they need to be re-indexed. + std::vector loadShard(const tooling::CompileCommand &Cmd, + BackgroundIndexStorage *IndexStorage, + llvm::StringSet<> &LoadedShards); + // Tries to load shards for the ChangedFiles. + std::vector> + loadShards(std::vector ChangedFiles); + void enqueue(tooling::CompileCommand Cmd, BackgroundIndexStorage *Storage); // queue management using Task = std::function; Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -115,6 +115,19 @@ }; } +// We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or +// relative to Cmd.Directory, which might not be the same as current working +// directory. +llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { + llvm::SmallString<128> AbsolutePath; + if (llvm::sys::path::is_absolute(Cmd.Filename)) { + AbsolutePath = Cmd.Filename; + } else { + AbsolutePath = Cmd.Directory; + llvm::sys::path::append(AbsolutePath, Cmd.Filename); + } + return AbsolutePath; +} } // namespace BackgroundIndex::BackgroundIndex( @@ -204,40 +217,33 @@ [this, ChangedFiles] { trace::Span Tracer("BackgroundIndexEnqueue"); // We're doing this asynchronously, because we'll read shards here too. - // FIXME: read shards here too. - log("Enqueueing {0} commands for indexing", ChangedFiles.size()); SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); - // We shuffle the files because processing them in a random order should - // quickly give us good coverage of headers in the project. - std::vector Permutation(ChangedFiles.size()); - std::iota(Permutation.begin(), Permutation.end(), 0); - std::mt19937 Generator(std::random_device{}()); - std::shuffle(Permutation.begin(), Permutation.end(), Generator); - - for (const unsigned I : Permutation) - enqueue(ChangedFiles[I]); + auto NeedsReIndexing = loadShards(std::move(ChangedFiles)); + // Run indexing for files that needs to be updated. + std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), + std::mt19937(std::random_device{}())); + for (auto &Elem : NeedsReIndexing) + enqueue(std::move(Elem.first), Elem.second); }, ThreadPriority::Normal); } -void BackgroundIndex::enqueue(const std::string &File) { - ProjectInfo Project; - if (auto Cmd = CDB.getCompileCommand(File, &Project)) { - auto *Storage = IndexStorageFactory(Project.SourceRoot); - // Set priority to low, since background indexing is a long running - // task we do not want to eat up cpu when there are any other high - // priority threads. - enqueueTask(Bind( - [this, File, Storage](tooling::CompileCommand Cmd) { - Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); - if (auto Error = index(std::move(Cmd), Storage)) - log("Indexing {0} failed: {1}", File, std::move(Error)); - }, - std::move(*Cmd)), - ThreadPriority::Low); - } +void BackgroundIndex::enqueue(tooling::CompileCommand Cmd, + BackgroundIndexStorage *Storage) { + enqueueTask(Bind( + [this, Storage](tooling::CompileCommand Cmd) { + Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); + // We can't use llvm::StringRef here since we are going to + // move from Cmd during the call below. + const std::string FileName = Cmd.Filename; + if (auto Error = index(std::move(Cmd), Storage)) + elog("Indexing {0} failed: {1}", FileName, + std::move(Error)); + }, + std::move(Cmd)), + ThreadPriority::Low); } void BackgroundIndex::enqueueTask(Task T, ThreadPriority Priority) { @@ -299,22 +305,22 @@ } // Build and store new slabs for each updated file. - for (const auto &F : Files) { - llvm::StringRef Path = F.first(); - vlog("Update symbols in {0}", Path); + for (const auto &I : *Index.Sources) { + std::string Path = URICache.resolve(I.first()); SymbolSlab::Builder Syms; RefSlab::Builder Refs; - for (const auto *S : F.second.Symbols) - Syms.insert(*S); - for (const auto *R : F.second.Refs) - Refs.insert(RefToIDs[R], *R); - + auto FileIt = Files.find(Path); + if (FileIt != Files.end()) { + auto &F = *FileIt; + for (const auto *S : F.second.Symbols) + Syms.insert(*S); + for (const auto *R : F.second.Refs) + Refs.insert(RefToIDs[R], *R); + } auto SS = llvm::make_unique(std::move(Syms).build()); auto RS = llvm::make_unique(std::move(Refs).build()); auto IG = llvm::make_unique( getSubGraph(URI::create(Path), Index.Sources.getValue())); - - auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. if (IndexStorage) { @@ -327,13 +333,16 @@ elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); } - - std::lock_guard Lock(DigestsMu); - // This can override a newer version that is added in another thread, - // if this thread sees the older version but finishes later. This should be - // rare in practice. - IndexedFileDigests[Path] = Hash; - IndexedSymbols.update(Path, std::move(SS), std::move(RS)); + { + std::lock_guard Lock(DigestsMu); + auto Hash = I.second.Digest; + // Skip if file is already up to date. + auto DigestIt = IndexedFileDigests.try_emplace(Path); + if (!DigestIt.second && DigestIt.first->second == Hash) + continue; + DigestIt.first->second = Hash; + IndexedSymbols.update(Path, std::move(SS), std::move(RS)); + } } } @@ -342,11 +351,11 @@ while (true) { { std::unique_lock Lock(IndexMu); - if (ShouldStop) // Avoid waiting if stopped. + if (ShouldStop) // Avoid waiting if stopped. break; // Wait until this is notified to stop or `BuildIndexPeriodMs` has past. IndexCV.wait_for(Lock, std::chrono::milliseconds(BuildIndexPeriodMs)); - if (ShouldStop) // Avoid rebuilding index if stopped. + if (ShouldStop) // Avoid rebuilding index if stopped. break; } if (!SymbolsUpdatedSinceLastIndex.exchange(false)) @@ -365,13 +374,7 @@ BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); SPAN_ATTACH(Tracer, "file", Cmd.Filename); - llvm::SmallString<128> AbsolutePath; - if (llvm::sys::path::is_absolute(Cmd.Filename)) { - AbsolutePath = Cmd.Filename; - } else { - AbsolutePath = Cmd.Directory; - llvm::sys::path::append(AbsolutePath, Cmd.Filename); - } + auto AbsolutePath = getAbsolutePath(Cmd); auto FS = FSProvider.getFileSystem(); auto Buf = FS->getBufferForFile(AbsolutePath); @@ -383,11 +386,6 @@ llvm::StringMap DigestsSnapshot; { std::lock_guard Lock(DigestsMu); - if (IndexedFileDigests.lookup(AbsolutePath) == Hash) { - vlog("No need to index {0}, already up to date", AbsolutePath); - return llvm::Error::success(); - } - DigestsSnapshot = IndexedFileDigests; } @@ -437,8 +435,8 @@ "IndexingAction failed: has uncompilable errors"); } - assert(Index.Symbols && Index.Refs && Index.Sources - && "Symbols, Refs and Sources must be set."); + assert(Index.Symbols && Index.Refs && Index.Sources && + "Symbols, Refs and Sources must be set."); log("Indexed {0} ({1} symbols, {2} refs, {3} files)", Inputs.CompileCommand.Filename, Index.Symbols->size(), @@ -448,12 +446,6 @@ SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size())); update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); - { - // Make sure hash for the main file is always updated even if there is no - // index data in it. - std::lock_guard Lock(DigestsMu); - IndexedFileDigests[AbsolutePath] = Hash; - } if (BuildIndexPeriodMs > 0) SymbolsUpdatedSinceLastIndex = true; @@ -464,5 +456,146 @@ return llvm::Error::success(); } +std::vector +BackgroundIndex::loadShard(const tooling::CompileCommand &Cmd, + BackgroundIndexStorage *IndexStorage, + llvm::StringSet<> &LoadedShards) { + struct ShardInfo { + std::string AbsolutePath; + std::unique_ptr Shard; + FileDigest Digest; + }; + std::vector IntermediateSymbols; + // Make sure we don't have duplicate elements in the queue. Keys are absolute + // paths. + llvm::StringSet<> InQueue; + auto FS = FSProvider.getFileSystem(); + // Dependencies of this TU, paired with the information about whether they + // need to be re-indexed or not. + std::vector Dependencies; + std::string AbsolutePath = getAbsolutePath(Cmd).str(); + // Up until we load the shard related to a dependency it needs to be + // re-indexed. + Dependencies.emplace_back(AbsolutePath, true); + InQueue.insert(AbsolutePath); + // Goes over each dependency. + for (size_t CurrentDependency = 0; CurrentDependency < Dependencies.size(); + CurrentDependency++) { + llvm::StringRef CurDependencyPath = Dependencies[CurrentDependency].Path; + // If we have already seen this shard before(either loaded or failed) don't + // re-try again. Since the information in the shard won't change from one TU + // to another. + if (!LoadedShards.try_emplace(CurDependencyPath).second) { + // If the dependency needs to be re-indexed, first occurence would already + // have detected that, so we don't need to issue it again. + Dependencies[CurrentDependency].NeedsReIndexing = false; + continue; + } + + auto Shard = IndexStorage->loadShard(CurDependencyPath); + if (!Shard || !Shard->Sources) { + // File will be returned as requiring re-indexing to caller. + vlog("Failed to load shard: {0}", CurDependencyPath); + continue; + } + // These are the edges in the include graph for current dependency. + for (const auto &I : *Shard->Sources) { + auto U = URI::parse(I.getKey()); + if (!U) + continue; + auto AbsolutePath = URI::resolve(*U, CurDependencyPath); + if (!AbsolutePath) + continue; + // Add file as dependency if haven't seen before. + if (InQueue.try_emplace(*AbsolutePath).second) + Dependencies.emplace_back(*AbsolutePath, true); + // The node contains symbol information only for current file, the rest is + // just edges. + if (*AbsolutePath != CurDependencyPath) + continue; + + // We found source file info for current dependency. + assert(I.getValue().Digest != FileDigest{{0}} && "Digest is empty?"); + ShardInfo SI; + SI.AbsolutePath = CurDependencyPath; + SI.Shard = std::move(Shard); + SI.Digest = I.getValue().Digest; + IntermediateSymbols.push_back(std::move(SI)); + // Check if the source needs re-indexing. + // Get the digest, skip it if file doesn't exist. + auto Buf = FS->getBufferForFile(CurDependencyPath); + if (!Buf) { + elog("Couldn't get buffer for file: {0}: {1}", CurDependencyPath, + Buf.getError().message()); + continue; + } + // If digests match then dependency doesn't need re-indexing. + Dependencies[CurrentDependency].NeedsReIndexing = + digest(Buf->get()->getBuffer()) != I.getValue().Digest; + } + } + // Load shard information into background-index. + { + std::lock_guard Lock(DigestsMu); + // This can override a newer version that is added in another thread, + // if this thread sees the older version but finishes later. This + // should be rare in practice. + for (const ShardInfo &SI : IntermediateSymbols) { + auto SS = + SI.Shard->Symbols + ? llvm::make_unique(std::move(*SI.Shard->Symbols)) + : nullptr; + auto RS = SI.Shard->Refs + ? llvm::make_unique(std::move(*SI.Shard->Refs)) + : nullptr; + IndexedFileDigests[SI.AbsolutePath] = SI.Digest; + IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS)); + } + } + + return Dependencies; +} + +// Goes over each changed file and loads them from index. Returns the list of +// TUs that had out-of-date/no shards. +std::vector> +BackgroundIndex::loadShards(std::vector ChangedFiles) { + std::vector> + NeedsReIndexing; + // Keeps track of the files that will be reindexed, to make sure we won't + // re-index same dependencies more than once. Keys are AbsolutePaths. + llvm::StringSet<> FilesToIndex; + // Keeps track of the loaded shards to make sure we don't perform redundant + // disk IO. Keys are absolute paths. + llvm::StringSet<> LoadedShards; + for (const auto &File : ChangedFiles) { + ProjectInfo PI; + auto Cmd = CDB.getCompileCommand(File, &PI); + if (!Cmd) + continue; + BackgroundIndexStorage *IndexStorage = IndexStorageFactory(PI.SourceRoot); + auto Dependencies = loadShard(*Cmd, IndexStorage, LoadedShards); + for (const auto &Dependency : Dependencies) { + if (!Dependency.NeedsReIndexing || FilesToIndex.count(Dependency.Path)) + continue; + // FIXME: Currently, we simply schedule indexing on a TU whenever any of + // its dependencies needs re-indexing. We might do it smarter by figuring + // out a minimal set of TUs that will cover all the stale dependencies. + vlog("Enqueueing TU {0} because its dependency {1} needs re-indexing.", + Cmd->Filename, Dependency.Path); + NeedsReIndexing.push_back({std::move(*Cmd), IndexStorage}); + // Mark all of this TU's dependencies as to-be-indexed so that we won't + // try to re-index those. + for (const auto &Dependency : Dependencies) + FilesToIndex.insert(Dependency.Path); + break; + } + } + vlog("Loaded all shards"); + reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge)); + + return NeedsReIndexing; +} + } // namespace clangd } // namespace clang Index: test/clangd/background-index.test =================================================================== --- test/clangd/background-index.test +++ test/clangd/background-index.test @@ -16,6 +16,5 @@ # RUN: ls %t/.clangd-index/foo.cpp.*.idx # Test the index is read from disk: delete code and restart clangd. -# FIXME: This test currently fails as we don't read the index yet. # RUN: rm %t/foo.cpp -# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | not FileCheck %t/definition.jsonrpc +# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | FileCheck %t/definition.jsonrpc Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -9,6 +9,7 @@ using testing::_; using testing::AllOf; +using testing::Contains; using testing::ElementsAre; using testing::Not; using testing::UnorderedElementsAre; @@ -146,7 +147,7 @@ FileURI("unittest:///root/B.cc")})); } -TEST_F(BackgroundIndexTest, ShardStorageWriteTest) { +TEST_F(BackgroundIndexTest, ShardStorageTest) { MockFSProvider FS; FS.Files[testPath("root/A.h")] = R"cpp( void common(); @@ -175,6 +176,16 @@ EXPECT_EQ(CacheHits, 0U); EXPECT_EQ(Storage.size(), 2U); + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + EXPECT_EQ(Storage.size(), 2U); + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); EXPECT_NE(ShardHeader, nullptr); EXPECT_THAT( @@ -278,5 +289,73 @@ EXPECT_THAT(runFuzzyFind(Idx, ""), ElementsAre(Named("Y"))); } +TEST_F(BackgroundIndexTest, ShardStorageLoad) { + MockFSProvider FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }"; + + llvm::StringMap Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + // Check nothing is loaded from Storage, but A.cc and A.h has been stored. + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root/A.cc"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + + // Change header. + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + class A_CCnew {}; + )cpp"; + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + + // Check if the new symbol has arrived. + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); + EXPECT_NE(ShardHeader, nullptr); + EXPECT_THAT(*ShardHeader->Symbols, Contains(Named("A_CCnew"))); + + // Change source. + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }\nvoid f_b() {}"; + { + CacheHits = 0; + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + + // Check if the new symbol has arrived. + auto ShardSource = MSS.loadShard(testPath("root/A.cc")); + EXPECT_NE(ShardHeader, nullptr); + EXPECT_THAT(*ShardSource->Symbols, + Contains(AllOf(Named("f_b"), Declared(), Defined()))); +} + } // namespace clangd } // namespace clang