Index: clangd/SourceCode.h =================================================================== --- clangd/SourceCode.h +++ clangd/SourceCode.h @@ -18,6 +18,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" namespace clang { @@ -91,6 +92,20 @@ const SourceManager &SourceMgr); bool IsRangeConsecutive(const Range &Left, const Range &Right); + +// Handle the symbolic link path case where the current working directory +// (getCurrentWorkingDirectory) is a symlink./ We always want to the real +// file path (instead of the symlink path) for the C++ symbols. +// +// Consider the following example: +// +// src dir: /project/src/foo.h +// current working directory (symlink): /tmp/build -> /project/src/ +// +// The file path of Symbol is "/project/src/foo.h" instead of +// "/tmp/build/foo.h" +std::string makeCanonicalPath(llvm::StringRef AbsolutePath, + const SourceManager &SM); } // namespace clangd } // namespace clang #endif Index: clangd/SourceCode.cpp =================================================================== --- clangd/SourceCode.cpp +++ clangd/SourceCode.cpp @@ -239,5 +239,18 @@ return digest(Content); } +std::string makeCanonicalPath(llvm::StringRef AbsolutePath, + const SourceManager &SM) { + if (const DirectoryEntry *Dir = SM.getFileManager().getDirectory( + llvm::sys::path::parent_path(AbsolutePath.str()))) { + StringRef DirName = SM.getFileManager().getCanonicalName(Dir); + llvm::SmallString<128> AbsoluteFilename; + llvm::sys::path::append(AbsoluteFilename, DirName, + llvm::sys::path::filename(AbsolutePath.str())); + return AbsoluteFilename.str(); + } + return AbsolutePath; +} + } // namespace clangd } // namespace clang Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -74,7 +74,6 @@ // The indexing happens in a background thread, so the symbols will be // available sometime later. void enqueue(const std::vector &ChangedFiles); - void enqueue(const std::string &File); // Cause background threads to stop after ther current task, any remaining // tasks will be discarded. @@ -106,13 +105,26 @@ std::mutex DigestsMu; BackgroundIndexStorage::Factory IndexStorageFactory; + struct Source { + std::string Path; + bool NeedsReIndexing; + }; + // Loads the shards for all the sources that was touched by this Cmd when the + // index was build. Returns the list of sources and whether they need to be + // re-indexed. + std::vector loadShard(const tooling::CompileCommand &Cmd, + BackgroundIndexStorage *IndexStorage, + llvm::StringSet<> &VisitedShards); + llvm::Error loadShards( + std::vector ChangedFiles, + std::vector> + &NeedsReIndexing); + void enqueue(tooling::CompileCommand Cmd, BackgroundIndexStorage *Storage); // queue management using Task = std::function; void run(); // Main loop executed by Thread. Runs tasks from Queue. void enqueueTask(Task T); - void enqueueLocked(tooling::CompileCommand Cmd, - BackgroundIndexStorage *IndexStorage); std::mutex QueueMu; unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks. std::condition_variable QueueCV; Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -86,6 +86,20 @@ return IG; } + +// We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or +// relative to Cmd.Directory, which might not be the same as current working +// directory. +llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { + llvm::SmallString<128> AbsolutePath; + if (sys::path::is_absolute(Cmd.Filename)) { + AbsolutePath = Cmd.Filename; + } else { + AbsolutePath = Cmd.Directory; + sys::path::append(AbsolutePath, Cmd.Filename); + } + return AbsolutePath; +} } // namespace BackgroundIndex::BackgroundIndex( @@ -163,35 +177,32 @@ enqueueTask([this, ChangedFiles] { trace::Span Tracer("BackgroundIndexEnqueue"); // We're doing this asynchronously, because we'll read shards here too. - // FIXME: read shards here too. - log("Enqueueing {0} commands for indexing", ChangedFiles.size()); SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); - // We shuffle the files because processing them in a random order should - // quickly give us good coverage of headers in the project. - std::vector Permutation(ChangedFiles.size()); - std::iota(Permutation.begin(), Permutation.end(), 0); - std::mt19937 Generator(std::random_device{}()); - std::shuffle(Permutation.begin(), Permutation.end(), Generator); + std::vector> + NeedsReIndexing; + if (auto Error = loadShards(std::move(ChangedFiles), NeedsReIndexing)) + elog("Loading shards failed: {0}", std::move(Error)); - for (const unsigned I : Permutation) - enqueue(ChangedFiles[I]); + // Run indexing for files that needs to be updated. + std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), + std::mt19937(std::random_device{}())); + for (auto &Elem : NeedsReIndexing) + enqueue(std::move(Elem.first), Elem.second); }); } -void BackgroundIndex::enqueue(const std::string &File) { - ProjectInfo Project; - if (auto Cmd = CDB.getCompileCommand(File, &Project)) { - auto *Storage = IndexStorageFactory(Project.SourceRoot); - enqueueTask(Bind( - [this, File, Storage](tooling::CompileCommand Cmd) { - Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); - if (auto Error = index(std::move(Cmd), Storage)) - log("Indexing {0} failed: {1}", File, std::move(Error)); - }, - std::move(*Cmd))); - } +void BackgroundIndex::enqueue(tooling::CompileCommand Cmd, + BackgroundIndexStorage *Storage) { + enqueueTask(Bind( + [this, Storage](tooling::CompileCommand Cmd) { + Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); + const std::string FileName = Cmd.Filename; + if (auto Error = index(std::move(Cmd), Storage)) + elog("Indexing {0} failed: {1}", FileName, std::move(Error)); + }, + std::move(Cmd))); } void BackgroundIndex::enqueueTask(Task T) { @@ -243,22 +254,35 @@ } // Build and store new slabs for each updated file. - for (const auto &F : Files) { - StringRef Path = F.first(); + for (const auto &I : *Index.Sources) { + std::string Path = URICache.resolve(I.first()); + { + auto Hash = I.second.Digest; + std::lock_guard Lock(DigestsMu); + // Skip if file is already up to date. + auto DigestIt = IndexedFileDigests.try_emplace(Path); + if (!DigestIt.second && DigestIt.first->second == Hash) + continue; + // This can override a newer version that is added in another thread, + // if this thread sees the older version but finishes later. This should + // be rare in practice. + DigestIt.first->second = Hash; + } vlog("Update symbols in {0}", Path); SymbolSlab::Builder Syms; RefSlab::Builder Refs; - for (const auto *S : F.second.Symbols) - Syms.insert(*S); - for (const auto *R : F.second.Refs) - Refs.insert(RefToIDs[R], *R); - + auto FileIt = Files.find(Path); + if (FileIt != Files.end()) { + auto &F = *FileIt; + for (const auto *S : F.second.Symbols) + Syms.insert(*S); + for (const auto *R : F.second.Refs) + Refs.insert(RefToIDs[R], *R); + } auto SS = llvm::make_unique(std::move(Syms).build()); auto RS = llvm::make_unique(std::move(Refs).build()); auto IG = llvm::make_unique( getSubGraph(URI::create(Path), Index.Sources.getValue())); - - auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. if (IndexStorage) { @@ -272,11 +296,6 @@ std::move(Error)); } - std::lock_guard Lock(DigestsMu); - // This can override a newer version that is added in another thread, - // if this thread sees the older version but finishes later. This should be - // rare in practice. - IndexedFileDigests[Path] = Hash; IndexedSymbols.update(Path, std::move(SS), std::move(RS)); } } @@ -300,6 +319,7 @@ elog("Warning: could not make absolute file: {0}", EC.message()); return false; // Skip files without absolute path. } + AbsPath = makeCanonicalPath(AbsPath, SM); sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true); auto Digest = digestFile(SM, FID); if (!Digest) @@ -317,13 +337,7 @@ BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); SPAN_ATTACH(Tracer, "file", Cmd.Filename); - SmallString<128> AbsolutePath; - if (sys::path::is_absolute(Cmd.Filename)) { - AbsolutePath = Cmd.Filename; - } else { - AbsolutePath = Cmd.Directory; - sys::path::append(AbsolutePath, Cmd.Filename); - } + auto AbsolutePath = getAbsolutePath(Cmd); auto FS = FSProvider.getFileSystem(); auto Buf = FS->getBufferForFile(AbsolutePath); @@ -335,15 +349,10 @@ llvm::StringMap DigestsSnapshot; { std::lock_guard Lock(DigestsMu); - if (IndexedFileDigests.lookup(AbsolutePath) == Hash) { - vlog("No need to index {0}, already up to date", AbsolutePath); - return Error::success(); - } - DigestsSnapshot = IndexedFileDigests; } - log("Indexing {0}", Cmd.Filename, toHex(Hash)); + vlog("Indexing {0}", Cmd.Filename, toHex(Hash)); ParseInputs Inputs; Inputs.FS = std::move(FS); Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory); @@ -390,13 +399,141 @@ SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size())); update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); - { - // Make sure hash for the main file is always updated even if there is no - // index data in it. - std::lock_guard Lock(DigestsMu); - IndexedFileDigests[AbsolutePath] = Hash; + + // FIXME: this should rebuild once-in-a-while, not after every file. + // At that point we should use Dex, too. + vlog("Rebuilding automatic index"); + reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge)); + + return Error::success(); +} + +std::vector +BackgroundIndex::loadShard(const tooling::CompileCommand &Cmd, + BackgroundIndexStorage *IndexStorage, + llvm::StringSet<> &VisitedShards) { + // Adds the given shard's contents into BackgroundIndex. + // Consumes Symbols and Refs in Shard. + auto AddShardToIndex = [this](llvm::StringRef AbsolutePath, + IndexFileIn *Shard, + const IncludeGraphNode &IGN) { + if (!Shard) + return; + std::unique_ptr SS; + std::unique_ptr RS; + if (Shard->Symbols) + SS = llvm::make_unique(std::move(*Shard->Symbols)); + if (Shard->Refs) + RS = llvm::make_unique(std::move(*Shard->Refs)); + assert(IGN.Digest != FileDigest{{0}} && "Digest is empty?"); + // Load shard information into background-index. + { + std::lock_guard Lock(DigestsMu); + // This can override a newer version that is added in another thread, + // if this thread sees the older version but finishes later. This + // should be rare in practice. + IndexedFileDigests[AbsolutePath] = IGN.Digest; + IndexedSymbols.update(AbsolutePath, std::move(SS), std::move(RS)); + } + }; + + // Make sure we don't have duplicate elements in the queue. Keys are absolute + // paths. + llvm::StringSet<> InQueue; + auto FS = FSProvider.getFileSystem(); + // Dependencies of this TU, paired with the information about whether they + // need to be re-indexed or not. + std::vector Dependencies; + std::string AbsolutePath = getAbsolutePath(Cmd).str(); + // Up until we load the shard related to a dependency it needs to be + // re-indexed. + Dependencies.push_back({AbsolutePath, true}); + InQueue.insert(AbsolutePath); + // Goes over each dependency. + for (size_t CurrentDependency = 0; CurrentDependency < Dependencies.size(); + CurrentDependency++) { + llvm::StringRef CurDependencyPath = Dependencies[CurrentDependency].Path; + // If we have already seen this shard before(either loaded or failed) don't + // re-try again. Since the information in the shard won't change from one TU + // to another. + if (!VisitedShards.try_emplace(CurDependencyPath).second) { + // If the dependency needs to be re-indexed, first occurence would already + // have detected that, so we don't need to issue it again. + Dependencies[CurrentDependency].NeedsReIndexing = false; + continue; + } + + auto Shard = IndexStorage->loadShard(CurDependencyPath); + if (!Shard || !Shard->Sources) { + vlog("Failed to load shard: {0}", CurDependencyPath); + continue; + } + // These are the edges in the include graph for current dependency. + for (const auto &I : *Shard->Sources) { + if (auto U = URI::parse(I.getKey())) { + if (auto AbsolutePath = URI::resolve(*U, CurDependencyPath)) { + if (*AbsolutePath != CurDependencyPath) { + if (InQueue.try_emplace(*AbsolutePath).second) + Dependencies.push_back({*AbsolutePath, true}); + continue; + } + + // We found source file info for current dependency. + AddShardToIndex(CurDependencyPath, Shard.get(), I.getValue()); + // Check if the source needs re-indexing. + // Get the digest, skip it if file doesn't exist. + auto Buf = FS->getBufferForFile(CurDependencyPath); + if (!Buf) { + elog("Couldn't get buffer for file: {0}: {1}", CurDependencyPath, + Buf.getError().message()); + continue; + } + // If digests match then dependency doesn't need re-indexing. + Dependencies[CurrentDependency].NeedsReIndexing = + digest(Buf->get()->getBuffer()) != I.getValue().Digest; + } + } + } } + return Dependencies; +} + +// Goes over each changed file and loads them from index, in the case of a +// missing or out-dated shard enqueues an indexing operation. +llvm::Error BackgroundIndex::loadShards( + std::vector ChangedFiles, + std::vector> + &NeedsReIndexing) { + // Keeps track of the files that will be reindexed, to make sure we won't + // re-index same dependencies more than once. Keys are AbsolutePaths. + llvm::StringSet<> BeingReindexed; + // Keeps track of the loaded shards to make sure we don't perform redundant + // disk IO. Keys are absolute paths. + llvm::StringSet<> LoadedShards; + for (const auto &File : ChangedFiles) { + ProjectInfo PI; + auto Cmd = CDB.getCompileCommand(File, &PI); + if (!Cmd) + continue; + BackgroundIndexStorage *IndexStorage = IndexStorageFactory(PI.SourceRoot); + auto Dependencies = loadShard(*Cmd, IndexStorage, LoadedShards); + for (const auto &Dependency : Dependencies) { + // FIXME: Currently, we simply schedule indexing on a TU whenever any of + // its dependencies needs re-indexing. We might do it smarter by figuring + // out a minimal set of TUs that will cover all the stale dependencies. + if (Dependency.NeedsReIndexing && + !BeingReindexed.count(Dependency.Path)) { + vlog("Enqueueing {0} for {1}", Cmd->Filename, Dependency.Path); + NeedsReIndexing.push_back({std::move(*Cmd), IndexStorage}); + // Mark all of this TU's dependencies as to-be-indexed so that we won't + // try to re-index those. + for (const auto &Dependency : Dependencies) + BeingReindexed.insert(Dependency.Path); + break; + } + } + } // FIXME: this should rebuild once-in-a-while, not after every file. // At that point we should use Dex, too. vlog("Rebuilding automatic index"); Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -58,29 +58,10 @@ SM.getFileManager().getVirtualFileSystem()->makeAbsolute( AbsolutePath)) log("Warning: could not make absolute file: {0}", EC.message()); - if (sys::path::is_absolute(AbsolutePath)) { - // Handle the symbolic link path case where the current working directory - // (getCurrentWorkingDirectory) is a symlink./ We always want to the real - // file path (instead of the symlink path) for the C++ symbols. - // - // Consider the following example: - // - // src dir: /project/src/foo.h - // current working directory (symlink): /tmp/build -> /project/src/ - // - // The file path of Symbol is "/project/src/foo.h" instead of - // "/tmp/build/foo.h" - if (const DirectoryEntry *Dir = SM.getFileManager().getDirectory( - sys::path::parent_path(AbsolutePath.str()))) { - StringRef DirName = SM.getFileManager().getCanonicalName(Dir); - SmallString<128> AbsoluteFilename; - sys::path::append(AbsoluteFilename, DirName, - sys::path::filename(AbsolutePath.str())); - AbsolutePath = AbsoluteFilename; - } - } else if (!Opts.FallbackDir.empty()) { + if (sys::path::is_absolute(AbsolutePath)) + AbsolutePath = makeCanonicalPath(AbsolutePath, SM); + else if (!Opts.FallbackDir.empty()) sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath); - } sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true); return URI::create(AbsolutePath).toString(); Index: test/clangd/background-index.test =================================================================== --- test/clangd/background-index.test +++ test/clangd/background-index.test @@ -16,6 +16,5 @@ # RUN: ls %t/.clangd-index/foo.cpp.*.idx # Test the index is read from disk: delete code and restart clangd. -# FIXME: This test currently fails as we don't read the index yet. # RUN: rm %t/foo.cpp -# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | not FileCheck %t/definition.jsonrpc +# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | FileCheck %t/definition.jsonrpc Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -7,6 +7,7 @@ using testing::_; using testing::AllOf; +using testing::Contains; using testing::Not; using testing::UnorderedElementsAre; @@ -125,7 +126,7 @@ FileURI("unittest:///root/B.cc")})); } -TEST_F(BackgroundIndexTest, ShardStorageWriteTest) { +TEST_F(BackgroundIndexTest, ShardStorageTest) { MockFSProvider FS; FS.Files[testPath("root/A.h")] = R"cpp( void common(); @@ -154,6 +155,16 @@ EXPECT_EQ(CacheHits, 0U); EXPECT_EQ(Storage.size(), 2U); + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + EXPECT_EQ(Storage.size(), 2U); + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); EXPECT_NE(ShardHeader, nullptr); EXPECT_THAT( @@ -221,5 +232,73 @@ EmptyIncludeNode()); } +TEST_F(BackgroundIndexTest, ShardStorageLoad) { + MockFSProvider FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }"; + + llvm::StringMap Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + // Check nothing is loaded from Storage, but A.cc and A.h has been stored. + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root/A.cc"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + + // Change header. + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + class A_CCnew {}; + )cpp"; + { + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + + // Check if the new symbol has arrived. + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); + EXPECT_NE(ShardHeader, nullptr); + EXPECT_THAT(*ShardHeader->Symbols, Contains(Named("A_CCnew"))); + + // Change source. + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }\nvoid f_b() {}"; + { + CacheHits = 0; + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(Context::empty(), "", FS, CDB, + [&](llvm::StringRef) { return &MSS; }); + CDB.setCompileCommand(testPath("root"), Cmd); + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + } + EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache. + + // Check if the new symbol has arrived. + auto ShardSource = MSS.loadShard(testPath("root/A.cc")); + EXPECT_NE(ShardHeader, nullptr); + EXPECT_THAT(*ShardSource->Symbols, + Contains(AllOf(Named("f_b"), Declared(), Defined()))); +} + } // namespace clangd } // namespace clang