diff --git a/clang-tools-extra/clangd/ConfigProvider.h b/clang-tools-extra/clangd/ConfigProvider.h --- a/clang-tools-extra/clangd/ConfigProvider.h +++ b/clang-tools-extra/clangd/ConfigProvider.h @@ -38,8 +38,9 @@ llvm::StringRef Path; /// Hint that stale data is OK to improve performance (e.g. avoid IO). /// FreshTime sets a bound for how old the data can be. - /// If not set, providers should validate caches against the data source. - llvm::Optional FreshTime; + /// By default, providers should validate caches against the data source. + std::chrono::steady_clock::time_point FreshTime = + std::chrono::steady_clock::time_point::max(); }; /// Used to report problems in parsing or interpreting a config. diff --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp --- a/clang-tools-extra/clangd/ConfigProvider.cpp +++ b/clang-tools-extra/clangd/ConfigProvider.cpp @@ -9,6 +9,7 @@ #include "ConfigProvider.h" #include "Config.h" #include "ConfigFragment.h" +#include "support/FileCache.h" #include "support/ThreadsafeFS.h" #include "support/Trace.h" #include "llvm/ADT/ScopeExit.h" @@ -24,89 +25,28 @@ namespace config { // Threadsafe cache around reading a YAML config file from disk. -class FileConfigCache { - std::mutex Mu; - std::chrono::steady_clock::time_point ValidTime = {}; - llvm::SmallVector CachedValue; - llvm::sys::TimePoint<> MTime = {}; - unsigned Size = -1; - - // Called once we are sure we want to read the file. - // REQUIRES: Cache keys are set. Mutex must be held. - void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) { - CachedValue.clear(); - - auto Buf = FS.getBufferForFile(Path); - // If we failed to read (but stat succeeded), don't cache failure. - if (!Buf) { - Size = -1; - MTime = {}; - return; - } - - // If file changed between stat and open, we don't know its mtime. - // For simplicity, don't cache the value in this case (use a bad key). - if (Buf->get()->getBufferSize() != Size) { - Size = -1; - MTime = {}; - } - - // Finally parse and compile the actual fragments. - for (auto &Fragment : - Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC)) { - Fragment.Source.Directory = Directory; - CachedValue.push_back(std::move(Fragment).compile(DC)); - } - } - -public: - // Must be set before the cache is used. Not a constructor param to allow - // computing ancestor-relative paths to be deferred. - std::string Path; - // Directory associated with this fragment. +class FileConfigCache : public FileCache { + mutable llvm::SmallVector CachedValue; std::string Directory; - // Retrieves up-to-date config fragments from disk. - // A cached result may be reused if the mtime and size are unchanged. - // (But several concurrent read()s can miss the cache after a single change). - // Future performance ideas: - // - allow caches to be reused based on short elapsed walltime - // - allow latency-sensitive operations to skip revalidating the cache - void read(const ThreadsafeFS &TFS, DiagnosticCallback DC, - llvm::Optional FreshTime, - std::vector &Out) { - std::lock_guard Lock(Mu); - // We're going to update the cache and return whatever's in it. - auto Return = llvm::make_scope_exit( - [&] { llvm::copy(CachedValue, std::back_inserter(Out)); }); - - // Return any sufficiently recent result without doing any further work. - if (FreshTime && ValidTime >= FreshTime) - return; - - // Ensure we bump the ValidTime at the end to allow for reuse. - auto MarkTime = llvm::make_scope_exit( - [&] { ValidTime = std::chrono::steady_clock::now(); }); - - // Stat is cheaper than opening the file, it's usually unchanged. - assert(llvm::sys::path::is_absolute(Path)); - auto FS = TFS.view(/*CWD=*/llvm::None); - auto Stat = FS->status(Path); - // If there's no file, the result is empty. Ensure we have an invalid key. - if (!Stat || !Stat->isRegularFile()) { - MTime = {}; - Size = -1; - CachedValue.clear(); - return; - } - // If the modified-time and size match, assume the content does too. - if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime()) - return; - - // OK, the file has actually changed. Update cache key, compute new value. - Size = Stat->getSize(); - MTime = Stat->getLastModificationTime(); - fillCacheFromDisk(*FS, DC); +public: + FileConfigCache(llvm::StringRef Path, llvm::StringRef Directory) + : FileCache(Path), Directory(Directory) {} + + void get(const ThreadsafeFS &TFS, DiagnosticCallback DC, + std::chrono::steady_clock::time_point FreshTime, + std::vector &Out) const { + read( + TFS, FreshTime, + [&](llvm::Optional Data) { + CachedValue.clear(); + if (Data) + for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) { + Fragment.Source.Directory = Directory; + CachedValue.push_back(std::move(Fragment).compile(DC)); + } + }, + [&]() { llvm::copy(CachedValue, std::back_inserter(Out)); }); } }; @@ -120,17 +60,15 @@ std::vector getFragments(const Params &P, DiagnosticCallback DC) const override { std::vector Result; - Cache.read(FS, DC, P.FreshTime, Result); + Cache.get(FS, DC, P.FreshTime, Result); return Result; }; public: AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory, const ThreadsafeFS &FS) - : FS(FS) { + : Cache(Path, Directory), FS(FS) { assert(llvm::sys::path::is_absolute(Path)); - Cache.Path = Path.str(); - Cache.Directory = Directory.str(); } }; @@ -174,23 +112,21 @@ { std::lock_guard Lock(Mu); for (llvm::StringRef Ancestor : Ancestors) { - auto R = Cache.try_emplace(Ancestor); + auto It = Cache.find(Ancestor); // Assemble the actual config file path only once. - if (R.second) { + if (It == Cache.end()) { llvm::SmallString<256> ConfigPath = Ancestor; path::append(ConfigPath, RelPath); - R.first->second.Path = ConfigPath.str().str(); - R.first->second.Directory = Ancestor.str(); + It = Cache.try_emplace(Ancestor, ConfigPath.str(), Ancestor).first; } - Caches.push_back(&R.first->second); + Caches.push_back(&It->second); } } // Finally query each individual file. // This will take a (per-file) lock for each file that actually exists. std::vector Result; - for (FileConfigCache *Cache : Caches) { - Cache->read(FS, DC, P.FreshTime, Result); - } + for (FileConfigCache *Cache : Caches) + Cache->get(FS, DC, P.FreshTime, Result); return Result; }; diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt --- a/clang-tools-extra/clangd/support/CMakeLists.txt +++ b/clang-tools-extra/clangd/support/CMakeLists.txt @@ -19,6 +19,7 @@ add_clang_library(clangdSupport Cancellation.cpp Context.cpp + FileCache.cpp Logger.cpp Markup.cpp MemoryTree.cpp diff --git a/clang-tools-extra/clangd/support/FileCache.h b/clang-tools-extra/clangd/support/FileCache.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/support/FileCache.h @@ -0,0 +1,81 @@ +//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H + +#include "Path.h" +#include "ThreadsafeFS.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/Chrono.h" +#include "llvm/Support/VirtualFileSystem.h" +#include + +namespace clang { +namespace clangd { + +/// Base class for threadsafe cache of data read from a file on disk. +/// +/// We want configuration files to be "live" as much as possible. +/// Reading them every time is simplest, but caching solves a few problems: +/// - reading and parsing is cheap but not free (and happens on hot paths) +/// - we can ignore invalid data and use the old value (we may see truncated +/// compile_commands.json from non-atomic writers) +/// - we avoid reporting the same errors repeatedly +/// +/// We still read and parse the data synchronously on demand, but skip as much +/// work as possible: +/// - if not enough wall-time has elapsed, assume the data is still up-to-date +/// - if we stat the file and it has the same mtime + size, don't read it +/// - obviously we only have to parse when we re-read the file +/// (Tracking OS change events is an alternative, but difficult to do portably.) +/// +/// Caches for particular data (e.g. compilation databases) should inherit and: +/// - add mutable storage for the cached parsed data +/// - add a public interface implemented on top of read() +class FileCache { +protected: + // Path must be absolute. + FileCache(PathRef Path); + + // Updates the cached value if needed, then provides threadsafe access to it. + // + // Specifically: + // - Parse() may be called (if the cache was not up-to-date) + // The lock is held, so cache storage may be safely written. + // Parse(None) means the file doesn't exist. + // - Read() will always be called, to provide access to the value. + // The lock is again held, so the value can be copied or used. + // + // If the last Parse is newer than FreshTime, we don't check metadata. + // - time_point::min() means we only do IO if we never read the file before + // - time_point::max() means we always at least stat the file + // - steady_clock::now() + seconds(1) means we accept 1 second of staleness + void read(const ThreadsafeFS &TFS, + std::chrono::steady_clock::time_point FreshTime, + llvm::function_ref)> Parse, + llvm::function_ref Read) const; + + PathRef path() const { return Path; } + +private: + std::string Path; + // Members are mutable so read() can present a const interface. + // (It is threadsafe and approximates read-through to TFS). + mutable std::mutex Mu; + // Time when the cache was known valid (reflected disk state). + mutable std::chrono::steady_clock::time_point ValidTime; + // Filesystem metadata corresponding to the currently cached data. + mutable std::chrono::system_clock::time_point ModifiedTime; + mutable uint64_t Size; +}; + +} // namespace clangd +} // namespace clang + +#endif diff --git a/clang-tools-extra/clangd/support/FileCache.cpp b/clang-tools-extra/clangd/support/FileCache.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/support/FileCache.cpp @@ -0,0 +1,80 @@ +//===--- FileCache.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/FileCache.h" + +namespace clang { +namespace clangd { + +// Sentinel values for the Size cache key. In both cases, a successful stat of +// the file will never result in the cached value being reused. + +// The cached value does not reflect the current content on disk. +static constexpr uint64_t CacheDiskMismatch = + std::numeric_limits::max(); +// The cached value reflects that the file doesn't exist. +static constexpr uint64_t FileNotFound = -2; + +FileCache::FileCache(llvm::StringRef Path) + : Path(Path), ValidTime(std::chrono::steady_clock::time_point::min()), + ModifiedTime(), Size(CacheDiskMismatch) { + assert(llvm::sys::path::is_absolute(Path)); +} + +void FileCache::read( + const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime, + llvm::function_ref)> Parse, + llvm::function_ref Read) const { + + std::lock_guard Lock(Mu); + // We're going to update the cache and return whatever's in it. + auto Return = llvm::make_scope_exit(Read); + + // Return any sufficiently recent result without doing any further work. + if (ValidTime > FreshTime) + return; + + // Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on + // how often we do IO. + auto BumpValidTime = llvm::make_scope_exit( + [&] { ValidTime = std::chrono::steady_clock::now(); }); + + // stat is cheaper than opening the file. It's usually unchanged. + assert(llvm::sys::path::is_absolute(Path)); + auto FS = TFS.view(/*CWD=*/llvm::None); + auto Stat = FS->status(Path); + if (!Stat || !Stat->isRegularFile()) { + if (Size != FileNotFound) // Allow "not found" value to be cached. + Parse(llvm::None); + // Ensure the cache key won't match any future stat(). + Size = FileNotFound; + return; + } + // If the modified-time and size match, assume the content does too. + if (Size == Stat->getSize() && + ModifiedTime == Stat->getLastModificationTime()) + return; + + // OK, the file has actually changed. Update cache key, compute new value. + Size = Stat->getSize(); + ModifiedTime = Stat->getLastModificationTime(); + // Now read the file from disk. + if (auto Buf = FS->getBufferForFile(Path)) { + Parse(Buf->get()->getBuffer()); + // Result is cacheable if the actual read size matches the new cache key. + // (We can't update the cache key, because we don't know the new mtime). + if (Buf->get()->getBufferSize() != Size) + Size = CacheDiskMismatch; + } else { + // File was unreadable. Keep the old value and try again next time. + Size = CacheDiskMismatch; + } +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp --- a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp @@ -151,6 +151,7 @@ EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz")); } +// FIXME: delete this test, it's covered by FileCacheTests. TEST(ProviderTest, Staleness) { MockFS FS; diff --git a/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp @@ -0,0 +1,83 @@ +//===-- FileCacheTests.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/FileCache.h" + +#include "TestFS.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include + +namespace clang { +namespace clangd { +namespace config { +namespace { + +class TestCache : public FileCache { + MockFS FS; + mutable std::string Value; + +public: + TestCache() : FileCache(testPath("foo.cc")) {} + + void setContents(const char *C) { + if (C) + FS.Files[testPath("foo.cc")] = C; + else + FS.Files.erase(testPath("foo.cc")); + } + + std::string get(std::chrono::steady_clock::time_point FreshTime, + bool ExpectParse) const { + bool GotParse = false; + bool GotRead; + std::string Result; + read( + FS, FreshTime, + [&](llvm::Optional Data) { + GotParse = true; + Value = Data.getValueOr("").str(); + }, + [&]() { + GotRead = true; + Result = Value; + }); + EXPECT_EQ(GotParse, ExpectParse); + EXPECT_TRUE(GotRead); + return Result; + } +}; + +TEST(FileCacheTest, Invalidation) { + TestCache C; + + auto StaleOK = std::chrono::steady_clock::now(); + auto MustBeFresh = StaleOK + std::chrono::hours(1); + + C.setContents("a"); + EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time"; + EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)"; + EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)"; + C.setContents("bb"); + EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)"; + EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed"; + EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Cached (stat)"; + C.setContents(nullptr); + EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)"; + EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed"; + EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)"; + C.setContents("bb"); // Match the previous stat values! + EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)"; + EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed"; +} + +} // namespace +} // namespace config +} // namespace clangd +} // namespace clang