diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -26,27 +26,31 @@ /// the dependency scanning filesystem. /// /// It represents one of the following: -/// - an opened source file with minimized contents and a stat value. -/// - an opened source file with original contents and a stat value. -/// - a directory entry with its stat value. -/// - an error value to represent a file system error. +/// - opened file with original contents and a stat value, +/// - opened file with original contents, minimized contents and a stat value, +/// - directory entry with its stat value, +/// - filesystem error, /// - uninitialized entry with unknown status. class CachedFileSystemEntry { public: /// Creates an uninitialized entry. - CachedFileSystemEntry() : MaybeStat(llvm::vfs::Status()) {} + CachedFileSystemEntry() + : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {} /// Initialize the cached file system entry. void init(llvm::ErrorOr &&MaybeStatus, StringRef Filename, - llvm::vfs::FileSystem &FS, bool ShouldMinimize = true); + llvm::vfs::FileSystem &FS); /// Initialize the entry as file with minimized or original contents. /// /// The filesystem opens the file even for `stat` calls open to avoid the /// issues with stat + open of minimized files that might lead to a /// mismatching size of the file. - llvm::ErrorOr - initFile(StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize = true); + llvm::ErrorOr initFile(StringRef Filename, + llvm::vfs::FileSystem &FS); + + /// Minimize contents of the file. + void minimizeFile(); /// \returns True if the entry is initialized. bool isInitialized() const { @@ -56,13 +60,38 @@ /// \returns True if the current entry points to a directory. bool isDirectory() const { return MaybeStat && MaybeStat->isDirectory(); } - /// \returns The error or the file's contents. - llvm::ErrorOr getContents() const { + /// \returns The error or the file's original contents. + llvm::ErrorOr getOriginalContents() const { + if (!MaybeStat) + return MaybeStat.getError(); + assert(!MaybeStat->isDirectory() && "not a file"); + assert(isInitialized() && "not initialized"); + assert(OriginalContents && "not read"); + return OriginalContents->getBuffer(); + } + + /// \returns The error or the file's minimized contents. + llvm::ErrorOr getMinimizedContents() const { if (!MaybeStat) return MaybeStat.getError(); assert(!MaybeStat->isDirectory() && "not a file"); assert(isInitialized() && "not initialized"); - return Contents->getBuffer(); + llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load(); + assert(Buffer && "not minimized"); + return Buffer->getBuffer(); + } + + /// \returns True if this entry represents a file that can be read. + bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); } + + /// \returns True if this cached entry needs to be updated. + bool needsUpdate(bool ShouldBeMinimized) const { + return isReadable() && needsMinimization(ShouldBeMinimized); + } + + /// \returns True if the contents of this entry need to be minimized. + bool needsMinimization(bool ShouldBeMinimized) const { + return ShouldBeMinimized && !MinimizedContentsAccess.load(); } /// \returns The error or the status of the entry. @@ -83,15 +112,16 @@ return PPSkippedRangeMapping; } - CachedFileSystemEntry(CachedFileSystemEntry &&) = default; - CachedFileSystemEntry &operator=(CachedFileSystemEntry &&) = default; - - CachedFileSystemEntry(const CachedFileSystemEntry &) = delete; - CachedFileSystemEntry &operator=(const CachedFileSystemEntry &) = delete; - private: llvm::ErrorOr MaybeStat; - std::unique_ptr Contents; + std::unique_ptr OriginalContents; + + /// Owning storage for the minimized file contents. + std::unique_ptr MinimizedContentsStorage; + /// Atomic view of the minimized file contents. + /// This prevents data races when multiple threads call `needsMinimization`. + std::atomic MinimizedContentsAccess; + PreprocessorSkippedRangeMapping PPSkippedRangeMapping; }; @@ -108,61 +138,70 @@ CachedFileSystemEntry Value; }; + DependencyScanningFilesystemSharedCache(); + /// Returns a cache entry for the corresponding key. /// /// A new cache entry is created if the key is not in the cache. This is a /// thread safe call. - SharedFileSystemEntry &get(StringRef Key, bool Minimized); + SharedFileSystemEntry &get(StringRef Key); private: - class SingleCache { - public: - SingleCache(); - - SharedFileSystemEntry &get(StringRef Key); - - private: - struct CacheShard { - std::mutex CacheLock; - llvm::StringMap Cache; - }; - std::unique_ptr CacheShards; - unsigned NumShards; + struct CacheShard { + std::mutex CacheLock; + llvm::StringMap Cache; }; - - SingleCache CacheMinimized; - SingleCache CacheOriginal; + std::unique_ptr CacheShards; + unsigned NumShards; }; /// This class is a local cache, that caches the 'stat' and 'open' calls to the /// underlying real file system. It distinguishes between minimized and original /// files. class DependencyScanningFilesystemLocalCache { -private: - using SingleCache = - llvm::StringMap; + llvm::StringMap Cache; - SingleCache CacheMinimized; - SingleCache CacheOriginal; - - SingleCache &selectCache(bool Minimized) { - return Minimized ? CacheMinimized : CacheOriginal; +public: + const CachedFileSystemEntry *getCachedEntry(StringRef Filename) { + return Cache[Filename]; } +}; + +/// Reference to a CachedFileSystemEntry. +/// If the underlying entry is an opened file, this wrapper returns the correct +/// contents (original or minimized) and ensures consistency with file size +/// reported by status. +class EntryRef { + /// For entry that is an opened file, this bit signifies whether its contents + /// are minimized. + bool Minimized; + + /// The underlying cached entry. + const CachedFileSystemEntry *Entry; public: - void setCachedEntry(StringRef Filename, bool Minimized, - const CachedFileSystemEntry *Entry) { - SingleCache &Cache = selectCache(Minimized); - bool IsInserted = Cache.try_emplace(Filename, Entry).second; - (void)IsInserted; - assert(IsInserted && "local cache is updated more than once"); + EntryRef(bool Minimized, const CachedFileSystemEntry *Entry) + : Minimized(Minimized), Entry(Entry) {} + + llvm::ErrorOr getStatus() const { + auto MaybeStat = Entry->getStatus(); + if (!MaybeStat || MaybeStat->isDirectory()) + return MaybeStat; + return llvm::vfs::Status::copyWithNewSize(*MaybeStat, + getContents()->size()); + } + + bool isDirectory() const { return Entry->isDirectory(); } + + StringRef getName() const { return Entry->getName(); } + + llvm::ErrorOr getContents() const { + return Minimized ? Entry->getMinimizedContents() + : Entry->getOriginalContents(); } - const CachedFileSystemEntry *getCachedEntry(StringRef Filename, - bool Minimized) { - SingleCache &Cache = selectCache(Minimized); - auto It = Cache.find(Filename); - return It == Cache.end() ? nullptr : It->getValue(); + const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { + return Minimized ? &Entry->getPPSkippedRangeMapping() : nullptr; } }; @@ -197,8 +236,7 @@ /// Check whether the file should be minimized. bool shouldMinimize(StringRef Filename); - llvm::ErrorOr - getOrCreateFileSystemEntry(const StringRef Filename); + llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename); /// The global cache shared between worker threads. DependencyScanningFilesystemSharedCache &SharedCache; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -17,8 +17,7 @@ using namespace dependencies; llvm::ErrorOr -CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS, - bool Minimize) { +CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { // Load the file and its content from the file system. llvm::ErrorOr> MaybeFile = FS.openFileForRead(Filename); @@ -34,28 +33,29 @@ if (!MaybeBuffer) return MaybeBuffer.getError(); + OriginalContents = std::move(*MaybeBuffer); + return Stat; +} + +void CachedFileSystemEntry::minimizeFile() { + assert(OriginalContents && "minimizing missing contents"); + llvm::SmallString<1024> MinimizedFileContents; // Minimize the file down to directives that might affect the dependencies. - const auto &Buffer = *MaybeBuffer; SmallVector Tokens; - if (!Minimize || minimizeSourceToDependencyDirectives( - Buffer->getBuffer(), MinimizedFileContents, Tokens)) { - // Use the original file unless requested otherwise, or - // if the minimization failed. - // FIXME: Propage the diagnostic if desired by the client. - Contents = std::move(*MaybeBuffer); - return Stat; + if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), + MinimizedFileContents, Tokens)) { + // FIXME: Propagate the diagnostic if desired by the client. + // Use the original file if the minimization failed. + MinimizedContentsStorage = + llvm::MemoryBuffer::getMemBuffer(*OriginalContents); + MinimizedContentsAccess.store(MinimizedContentsStorage.get()); + return; } - Stat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(), - Stat->getLastModificationTime(), Stat->getUser(), - Stat->getGroup(), MinimizedFileContents.size(), - Stat->getType(), Stat->getPermissions()); // The contents produced by the minimizer must be null terminated. assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && "not null terminated contents"); - Contents = std::make_unique( - std::move(MinimizedFileContents)); // Compute the skipped PP ranges that speedup skipping over inactive // preprocessor blocks. @@ -74,10 +74,19 @@ Mapping[Range.Offset] = Range.Length; } PPSkippedRangeMapping = std::move(Mapping); - return Stat; + + MinimizedContentsStorage = std::make_unique( + std::move(MinimizedFileContents)); + // The algorithm in `getOrCreateFileSystemEntry` uses the presence of + // minimized contents to decide whether an entry is up-to-date or not. + // If it is up-to-date, the skipped range mappings must be already computed. + // This is why we need to store the minimized contents **after** storing the + // skipped range mappings. Failing to do so would lead to a data race. + MinimizedContentsAccess.store(MinimizedContentsStorage.get()); } -DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() { +DependencyScanningFilesystemSharedCache:: + DependencyScanningFilesystemSharedCache() { // This heuristic was chosen using a empirical testing on a // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache // sharding gives a performance edge by reducing the lock contention. @@ -89,19 +98,13 @@ } DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & -DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) { +DependencyScanningFilesystemSharedCache::get(StringRef Key) { CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; std::lock_guard LockGuard(Shard.CacheLock); auto It = Shard.Cache.try_emplace(Key); return It.first->getValue(); } -DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & -DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) { - SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal; - return Cache.get(Key); -} - /// Whitelist file extensions that should be minimized, treating no extension as /// a source file that should be minimized. /// @@ -146,28 +149,27 @@ } void CachedFileSystemEntry::init(llvm::ErrorOr &&MaybeStatus, - StringRef Filename, llvm::vfs::FileSystem &FS, - bool ShouldMinimize) { + StringRef Filename, + llvm::vfs::FileSystem &FS) { if (!MaybeStatus || MaybeStatus->isDirectory()) MaybeStat = std::move(MaybeStatus); else - MaybeStat = initFile(Filename, FS, ShouldMinimize); + MaybeStat = initFile(Filename, FS); } -llvm::ErrorOr +llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - const StringRef Filename) { - bool ShouldMinimize = shouldMinimize(Filename); + StringRef Filename) { + bool ShouldBeMinimized = shouldMinimize(Filename); - if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize)) - return Entry; + const auto *Entry = Cache.getCachedEntry(Filename); + if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) + return EntryRef(ShouldBeMinimized, Entry); // FIXME: Handle PCM/PCH files. // FIXME: Handle module map files. - DependencyScanningFilesystemSharedCache::SharedFileSystemEntry - &SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize); - const CachedFileSystemEntry *Result; + auto &SharedCacheEntry = SharedCache.get(Filename); { std::lock_guard LockGuard(SharedCacheEntry.ValueLock); CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; @@ -180,27 +182,30 @@ // files before building them, and then looks for them again. If we // cache the stat failure, it won't see them the second time. return MaybeStatus.getError(); - CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS(), - ShouldMinimize); + CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); } - Result = &CacheEntry; + // Checking `needsUpdate` verifies the entry represents an opened file. + // Only checking `needsMinimization` could lead to minimization of files + // that we failed to load (such files don't have `OriginalContents`). + if (CacheEntry.needsUpdate(ShouldBeMinimized)) + CacheEntry.minimizeFile(); } // Store the result in the local cache. - Cache.setCachedEntry(Filename, ShouldMinimize, Result); - return Result; + Entry = &SharedCacheEntry.Value; + return EntryRef(ShouldBeMinimized, Entry); } llvm::ErrorOr DependencyScanningWorkerFilesystem::status(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); - const llvm::ErrorOr Result = - getOrCreateFileSystemEntry(Filename); + + llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return (*Result)->getStatus(); + return Result->getStatus(); } namespace { @@ -214,7 +219,7 @@ : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} static llvm::ErrorOr> - create(const CachedFileSystemEntry *Entry, + create(EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings); llvm::ErrorOr status() override { return Stat; } @@ -235,21 +240,22 @@ } // end anonymous namespace llvm::ErrorOr> MinimizedVFSFile::create( - const CachedFileSystemEntry *Entry, - ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { - if (Entry->isDirectory()) + EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { + if (Entry.isDirectory()) return llvm::ErrorOr>( std::make_error_code(std::errc::is_a_directory)); - llvm::ErrorOr Contents = Entry->getContents(); + llvm::ErrorOr Contents = Entry.getContents(); if (!Contents) return Contents.getError(); auto Result = std::make_unique( - llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(), + llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(), /*RequiresNullTerminator=*/false), - *Entry->getStatus()); - if (!Entry->getPPSkippedRangeMapping().empty() && PPSkipMappings) - (*PPSkipMappings)[Result->Buffer->getBufferStart()] = - &Entry->getPPSkippedRangeMapping(); + *Entry.getStatus()); + + const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); + if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) + (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings; + return llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } @@ -259,8 +265,7 @@ SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); - const llvm::ErrorOr Result = - getOrCreateFileSystemEntry(Filename); + llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); return MinimizedVFSFile::create(Result.get(), PPSkipMappings); diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -205,32 +205,46 @@ } namespace dependencies { -TEST(DependencyScanningFilesystem, IgnoredFilesHaveSeparateCache) { +TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, llvm::MemoryBuffer::getMemBuffer("// hi there!\n")); + VFS->addFile("/mod.h", 0, + llvm::MemoryBuffer::getMemBuffer("#include \n" + "// hi there!\n")); DependencyScanningFilesystemSharedCache SharedCache; auto Mappings = std::make_unique(); DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get()); + DepFS.enableMinimizationOfAllFiles(); // Let's be explicit for clarity. auto StatusMinimized0 = DepFS.status("/mod.h"); DepFS.disableMinimization("/mod.h"); auto StatusFull1 = DepFS.status("/mod.h"); - DepFS.enableMinimizationOfAllFiles(); - - auto StatusMinimized2 = DepFS.status("/mod.h"); - DepFS.disableMinimization("/mod.h"); - auto StatusFull3 = DepFS.status("/mod.h"); EXPECT_TRUE(StatusMinimized0); - EXPECT_EQ(StatusMinimized0->getSize(), 0u); EXPECT_TRUE(StatusFull1); - EXPECT_EQ(StatusFull1->getSize(), 13u); + EXPECT_EQ(StatusMinimized0->getSize(), 17u); + EXPECT_EQ(StatusFull1->getSize(), 30u); +} + +TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { + auto VFS = llvm::makeIntrusiveRefCnt(); + VFS->addFile("/mod.h", 0, + llvm::MemoryBuffer::getMemBuffer("#include \n" + "// hi there!\n")); + + DependencyScanningFilesystemSharedCache SharedCache; + auto Mappings = std::make_unique(); + DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get()); + + DepFS.disableMinimization("/mod.h"); + auto StatusFull0 = DepFS.status("/mod.h"); + DepFS.enableMinimizationOfAllFiles(); + auto StatusMinimized1 = DepFS.status("/mod.h"); - EXPECT_TRUE(StatusMinimized2); - EXPECT_EQ(StatusMinimized2->getSize(), 0u); - EXPECT_TRUE(StatusFull3); - EXPECT_EQ(StatusFull3->getSize(), 13u); + EXPECT_TRUE(StatusFull0); + EXPECT_TRUE(StatusMinimized1); + EXPECT_EQ(StatusFull0->getSize(), 30u); + EXPECT_EQ(StatusMinimized1->getSize(), 17u); } } // end namespace dependencies diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -64,6 +64,8 @@ uint64_t Size, llvm::sys::fs::file_type Type, llvm::sys::fs::perms Perms); + /// Get a copy of a Status with a different size. + static Status copyWithNewSize(const Status &In, uint64_t NewSize); /// Get a copy of a Status with a different name. static Status copyWithNewName(const Status &In, const Twine &NewName); static Status copyWithNewName(const llvm::sys::fs::file_status &In, diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -75,6 +75,12 @@ : Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group), Size(Size), Type(Type), Perms(Perms) {} +Status Status::copyWithNewSize(const Status &In, uint64_t NewSize) { + return Status(In.getName(), In.getUniqueID(), In.getLastModificationTime(), + In.getUser(), In.getGroup(), NewSize, In.getType(), + In.getPermissions()); +} + Status Status::copyWithNewName(const Status &In, const Twine &NewName) { return Status(NewName, In.getUniqueID(), In.getLastModificationTime(), In.getUser(), In.getGroup(), In.getSize(), In.getType(),