diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -22,10 +22,55 @@ namespace tooling { namespace dependencies { +/// Original and minimized contents of a cached file entry. Can be shared +/// between multiple instances of `CachedFileSystemEntry`. +struct CachedFileContents { + CachedFileContents() : OriginalAccess(nullptr), MinimizedAccess(nullptr) {} + + /// Read the original contents of the file. + /// + /// The filesystem opens the file even for `stat` calls open to avoid the + /// issues with stat + open of minimized files that might lead to a + /// mismatching size of the file. + llvm::ErrorOr read(StringRef Filename, + llvm::vfs::FileSystem &FS); + + /// Minimize the original contents. + void minimize(); + + /// \returns True if the contents needs to be updated. + bool needsUpdate(bool ShouldBeMinimized) const { + return needsRead() || needsMinimization(ShouldBeMinimized); + } + + /// \returns True if the original contents need to be read from the underlying + /// filesystem. + bool needsRead() const { return !OriginalAccess.load(); } + + /// \returns True if the contents need to be minimized. + bool needsMinimization(bool ShouldBeMinimized) const { + return ShouldBeMinimized && !MinimizedAccess.load(); + } + + std::mutex ValueLock; + + /// Owning storage for file contents. Always mutated under locked `ValueLock`. + std::unique_ptr OriginalStorage; + std::unique_ptr MinimizedStorage; + + /// Atomic view of the file contents. This prevents data races when multiple + /// threads call `needsRead` or `needsMinimization`. + std::atomic OriginalAccess; + std::atomic MinimizedAccess; + + PreprocessorSkippedRangeMapping PPSkippedRangeMapping; +}; + /// An in-memory representation of a file system entity that is of interest to /// the dependency scanning filesystem. /// /// It represents one of the following: +/// - opened file with no contents (yet) and a stat value, /// - opened file with original contents and a stat value, /// - opened file with original contents, minimized contents and a stat value, /// - directory entry with its stat value, @@ -35,11 +80,12 @@ public: /// Creates an uninitialized entry. CachedFileSystemEntry() - : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {} + : MaybeStat(llvm::vfs::Status()), ContentsAccess(nullptr) {} /// Initialize the cached file system entry. - void init(llvm::ErrorOr &&MaybeStatus, StringRef Filename, - llvm::vfs::FileSystem &FS); + void init(llvm::ErrorOr &&MaybeStatus) { + MaybeStat = std::move(MaybeStatus); + } /// Initialize the entry as file with minimized or original contents. /// @@ -66,8 +112,11 @@ return MaybeStat.getError(); assert(!MaybeStat->isDirectory() && "not a file"); assert(isInitialized() && "not initialized"); - assert(OriginalContents && "not read"); - return OriginalContents->getBuffer(); + CachedFileContents *Contents = ContentsAccess.load(); + assert(Contents && "contents member not initialized"); + llvm::MemoryBuffer *Buffer = Contents->OriginalAccess.load(); + assert(Buffer && "not read"); + return Buffer->getBuffer(); } /// \returns The error or the file's minimized contents. @@ -76,22 +125,35 @@ return MaybeStat.getError(); assert(!MaybeStat->isDirectory() && "not a file"); assert(isInitialized() && "not initialized"); - llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load(); + CachedFileContents *Contents = ContentsAccess.load(); + assert(Contents && "contents member not initialized"); + llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); assert(Buffer && "not minimized"); return Buffer->getBuffer(); } + /// Get the contents associated with this entry. If none is present, create + /// one with the given factory function. + CachedFileContents *getOrCreateContents( + llvm::function_ref Create) { + CachedFileContents *Contents = ContentsAccess.load(); + if (!Contents) { + Contents = Create(); + ContentsAccess.store(Contents); + } + return Contents; + } + /// \returns True if this entry represents a file that can be read. bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); } /// \returns True if this cached entry needs to be updated. bool needsUpdate(bool ShouldBeMinimized) const { - return isReadable() && needsMinimization(ShouldBeMinimized); - } - - /// \returns True if the contents of this entry need to be minimized. - bool needsMinimization(bool ShouldBeMinimized) const { - return ShouldBeMinimized && !MinimizedContentsAccess.load(); + if (isReadable()) { + CachedFileContents *Contents = ContentsAccess.load(); + return !Contents || Contents->needsUpdate(ShouldBeMinimized); + } + return false; } /// \returns The error or the status of the entry. @@ -106,23 +168,27 @@ return MaybeStat->getName(); } + /// \returns The unique ID of the entry. + llvm::sys::fs::UniqueID getUniqueID() const { + assert(isInitialized() && "not initialized"); + assert(MaybeStat && "not valid directory entry"); + return MaybeStat->getUniqueID(); + } + /// Return the mapping between location -> distance that is used to speed up /// the block skipping in the preprocessor. const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { - return PPSkippedRangeMapping; + CachedFileContents *Contents = std::atomic_load(&ContentsAccess); + assert(Contents && "not minimized"); + return Contents->PPSkippedRangeMapping; } private: llvm::ErrorOr MaybeStat; - std::unique_ptr OriginalContents; - - /// Owning storage for the minimized file contents. - std::unique_ptr MinimizedContentsStorage; - /// Atomic view of the minimized file contents. - /// This prevents data races when multiple threads call `needsMinimization`. - std::atomic MinimizedContentsAccess; - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + /// Non-owning pointer to the file contents. This can be shared between + /// multiple instances of this class (e.g. between a symlink and its target). + std::atomic ContentsAccess; }; /// This class is a shared cache, that caches the 'stat' and 'open' calls to the @@ -140,16 +206,24 @@ DependencyScanningFilesystemSharedCache(); - /// Returns a cache entry for the corresponding key. - /// - /// A new cache entry is created if the key is not in the cache. This is a - /// thread safe call. - SharedFileSystemEntry &get(StringRef Key); + /// Returns a cache entry for the corresponding path. + /// New cache entry is created if the path is not in the cache. This is + /// a thread safe call. + SharedFileSystemEntry &getEntry(StringRef Path); + + /// Returns cached contents for the given file. + /// Entry is created in the cache and returned in case contents of file with + /// the given UID were not read yet. This is a thread safe call. + CachedFileContents &getFileContents(StringRef Filename, + llvm::sys::fs::UniqueID UID); private: struct CacheShard { std::mutex CacheLock; llvm::StringMap Cache; + + llvm::SpecificBumpPtrAllocator ContentsAllocator; + llvm::DenseMap ContentsCache; }; std::unique_ptr CacheShards; unsigned NumShards; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -17,7 +17,9 @@ using namespace dependencies; llvm::ErrorOr -CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { +CachedFileContents::read(StringRef Filename, llvm::vfs::FileSystem &FS) { + assert(!OriginalAccess.load() && "reading file for the second time"); + // Load the file and its content from the file system. llvm::ErrorOr> MaybeFile = FS.openFileForRead(Filename); @@ -33,32 +35,39 @@ if (!MaybeBuffer) return MaybeBuffer.getError(); - OriginalContents = std::move(*MaybeBuffer); + OriginalStorage = std::move(*MaybeBuffer); + OriginalAccess.store(OriginalStorage.get()); return Stat; } -void CachedFileSystemEntry::minimizeFile() { - assert(OriginalContents && "minimizing missing contents"); +void CachedFileContents::minimize() { + auto *Original = OriginalAccess.load(); + // FIXME: This should really be an assert. We should make it one once we + // tackle the big FIXME before `CachedFileContents::read` call in + // `DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry`. + if (!Original) + return; + + assert(!MinimizedAccess.load() && "minimizing file for the second time"); llvm::SmallString<1024> MinimizedFileContents; // Minimize the file down to directives that might affect the dependencies. SmallVector Tokens; - if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), + if (minimizeSourceToDependencyDirectives(Original->getBuffer(), MinimizedFileContents, Tokens)) { // FIXME: Propagate the diagnostic if desired by the client. // Use the original file if the minimization failed. - MinimizedContentsStorage = - llvm::MemoryBuffer::getMemBuffer(*OriginalContents); - MinimizedContentsAccess.store(MinimizedContentsStorage.get()); + MinimizedStorage = llvm::MemoryBuffer::getMemBuffer(*Original); + MinimizedAccess.store(MinimizedStorage.get()); return; } // The contents produced by the minimizer must be null terminated. assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && "not null terminated contents"); - MinimizedContentsStorage = std::make_unique( + MinimizedStorage = std::make_unique( std::move(MinimizedFileContents)); - MinimizedContentsAccess.store(MinimizedContentsStorage.get()); + MinimizedAccess.store(MinimizedStorage.get()); // Compute the skipped PP ranges that speedup skipping over inactive // preprocessor blocks. @@ -92,11 +101,20 @@ } DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & -DependencyScanningFilesystemSharedCache::get(StringRef Key) { +DependencyScanningFilesystemSharedCache::getEntry(StringRef Key) { CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; std::lock_guard LockGuard(Shard.CacheLock); - auto It = Shard.Cache.try_emplace(Key); - return It.first->getValue(); + return Shard.Cache[Key]; +} + +CachedFileContents &DependencyScanningFilesystemSharedCache::getFileContents( + StringRef Filename, llvm::sys::fs::UniqueID UID) { + CacheShard &Shard = CacheShards[llvm::hash_value(Filename) % NumShards]; + std::lock_guard LockGuard(Shard.CacheLock); + CachedFileContents *&Value = Shard.ContentsCache[UID]; + if (!Value) + Value = new (Shard.ContentsAllocator.Allocate()) CachedFileContents; + return *Value; } /// Whitelist file extensions that should be minimized, treating no extension as @@ -142,15 +160,6 @@ return !NotToBeMinimized.contains(Filename); } -void CachedFileSystemEntry::init(llvm::ErrorOr &&MaybeStatus, - StringRef Filename, - llvm::vfs::FileSystem &FS) { - if (!MaybeStatus || MaybeStatus->isDirectory()) - MaybeStat = std::move(MaybeStatus); - else - MaybeStat = initFile(Filename, FS); -} - llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( StringRef Filename) { @@ -163,7 +172,7 @@ // FIXME: Handle PCM/PCH files. // FIXME: Handle module map files. - auto &SharedCacheEntry = SharedCache.get(Filename); + auto &SharedCacheEntry = SharedCache.getEntry(Filename); { std::lock_guard LockGuard(SharedCacheEntry.ValueLock); CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; @@ -176,14 +185,35 @@ // files before building them, and then looks for them again. If we // cache the stat failure, it won't see them the second time. return MaybeStatus.getError(); - CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); + CacheEntry.init(std::move(MaybeStatus)); } - // Checking `needsUpdate` verifies the entry represents an opened file. - // Only checking `needsMinimization` could lead to minimization of files - // that we failed to load (such files don't have `OriginalContents`). - if (CacheEntry.needsUpdate(ShouldBeMinimized)) - CacheEntry.minimizeFile(); + if (CacheEntry.isReadable()) { + auto *Contents = CacheEntry.getOrCreateContents([&]() { + return &SharedCache.getFileContents(Filename, CacheEntry.getUniqueID()); + }); + + if (Contents->needsUpdate(ShouldBeMinimized)) { + std::lock_guard ContentLockGuard(Contents->ValueLock); + + if (Contents->needsRead()) + // FIXME: This read can fail. + // In that case, we should probably update `CacheEntry::MaybeStat`. + // However, that is concurrently read at the start of this function + // (`needsUpdate` -> `isReadable`), leading to a data race. If we try + // to avoid the data race by returning an `error_code` here (without + // updating the entry stat value, we can end up attempting to read the + // file again in the future, which breaks the consistency guarantees + // we want to provide in presence of FS changes. (We would fail for + // the first time and potentially succeed for the second time.) The + // correct solution would be to keep an `AttemptedRead` bit, set it to + // `true` on the first read and prevent future reads of the same file. + Contents->read(CacheEntry.getName(), getUnderlyingFS()); + + if (Contents->needsMinimization(ShouldBeMinimized)) + Contents->minimize(); + } + } } // Store the result in the local cache.