diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -256,6 +256,11 @@ "test module file extension '%0' has different version (%1.%2) than expected " "(%3.%4)">; +def err_missing_vfs_stat_cache_file : Error< + "stat cache file '%0' not found">, DefaultFatal; +def err_invalid_vfs_stat_cache : Error< + "invalid stat cache file '%0'">, DefaultFatal; + def err_missing_vfs_overlay_file : Error< "virtual filesystem overlay file '%0' not found">, DefaultFatal; def err_invalid_vfs_overlay : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3357,6 +3357,8 @@ HelpText<"Add directory to SYSTEM include search path, " "absolute paths are relative to -isysroot">, MetaVarName<"">, Flags<[CC1Option]>; +def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, Group, Flags<[CC1Option]>, + HelpText<"Use the stat data cached in file instead of doing filesystem syscalls. 
See clang-stat-cache utility.">; def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group, Flags<[CC1Option]>, HelpText<"Overlay the virtual filesystem described by file over the real file system">; def imultilib : Separate<["-"], "imultilib">, Group; diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -296,6 +296,7 @@ IntrusiveRefCntPtr createVFSFromOverlayFiles(ArrayRef VFSOverlayFiles, + ArrayRef VFSStatCacheFiles, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS); diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -181,6 +181,9 @@ /// of computing the module hash. llvm::SmallSetVector ModulesIgnoreMacros; + /// The set of user-provided stat cache files. + std::vector VFSStatCacheFiles; + /// The set of user-provided virtual filesystem overlay files. std::vector VFSOverlayFiles; @@ -250,6 +253,10 @@ SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader); } + void AddVFSStatCacheFile(StringRef Name) { + VFSStatCacheFiles.push_back(std::string(Name)); + } + void AddVFSOverlayFile(StringRef Name) { VFSOverlayFiles.push_back(std::string(Name)); } diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -574,7 +574,7 @@ // performs the initialization too late (once both target and language // options are read). 
PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles( - HSOpts.VFSOverlayFiles, PP.getDiagnostics(), + HSOpts.VFSOverlayFiles, HSOpts.VFSStatCacheFiles, PP.getDiagnostics(), PP.getFileManager().getVirtualFileSystemPtr())); InitializedHeaderSearchPaths = true; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -83,6 +83,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/StatCacheFileSystem.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -3084,6 +3085,9 @@ GenerateArg(Args, Opt, P.Prefix, SA); } + for (const std::string &F : Opts.VFSStatCacheFiles) + GenerateArg(Args, OPT_ivfsstatcache, F, SA); + for (const std::string &F : Opts.VFSOverlayFiles) GenerateArg(Args, OPT_ivfsoverlay, F, SA); } @@ -3217,6 +3221,9 @@ Opts.AddSystemHeaderPrefix( A->getValue(), A->getOption().matches(OPT_system_header_prefix)); + for (const auto *A : Args.filtered(OPT_ivfsstatcache)) + Opts.AddVFSStatCacheFile(A->getValue()); + for (const auto *A : Args.filtered(OPT_ivfsoverlay)) Opts.AddVFSOverlayFile(A->getValue()); @@ -4747,12 +4754,31 @@ const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS) { return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles, + CI.getHeaderSearchOpts().VFSStatCacheFiles, Diags, std::move(BaseFS)); } IntrusiveRefCntPtr clang::createVFSFromOverlayFiles( - ArrayRef VFSOverlayFiles, DiagnosticsEngine &Diags, + ArrayRef VFSOverlayFiles, + ArrayRef VFSStatCacheFiles, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS) { + for (const auto &File : VFSStatCacheFiles) { + llvm::ErrorOr> Buffer = + BaseFS->getBufferForFile(File); + if (!Buffer) { + Diags.Report(diag::err_missing_vfs_stat_cache_file) << File; + continue; 
+ } + + auto StatCache = + llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), BaseFS); + + if (errorToBool(StatCache.takeError())) + Diags.Report(diag::err_invalid_vfs_stat_cache) << File; + else + BaseFS = std::move(*StatCache); + } + if (VFSOverlayFiles.empty()) return BaseFS; diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -71,6 +71,7 @@ clang-refactor clang-diff clang-scan-deps + clang-stat-cache diagtool hmaptool ) diff --git a/clang/test/Driver/vfsstatcache.c b/clang/test/Driver/vfsstatcache.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/vfsstatcache.c @@ -0,0 +1,5 @@ +// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s +// CHECK: "-ivfsstatcache" "foo.h" + +// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck -check-prefix=CHECK-MISSING %s +// CHECK-MISSING: stat cache file 'foo.h' not found diff --git a/clang/test/clang-stat-cache/cache-effects.c b/clang/test/clang-stat-cache/cache-effects.c new file mode 100644 --- /dev/null +++ b/clang/test/clang-stat-cache/cache-effects.c @@ -0,0 +1,63 @@ +#include "foo.h" + +// Testing the effects of a cache is tricky, because it's just supposed to speed +// things up, not change the behavior. In this test, we are using an outdated +// cache to trick HeaderSearch into finding the wrong module and show that it is +// being used. + +// Clear the module cache. +// RUN: rm -rf %t +// RUN: mkdir -p %t/Inputs +// RUN: mkdir -p %t/Inputs/Foo1 +// RUN: mkdir -p %t/Inputs/Foo2 +// RUN: mkdir -p %t/modules-to-compare + +// === +// Create a Foo module in the Foo1 direcotry. +// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map + +// === +// Compile the module. Note that the compiler has 2 header search paths: +// Foo2 and Foo1 in that order. The module has been created in Foo1, and +// it is the only version available now. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm + +// === +// Create a stat cache for our inputs directory +// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache + +// === +// As a sanity check, re-run the same compilation with the cache and check that +// the module does not change. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s -Rmodule-build 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Now introduce a different Foo module in the Foo2 directory which is before +// Foo1 in the search paths. +// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map + +// === +// Because we're using the (now-outdated) stat cache, this compilation +// should still be using the first module. It will not see the new one +// which is earlier in the search paths. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build -Rmodule-import %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Regenerate the stat cache for our Inputs directory +// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1 + +// === +// Use the module and now see that we are recompiling the new one. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: not diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm diff --git a/clang/test/clang-stat-cache/errors.test b/clang/test/clang-stat-cache/errors.test new file mode 100644 --- /dev/null +++ b/clang/test/clang-stat-cache/errors.test @@ -0,0 +1,42 @@ +RUN: rm -rf %t +RUN: mkdir -p %t + +RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-DIR %s +NO-SUCH-DIR: Failed to stat the target directory: {{[Nn]}}o such file or directory + +RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-FILE %s +NO-SUCH-FILE: Failed to open cache file: '{{.*}}': {{[Nn]}}o such file or directory + +# Use mixed-case directories to exercise the case insensitive implementation. +RUN: mkdir -p %t/Dir +RUN: mkdir -p %t/Dir2 + +# Try to overwrite a few invalid caches +RUN: echo "Not a stat cache" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "Not a stat cache, but bigger than the stat cache header" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s + +INVALID-CACHE: The output cache file exists and is not a valid stat cache. Aborting. + +# Test the force flag +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." 
> %t/stat.cache +RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE-FORCE %s +INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat cache. Forced update. + +# Generate a valid cache for dir +RUN: rm %t/stat.cache +RUN: clang-stat-cache %t/Dir -o %t/stat.cache +RUN: cp %t/stat.cache %t/stat.cache.save + +# Try with same base direcotry but with extraneous separators +RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck --check-prefix=EXTRA-SEP %s +EXTRA-SEP-NOT: Existing cache has different directory. Regenerating... +EXTRA-SEP: Cache up-to-date, exiting + +# Rewrite the cache with a different base directory +RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck --check-prefix=OTHER-DIR %s +OTHER-DIR: Existing cache has different directory. Regenerating... + diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt --- a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -15,6 +15,7 @@ if(HAVE_CLANG_REPL_SUPPORT) add_clang_subdirectory(clang-repl) endif() +add_clang_subdirectory(clang-stat-cache) add_clang_subdirectory(c-index-test) diff --git a/clang/tools/clang-stat-cache/CMakeLists.txt b/clang/tools/clang-stat-cache/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang/tools/clang-stat-cache/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +add_clang_tool(clang-stat-cache + clang-stat-cache.cpp + ) + +if(APPLE) +set(CLANG_STAT_CACHE_LIB_DEPS + "-framework CoreServices" + ) +endif() + +clang_target_link_libraries(clang-stat-cache + PRIVATE + ${CLANG_STAT_CACHE_LIB_DEPS} + ) diff --git a/clang/tools/clang-stat-cache/clang-stat-cache.cpp b/clang/tools/clang-stat-cache/clang-stat-cache.cpp new file mode 100644 --- /dev/null +++ b/clang/tools/clang-stat-cache/clang-stat-cache.cpp @@ -0,0 +1,318 @@ +//===- clang-stat-cache.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StatCacheFileSystem.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" + +#include + +#ifdef __APPLE__ +#include + +#include +#include +#endif // __APPLE__ + +// The clang-stat-cache utility creates an on-disk cache for the stat data +// of a file-system tree which is expected to be immutable during a build. + +using namespace llvm; +using llvm::vfs::StatCacheFileSystem; + +cl::OptionCategory StatCacheCategory("clang-stat-cache options"); + +cl::opt OutputFilename("o", cl::Required, + cl::desc("Specify output filename"), + cl::value_desc("filename"), + cl::cat(StatCacheCategory)); + +cl::opt TargetDirectory(cl::Positional, cl::Required, + cl::value_desc("dirname"), + cl::cat(StatCacheCategory)); + +cl::opt Verbose("v", cl::desc("More verbose output")); +cl::opt Force("f", cl::desc("Force cache generation")); + +#if __APPLE__ +// Used by checkContentsValidity. See below. +struct CallbackInfo { + bool SeenChanges = false; +}; + +// Used by checkContentsValidity. See below. +static void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo, + size_t numEvents, void *eventPaths, + const FSEventStreamEventFlags *eventFlags, + const FSEventStreamEventId *eventIds) { + CallbackInfo *Info = static_cast(CtxInfo); + for (size_t i = 0; i < numEvents; ++i) { + // The kFSEventStreamEventFlagHistoryDone is set on the last 'historical' + // event passed to the callback. This means it is passed after the callback + // all the relevant activity between the StartEvent of the stream and the + // point the stream was created. 
+    // If the callback didn't see any other event, it means there haven't been
+    // any alterations to the target directory hierarchy and the cache contents
+    // is still up-to-date.
+    if (eventFlags[i] & kFSEventStreamEventFlagHistoryDone) {
+      // Let's stop the main queue and go back to our non-queue code.
+      CFRunLoopStop(CFRunLoopGetCurrent());
+      break;
+    }
+
+    // If we see any event outside of the kFSEventStreamEventFlagHistoryDone
+    // one, there have been changes to the target directory.
+    Info->SeenChanges = true;
+  }
+}
+
+// FSEvents-based check for cache contents validity. We store the latest
+// FSEventStreamEventId in the cache as a ValidityToken and check if any
+// file system events affected the base directory since the cache was
+// generated.
+//
+// On entry ValidityToken holds the event id read from the cache; on return it
+// holds the event id that was current when the check started. Returns true
+// when no change was observed, and also when the event stream could not be
+// set up (the cache is then assumed to be up-to-date).
+static bool checkContentsValidity(uint64_t &ValidityToken) {
+  // Start at the latest event stored in the cache.
+  FSEventStreamEventId StartEvent = ValidityToken;
+  // Update the Validity token with the current latest event.
+  ValidityToken = FSEventsGetCurrentEventId();
+
+  CallbackInfo Info;
+  FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr};
+  CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait.
+  bool Started = false;
+
+  // Decode the path as UTF-8: with an ASCII encoding the string creation
+  // fails (returns NULL) for any directory name containing non-ASCII bytes.
+  CFStringRef TargetDir = CFStringCreateWithCStringNoCopy(
+      kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingUTF8,
+      kCFAllocatorNull);
+  CFArrayRef PathsToWatch =
+      TargetDir ? CFArrayCreate(nullptr, (const void **)&TargetDir, 1, nullptr)
+                : nullptr;
+
+  // Create the stream
+  FSEventStreamRef Stream =
+      PathsToWatch
+          ? FSEventStreamCreate(nullptr, &FSEventsCallback, &Ctx, PathsToWatch,
+                                StartEvent, Latency,
+                                kFSEventStreamCreateFlagNone)
+          : nullptr;
+  if (Stream) {
+    // Associate the stream with the main queue.
+    FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue());
+    // Start the stream (needs the queue to run to do anything).
+    Started = FSEventStreamStart(Stream);
+    if (Started) {
+      // Start the main queue. It will be exited by our callback when it got
+      // confirmed it processed all events.
+      CFRunLoopRun();
+      FSEventStreamStop(Stream);
+    }
+    // Unschedule and free the stream now that we are done with it.
+    FSEventStreamInvalidate(Stream);
+    FSEventStreamRelease(Stream);
+  }
+  // The array was created without retain callbacks, so release it before the
+  // string it points at.
+  if (PathsToWatch)
+    CFRelease(PathsToWatch);
+  if (TargetDir)
+    CFRelease(TargetDir);
+
+  if (!Started) {
+    errs() << "Failed to create FS event stream. "
+           << "Considering the cache up-to-date.\n";
+    return true;
+  }
+
+  return !Info.SeenChanges;
+}
+
+#else // __APPLE__
+
+// There is no cross-platform way to implement a validity check. If this
+// platform doesn't support it, just consider the cache contents always
+// valid. When that's the case, the tool running cache generation needs
+// to have the knowledge to do it only when needed.
+static bool checkContentsValidity(uint64_t &ValidityToken) { return true; }
+
+#endif // __APPLE__
+
+// Populate Generator with the stat cache data for the filesystem tree
+// rooted at BasePath. Returns the error that stopped the directory walk, or a
+// default-constructed (success) error_code when the whole tree was visited.
+static std::error_code
+populateHashTable(StringRef BasePath,
+                  StatCacheFileSystem::StatCacheWriter &Generator) {
+  using namespace llvm;
+  using namespace sys::fs;
+
+  std::error_code ErrorCode;
+
+  // Just loop over the target directory using a recursive iterator.
+  // This invocation follows symlinks, so we are going to potentially
+  // store the status of the same file multiple times with different
+  // names.
+  for (recursive_directory_iterator I(BasePath, ErrorCode), E;
+       I != E && !ErrorCode; I.increment(ErrorCode)) {
+    StringRef Path = I->path();
+    sys::fs::file_status s;
+    // This can fail (broken symlink) and leave the file_status with
+    // its default values. The reader knows this.
+    status(Path, s);
+
+    Generator.addEntry(Path, s);
+  }
+
+  return ErrorCode;
+}
+
+// Check whether the already opened cache file FD can be kept as is.
+//
+// Returns true when the file holds a cache for TargetDirectory with the
+// current format version whose contents are still up-to-date and no forced
+// regeneration was requested. Returns false when the cache has to be
+// (re)generated; ValidityToken then holds the token to embed in the new cache.
+// Exits the process when the file exists but cannot be recognized as a stat
+// cache and -f was not given.
+static bool checkCacheValid(int FD, raw_fd_ostream &Out,
+                            uint64_t &ValidityToken) {
+  sys::fs::file_status Status;
+  auto EC = sys::fs::status(FD, Status);
+  if (EC) {
+    llvm::errs() << "fstat failed: "
+                 << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return false;
+  }
+
+  auto Size = Status.getSize();
+  if (Size == 0) {
+    // New file.
+#ifdef __APPLE__
+    // Get the current (global) FSEvent id and use this as ValidityToken.
+    ValidityToken = FSEventsGetCurrentEventId();
+#endif
+    return false;
+  }
+
+  auto ErrorOrBuffer = MemoryBuffer::getOpenFile(
+      sys::fs::convertFDToNativeFile(FD), OutputFilename, Size);
+  // Reading the existing file can fail; it must not be dereferenced then.
+  // Treat an unreadable file like an unrecognized one: only overwrite it when
+  // asked to with -f.
+  if (!ErrorOrBuffer) {
+    llvm::errs() << "Failed to read the existing cache file: "
+                 << ErrorOrBuffer.getError().message() << ".";
+    if (!Force) {
+      llvm::errs() << " Aborting.\n";
+      exit(1);
+    }
+
+    llvm::errs() << " Forced update.\n";
+    return false;
+  }
+
+  // Refuse to write to this cache file if it exists but its contents do
+  // not look like a valid cache file.
+  StringRef BaseDir;
+  bool IsCaseSensitive = true;
+  bool VersionMatch = false;
+  if (auto E = StatCacheFileSystem::validateCacheFile(
+          (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive,
+          VersionMatch, ValidityToken)) {
+    llvm::errs() << "The output cache file exists and is not a valid stat "
+                    "cache.";
+    if (!Force) {
+      llvm::errs() << " Aborting.\n";
+      exit(1);
+    }
+
+    consumeError(std::move(E));
+    llvm::errs() << " Forced update.\n";
+    return false;
+  }
+
+  if (BaseDir != TargetDirectory &&
+      (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) {
+    llvm::errs() << "Existing cache has different directory. Regenerating...\n";
+    return false;
+  }
+
+  if (!VersionMatch) {
+    llvm::errs()
+        << "Existing cache has different version number. Regenerating...\n";
+    return false;
+  }
+
+  // Basic structure checks have passed. Lets see if we can prove that the cache
+  // contents are still valid.
+  bool IsValid = checkContentsValidity(ValidityToken);
+  // When the cache is going to be rewritten, the new ValidityToken reaches the
+  // file through the writer; do not touch the output stream here.
+  if (!IsValid || Force)
+    return false;
+
+  // The cache is valid, but we might have gotten an updated ValidityToken.
+  // Update the cache with it as clang-stat-cache is just going to exit after
+  // returning from this function.
+  StatCacheFileSystem::updateValidityToken(Out, ValidityToken);
+  return true;
+}
+
+// Entry point: (re)generate the stat cache OutputFilename for the tree rooted
+// at TargetDirectory, unless an existing cache is found to be up-to-date.
+// Returns 0 on success (including when nothing had to be done) and 1 on error.
+int main(int argc, char *argv[]) {
+  cl::ParseCommandLineOptions(argc, argv);
+
+  llvm::SmallString<128> CanonicalDirectory = StringRef(TargetDirectory);
+
+  // Remove extraneous separators from the end of the basename.
+  while (!CanonicalDirectory.empty() &&
+         sys::path::is_separator(CanonicalDirectory.back()))
+    CanonicalDirectory.pop_back();
+  // Canonicalize separators on Windows
+  llvm::sys::path::make_preferred(CanonicalDirectory);
+  TargetDirectory = std::string(CanonicalDirectory);
+
+  StringRef Dirname(TargetDirectory);
+
+  std::error_code EC;
+  int FD;
+  EC = sys::fs::openFileForReadWrite(
+      OutputFilename, FD, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::OF_None);
+  if (EC) {
+    llvm::errs() << "Failed to open cache file: "
+                 << toString(llvm::createFileError(OutputFilename, EC)) << "\n";
+    return 1;
+  }
+
+  raw_fd_ostream Out(FD, /* ShouldClose=*/true);
+
+  uint64_t ValidityToken = 0;
+  // Check if the cache is valid and up-to-date.
+  if (checkCacheValid(FD, Out, ValidityToken)) {
+    if (Verbose)
+      outs() << "Cache up-to-date, exiting\n";
+    return 0;
+  }
+
+  if (Verbose)
+    outs() << "Building a stat cache for '" << TargetDirectory << "' into '"
+           << OutputFilename << "'\n";
+
+  // Do not generate a cache for NFS. Iterating huge directory hierarchies
+  // over NFS will be very slow. Better to let the compiler search only the
+  // pieces that it needs than use a cache that takes ages to populate.
+  bool IsLocal;
+  EC = sys::fs::is_local(Dirname, IsLocal);
+  if (EC) {
+    errs() << "Failed to stat the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  if (!IsLocal && !Force) {
+    errs() << "Target directory is not a local filesystem. "
+           << "Not populating the cache.\n";
+    return 0;
+  }
+
+  sys::fs::file_status BaseDirStatus;
+  if (std::error_code EC = status(Dirname, BaseDirStatus)) {
+    errs() << "Failed to stat the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  // Check if the filesystem hosting the target directory is case sensitive.
+  bool IsCaseSensitive = true;
+#ifdef _PC_CASE_SENSITIVE
+  IsCaseSensitive =
+      ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1;
+#endif
+  StatCacheFileSystem::StatCacheWriter Generator(
+      Dirname, BaseDirStatus, IsCaseSensitive, ValidityToken);
+
+  // Populate the cache.
+  auto startTime = llvm::TimeRecord::getCurrentTime();
+  EC = populateHashTable(Dirname, Generator);
+  auto duration = llvm::TimeRecord::getCurrentTime();
+  duration -= startTime;
+
+  if (Verbose)
+    errs() << "populateHashTable took: " << duration.getWallTime() << "s\n";
+
+  // The cache reader reports paths below the base directory that are missing
+  // from the cache as nonexistent, so a cache built from an interrupted walk
+  // would hide real files. Refuse to write one.
+  if (EC) {
+    errs() << "Failed to walk the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  // Write the cache to disk.
+  startTime = llvm::TimeRecord::getCurrentTime();
+  size_t Size = Generator.writeStatCache(Out);
+  duration = llvm::TimeRecord::getCurrentTime();
+  duration -= startTime;
+
+  if (Verbose)
+    errs() << "writeStatCache took: " << duration.getWallTime() << "s\n";
+
+  // We might have opened a pre-existing cache which was bigger. Push out any
+  // buffered data before trimming the file to its new size.
+  Out.flush();
+  if (std::error_code ResizeEC = llvm::sys::fs::resize_file(FD, Size)) {
+    errs() << "Failed to resize the cache file: "
+           << llvm::toString(llvm::errorCodeToError(ResizeEC)) << "\n";
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/llvm/include/llvm/Support/StatCacheFileSystem.h b/llvm/include/llvm/Support/StatCacheFileSystem.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/StatCacheFileSystem.h
@@ -0,0 +1,110 @@
+//===- StatCacheFileSystem.h - Status Caching Proxy File System -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STATCACHEFILESYSTEM_H
+#define LLVM_SUPPORT_STATCACHEFILESYSTEM_H
+
+#include "llvm/Support/VirtualFileSystem.h"
+
+#include <list>
+
+namespace llvm {
+template <typename Info> class OnDiskIterableChainedHashTable;
+template <typename Info> class OnDiskChainedHashTableGenerator;
+
+namespace vfs {
+
+/// A ProxyFileSystem using cached information for status() rather than going to
+/// the underlying filesystem.
+///
+/// When dealing with a huge tree of (mostly) immutable filesystem content
+/// like an SDK, it can be very costly to ask the underlying filesystem for
+/// `stat` data. Even when caching the `stat`s internally, having many
+/// concurrent Clangs accessing the same tree in a similar way causes
+/// contention. As SDK files are mostly immutable, we can pre-compute the status
+/// information using clang-stat-cache and use that information directly without
+/// accessing the real filesystem until Clang needs to open a file. This can
+/// speed up module verification and HeaderSearch by significant amounts.
+class StatCacheFileSystem : public llvm::vfs::ProxyFileSystem {
+  class StatCacheLookupInfo;
+  using StatCacheType =
+      llvm::OnDiskIterableChainedHashTable<StatCacheLookupInfo>;
+
+  class StatCacheGenerationInfo;
+  using StatCacheGeneratorType =
+      llvm::OnDiskChainedHashTableGenerator<StatCacheGenerationInfo>;
+
+  explicit StatCacheFileSystem(std::unique_ptr<llvm::MemoryBuffer> CacheFile,
+                               IntrusiveRefCntPtr<FileSystem> FS,
+                               bool IsCaseSensitive);
+
+public:
+  /// Create a StatCacheFileSystem from the passed \a CacheBuffer, a
+  /// MemoryBuffer holding the contents of a stat cache file. The returned
+  /// filesystem will be overlaid on top of \a FS.
+  static Expected<IntrusiveRefCntPtr<StatCacheFileSystem>>
+  create(std::unique_ptr<llvm::MemoryBuffer> CacheBuffer,
+         IntrusiveRefCntPtr<FileSystem> FS);
+
+  /// The status override which will consult the cache if \a Path is in the
+  /// cached filesystem tree.
+  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
+
+public:
+  /// A helper class to generate stat caches.
+  class StatCacheWriter {
+    llvm::SmallString<128> BaseDir;
+    bool IsCaseSensitive;
+    uint64_t ValidityToken;
+    StatCacheGeneratorType *Generator;
+    std::list<std::string> PathStorage;
+
+  public:
+    /// Create a StatCacheWriter
+    ///
+    /// \param BaseDir The base directory for the path. Every filename passed to
+    ///                addEntry() needs to start with this base directory.
+    /// \param Status The status entry for the base directory.
+    /// \param IsCaseSensitive Whether the cache is case sensitive.
+    /// \param ValidityToken A 64 bits token that gets embedded in the cache and
+    ///                      can be used by generator tools to check for the
+    ///                      cache validity in a platform-specific way.
+    StatCacheWriter(StringRef BaseDir, const sys::fs::file_status &Status,
+                    bool IsCaseSensitive, uint64_t ValidityToken = 0);
+    ~StatCacheWriter();
+
+    // The writer owns Generator through a raw pointer that its destructor
+    // deletes; a copy would delete the same generator twice.
+    StatCacheWriter(const StatCacheWriter &) = delete;
+    StatCacheWriter &operator=(const StatCacheWriter &) = delete;
+
+    /// Add a cache entry storing \a Status for the file at \a Path.
+    void addEntry(StringRef Path, const sys::fs::file_status &Status);
+
+    /// Write the cache file to \a Out.
+    size_t writeStatCache(raw_fd_ostream &Out);
+  };
+
+public:
+  /// Validate that the file content in \a Buffer is a valid stat cache file.
+  /// \a BaseDir, \a IsCaseSensitive, \a VersionMatch and \a ValidityToken are
+  /// output parameters that get populated by this call.
+  static Error validateCacheFile(llvm::MemoryBufferRef Buffer,
+                                 StringRef &BaseDir, bool &IsCaseSensitive,
+                                 bool &VersionMatch, uint64_t &ValidityToken);
+
+  /// Update the ValidityToken data in \a CacheFile.
+  static void updateValidityToken(raw_fd_ostream &CacheFile,
+                                  uint64_t ValidityToken);
+
+private:
+  std::unique_ptr<llvm::MemoryBuffer> StatCacheFile;
+  llvm::StringRef StatCachePrefix;
+  std::unique_ptr<StatCacheType> StatCache;
+  bool IsCaseSensitive = true;
+};
+
+} // namespace vfs
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_STATCACHEFILESYSTEM_H
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -212,6 +212,7 @@
   SmallVector.cpp
   SourceMgr.cpp
   SpecialCaseList.cpp
+  StatCacheFileSystem.cpp
   Statistic.cpp
   StringExtras.cpp
   StringMap.cpp
diff --git a/llvm/lib/Support/StatCacheFileSystem.cpp b/llvm/lib/Support/StatCacheFileSystem.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Support/StatCacheFileSystem.cpp
@@ -0,0 +1,306 @@
+//===- StatCacheFileSystem.cpp - Status Caching Proxy File System ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StatCacheFileSystem.h"
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+namespace llvm {
+namespace vfs {
+
+/// Traits used to read the on-disk hash table: keys are paths, values are
+/// file_status structures stored as raw bytes.
+class StatCacheFileSystem::StatCacheLookupInfo {
+public:
+  typedef StringRef external_key_type;
+  typedef StringRef internal_key_type;
+  typedef llvm::sys::fs::file_status data_type;
+  typedef uint32_t hash_value_type;
+  typedef uint32_t offset_type;
+
+  static bool EqualKey(const internal_key_type &a, const internal_key_type &b) {
+    return a == b;
+  }
+
+  // NOTE(review): the value computed here has to match the one the generator
+  // stored on disk, possibly in another process - confirm hash_value() is
+  // stable enough for that.
+  static hash_value_type ComputeHash(const internal_key_type &a) {
+    return hash_value(a);
+  }
+
+  /// Read the key and data lengths that precede every entry and advance \p d
+  /// past them.
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char *&d) {
+    using namespace llvm::support;
+    unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
+    unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static const internal_key_type &GetInternalKey(const external_key_type &x) {
+    return x;
+  }
+
+  static const external_key_type &GetExternalKey(const internal_key_type &x) {
+    return x;
+  }
+
+  static internal_key_type ReadKey(const unsigned char *d, unsigned n) {
+    return StringRef((const char *)d, n);
+  }
+
+  /// Copy the file_status stored at \p d out of the table.
+  static data_type ReadData(const internal_key_type &k, const unsigned char *d,
+                            unsigned DataLen) {
+    // Entries are raw file_status images; the copy below always reads
+    // sizeof(data_type) bytes, so an entry of any other size is not ours.
+    assert(DataLen == sizeof(data_type) && "Unexpected stat cache entry size.");
+    data_type Result;
+    memcpy(&Result, d, sizeof(Result));
+    return Result;
+  }
+};
+
+/// Traits used to emit the on-disk hash table; the counterpart of
+/// StatCacheLookupInfo.
+class StatCacheFileSystem::StatCacheGenerationInfo {
+public:
+  typedef StringRef key_type;
+  typedef const StringRef &key_type_ref;
+  typedef sys::fs::file_status data_type;
+  typedef const sys::fs::file_status &data_type_ref;
+  typedef uint32_t hash_value_type;
+  typedef uint32_t offset_type;
+
+  /// Calculate the hash for Key
+  static hash_value_type ComputeHash(key_type_ref Key) {
+    return static_cast<hash_value_type>(hash_value(Key));
+  }
+
+  /// Write the lengths, in bytes, of the given Key/Data pair to Out and
+  /// return them.
+  static std::pair<unsigned, unsigned>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data) {
+    using namespace llvm::support;
+    endian::Writer LE(Out, little);
+    unsigned KeyLen = Key.size();
+    unsigned DataLen = sizeof(Data);
+    LE.write<uint16_t>(KeyLen);
+    LE.write<uint16_t>(DataLen);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen) {
+    Out.write(Key.data(), KeyLen);
+  }
+
+  /// Write Data to Out. DataLen is the length from EmitKeyDataLength.
+  static void EmitData(raw_ostream &Out, key_type_ref Key, data_type_ref Data,
+                       unsigned Len) {
+    Out.write((const char *)&Data, Len);
+  }
+
+  static bool EqualKey(key_type_ref Key1, key_type_ref Key2) {
+    return Key1 == Key2;
+  }
+};
+
+// The format of the stat cache is (pseudo-code):
+//  struct stat_cache {
+//    char     Magic[4];       // "STAT" or "Stat"
+//    uint32_t BucketOffset;   // See BucketOffset in OnDiskHashTable.h
+//    uint64_t ValidityToken;  // Platform specific data allowing to check
+//                             // whether the cache is up-to-date.
+//    uint32_t Version;        // The stat cache format version.
+//    char     BaseDir[N];     // Zero terminated path to the base directory
+//    < OnDiskHashtable Data > // Data for the hash table. The keys are the
+//                             // relative paths under BaseDir. The data is
+//                             // llvm::sys::fs::file_status structures.
+// }; + +#define MAGIC_CASE_SENSITIVE "Stat" +#define MAGIC_CASE_INSENSITIVE "STAT" +#define STAT_CACHE_VERSION 1 + +namespace { +struct StatCacheHeader { + char Magic[4]; + uint32_t BucketOffset; + uint64_t ValidityToken; + uint32_t Version; + char BaseDir[1]; +}; +} // namespace + +StatCacheFileSystem::StatCacheFileSystem( + std::unique_ptr CacheFile, IntrusiveRefCntPtr FS, + bool IsCaseSensitive) + : ProxyFileSystem(std::move(FS)), StatCacheFile(std::move(CacheFile)), + IsCaseSensitive(IsCaseSensitive) { + const char *CacheFileStart = StatCacheFile->getBufferStart(); + auto *Header = reinterpret_cast(CacheFileStart); + + uint32_t BucketOffset = Header->BucketOffset; + StatCachePrefix = StringRef(Header->BaseDir); + // HashTableStart points at the beginning of the data emitted by the + // OnDiskHashTable. + const unsigned char *HashTableStart = (const unsigned char *)CacheFileStart + + StatCachePrefix.size() + + sizeof(StatCacheHeader); + StatCache.reset(StatCacheType::Create( + (const unsigned char *)CacheFileStart + BucketOffset, HashTableStart, + (const unsigned char *)CacheFileStart)); +} + +Expected> +StatCacheFileSystem::create(std::unique_ptr CacheBuffer, + IntrusiveRefCntPtr FS) { + StringRef BaseDir; + bool IsCaseSensitive; + bool VersionMatch; + uint64_t ValidityToken; + if (auto E = validateCacheFile(*CacheBuffer, BaseDir, IsCaseSensitive, + VersionMatch, ValidityToken)) + return E; + if (!VersionMatch) { + return createStringError(inconvertibleErrorCode(), + CacheBuffer->getBufferIdentifier() + + ": Mismatched cache file version"); + } + return new StatCacheFileSystem(std::move(CacheBuffer), FS, IsCaseSensitive); +} + +ErrorOr StatCacheFileSystem::status(const Twine &Path) { + SmallString<180> StringPath; + Path.toVector(StringPath); + // If the cache is not case sensitive, do all operations on lower-cased paths. 
+ if (!IsCaseSensitive) + std::transform(StringPath.begin(), StringPath.end(), StringPath.begin(), + toLower); + + // Canonicalize the path. This removes single dot path components, + // but it also gets rid of repeating separators. + llvm::sys::path::remove_dots(StringPath); + + // If on Windows, canonicalize separators. + llvm::sys::path::make_preferred(StringPath); + + // Check if the requested path falls into the cache. + StringRef SuffixPath(StringPath); + if (!SuffixPath.consume_front(StatCachePrefix)) + return ProxyFileSystem::status(Path); + + auto It = StatCache->find(SuffixPath); + if (It == StatCache->end()) { + // We didn't find the file in the cache even though it started with the + // cache prefix. It could be that the file doesn't exist, or the spelling + // of the path is different. `remove_dots` canonicalizes the path by removing + // `.` and excess separators, but leaves `..` since it isn't semantically + // preserving to remove them in the presence of symlinks. If the path + // does not contain '..' we can safely say it doesn't exist. + if (std::find(sys::path::begin(SuffixPath), sys::path::end(SuffixPath), + "..") == sys::path::end(SuffixPath)) { + return llvm::errc::no_such_file_or_directory; + } + return ProxyFileSystem::status(Path); + } + + // clang-stat-cache will record entries for broken symlinks with a default- + // constructed Status. This will have a default-constructed UniqueID. + if ((*It).getUniqueID() == llvm::sys::fs::UniqueID()) + return llvm::errc::no_such_file_or_directory; + + return llvm::vfs::Status::copyWithNewName(*It, Path); +} + +StatCacheFileSystem::StatCacheWriter::StatCacheWriter( + StringRef BaseDir, const sys::fs::file_status &Status, bool IsCaseSensitive, + uint64_t ValidityToken) + : BaseDir(IsCaseSensitive ? 
BaseDir.str() : BaseDir.lower()), + IsCaseSensitive(IsCaseSensitive), ValidityToken(ValidityToken), + Generator(new StatCacheGeneratorType()) { + addEntry(BaseDir, Status); + // If on Windows, canonicalize separators. + llvm::sys::path::make_preferred(this->BaseDir); +} + +StatCacheFileSystem::StatCacheWriter::~StatCacheWriter() { delete Generator; } + +void StatCacheFileSystem::StatCacheWriter::addEntry( + StringRef Path, const sys::fs::file_status &Status) { + llvm::SmallString<128> StoredPath; + +#if defined(_WIN32) + StoredPath = Path; + llvm::sys::path::make_preferred(StoredPath); + Path = StoredPath; +#endif + + if (!IsCaseSensitive) { + StoredPath = Path.lower(); + Path = StoredPath; + } + + LLVM_ATTRIBUTE_UNUSED bool Consumed = Path.consume_front(BaseDir); + assert(Consumed && "Path does not start with expected prefix."); + + PathStorage.emplace_back(Path.str()); + Generator->insert(PathStorage.back(), Status); +} + +size_t +StatCacheFileSystem::StatCacheWriter::writeStatCache(raw_fd_ostream &Out) { + const uint32_t Version = STAT_CACHE_VERSION; + // Magic value. + if (IsCaseSensitive) + Out.write(MAGIC_CASE_SENSITIVE, 4); + else + Out.write(MAGIC_CASE_INSENSITIVE, 4); + // Placeholder for BucketOffset, filled in below. + Out.write("\0\0\0\0", 4); + // Write out the validity token. + Out.write((const char *)&ValidityToken, sizeof(ValidityToken)); + // Write out the version. + Out.write((const char *)&Version, sizeof(Version)); + // Write out the base directory for the cache. + Out.write(BaseDir.c_str(), BaseDir.size() + 1); + // Write out the hashtable data. 
+ uint32_t BucketOffset = Generator->Emit(Out); + int Size = Out.tell(); + // Move back to right after the Magic to insert BucketOffset + Out.seek(4); + Out.write((const char *)&BucketOffset, sizeof(BucketOffset)); + return Size; +} + +Error StatCacheFileSystem::validateCacheFile(MemoryBufferRef Buffer, + StringRef &BaseDir, + bool &IsCaseSensitive, + bool &VersionMatch, + uint64_t &ValidityToken) { + auto *Header = + reinterpret_cast(Buffer.getBufferStart()); + if (Buffer.getBufferSize() < sizeof(StatCacheHeader) || + (memcmp(Header->Magic, MAGIC_CASE_INSENSITIVE, sizeof(Header->Magic)) && + memcmp(Header->Magic, MAGIC_CASE_SENSITIVE, sizeof(Header->Magic))) || + Header->BucketOffset > Buffer.getBufferSize()) + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); + + auto PathLen = + strnlen(Header->BaseDir, + Buffer.getBufferSize() - offsetof(StatCacheHeader, BaseDir)); + if (Header->BaseDir[PathLen] != 0) + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); + + IsCaseSensitive = Header->Magic[1] == MAGIC_CASE_SENSITIVE[1]; + VersionMatch = Header->Version == STAT_CACHE_VERSION; + BaseDir = StringRef(Header->BaseDir, PathLen); + ValidityToken = Header->ValidityToken; + + return ErrorSuccess(); +} + +void StatCacheFileSystem::updateValidityToken(raw_fd_ostream &CacheFile, + uint64_t ValidityToken) { + CacheFile.pwrite(reinterpret_cast(&ValidityToken), + sizeof(ValidityToken), + offsetof(StatCacheHeader, ValidityToken)); +} + +} // namespace vfs +} // namespace llvm diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -14,9 +14,11 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/StatCacheFileSystem.h" #include "llvm/Testing/Support/SupportHelpers.h" #include 
"gmock/gmock.h" #include "gtest/gtest.h" +#include #include #include @@ -3228,3 +3230,306 @@ " DummyFileSystem (RecursiveContents)\n", Output); } + +class StatCacheFileSystemTest : public ::testing::Test { +public: + void SetUp() override {} + + template + void createStatCacheFileSystem( + StringRef OutputFile, StringRef BaseDir, bool IsCaseSensitive, + IntrusiveRefCntPtr &Result, + StringCollection &Filenames, + IntrusiveRefCntPtr Lower = new ErrorDummyFileSystem(), + uint64_t ValidityToken = 0) { + sys::fs::file_status s; + status(BaseDir, s); + vfs::StatCacheFileSystem::StatCacheWriter Generator( + BaseDir, s, IsCaseSensitive, ValidityToken); + std::error_code ErrorCode; + + Result.reset(); + + // Base path should be present in the stat cache. + Filenames.push_back(std::string(BaseDir)); + + for (sys::fs::recursive_directory_iterator I(BaseDir, ErrorCode), E; + I != E && !ErrorCode; I.increment(ErrorCode)) { + Filenames.push_back(I->path()); + StringRef Path(Filenames.back().c_str()); + status(Path, s); + Generator.addEntry(Path, s); + } + + { + raw_fd_ostream StatCacheFile(OutputFile, ErrorCode); + ASSERT_FALSE(ErrorCode); + Generator.writeStatCache(StatCacheFile); + } + + loadCacheFile(OutputFile, ValidityToken, Lower, Result); + } + + void loadCacheFile(StringRef OutputFile, uint64_t ExpectedValidityToken, + IntrusiveRefCntPtr Lower, + IntrusiveRefCntPtr &Result) { + auto ErrorOrBuffer = MemoryBuffer::getFile(OutputFile); + EXPECT_TRUE(ErrorOrBuffer); + StringRef CacheBaseDir; + bool IsCaseSensitive; + bool VersionMatch; + uint64_t FileValidityToken; + auto E = vfs::StatCacheFileSystem::validateCacheFile( + (*ErrorOrBuffer)->getMemBufferRef(), CacheBaseDir, IsCaseSensitive, + VersionMatch, FileValidityToken); + ASSERT_FALSE(E); + EXPECT_TRUE(VersionMatch); + EXPECT_EQ(FileValidityToken, ExpectedValidityToken); + auto ExpectedCache = + vfs::StatCacheFileSystem::create(std::move(*ErrorOrBuffer), Lower); + ASSERT_FALSE(ExpectedCache.takeError()); + Result = 
*ExpectedCache; + } + + template + void + compareStatCacheToRealFS(IntrusiveRefCntPtr CacheFS, + const StringCollection &Files) { + IntrusiveRefCntPtr RealFS = vfs::getRealFileSystem(); + + for (auto &File : Files) { + auto ErrorOrStatus1 = RealFS->status(File); + auto ErrorOrStatus2 = CacheFS->status(File); + + EXPECT_EQ((bool)ErrorOrStatus1, (bool)ErrorOrStatus2); + if (!ErrorOrStatus1 || !ErrorOrStatus2) + continue; + + vfs::Status s1 = *ErrorOrStatus1, s2 = *ErrorOrStatus2; + EXPECT_EQ(s1.getName(), s2.getName()); + EXPECT_EQ(s1.getType(), s2.getType()); + EXPECT_EQ(s1.getPermissions(), s2.getPermissions()); + EXPECT_EQ(s1.getLastModificationTime(), s2.getLastModificationTime()); + EXPECT_EQ(s1.getUniqueID(), s2.getUniqueID()); + EXPECT_EQ(s1.getUser(), s2.getUser()); + EXPECT_EQ(s1.getGroup(), s2.getGroup()); + EXPECT_EQ(s1.getSize(), s2.getSize()); + } + } +}; + +TEST_F(StatCacheFileSystemTest, Basic) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + TempFile _ace(TestDirectory.path("a/c/e")); + TempFile _acf(TestDirectory.path("a/c/f"), "", "More dummy contents"); + TempDir _ag(TestDirectory.path("a/g")); + TempFile _agh(TestDirectory.path("a/g/h")); + + StringRef BaseDir(_a.path()); + + SmallVector Filenames; + IntrusiveRefCntPtr StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + compareStatCacheToRealFS(StatCacheFS, Filenames); +} + +TEST_F(StatCacheFileSystemTest, CaseSensitivity) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + TempDir 
_b(TestDirectory.path("B")); + TempDir _bc(TestDirectory.path("B/c")); + TempFile _bcd(TestDirectory.path("B/c/D"), "", "Dummy contents"); + + StringRef BaseDir(TestDirectory.path()); + SmallVector Filenames; + IntrusiveRefCntPtr StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + + auto ErrorOrStatus = StatCacheFS->status(_acd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(_bcd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D")); + EXPECT_FALSE(ErrorOrStatus); + + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ false, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + ErrorOrStatus = StatCacheFS->status(_acd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(_bcd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = 
StatCacheFS->status(TestDirectory.path("B/C/D")); + EXPECT_TRUE(ErrorOrStatus); +} + +TEST_F(StatCacheFileSystemTest, DotDot) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempDir _ab(TestDirectory.path("a/b")); + TempFile _abd(TestDirectory.path("a/b/d")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d")); + + StringRef BaseDir(_a.path()); + SmallVector Filenames; + IntrusiveRefCntPtr StatCacheFS; + auto RealFS = vfs::getRealFileSystem(); + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, Filenames, + RealFS); + ASSERT_TRUE(StatCacheFS); + + // Create a file in the cached prefix after the cache was created. + TempFile _abe(TestDirectory.path("a/b/e")); + // Verify the cache is kicking in. + ASSERT_FALSE(StatCacheFS->status(_abe.path())); + // We can access the new file using a ".." because the StatCache will + // just pass that request to the FileSystem below it. 
+ const SmallString<128> PathsToTest[] = { + TestDirectory.path("a/b/../e"), + TestDirectory.path("a/b/../c/d"), + TestDirectory.path("a/b/.."), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} + +#ifdef LLVM_ON_UNIX +TEST_F(StatCacheFileSystemTest, Links) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempLink _ab("d", TestDirectory.path("a/b")); + TempFile _ac(TestDirectory.path("a/c")); + TempDir _ad(TestDirectory.path("a/d")); + TempFile _add(TestDirectory.path("a/d/d"), "", "Dummy contents"); + TempFile _ade(TestDirectory.path("a/d/e")); + TempFile _adf(TestDirectory.path("a/d/f"), "", "More dummy contents"); + TempLink _adg(_ad.path(), TestDirectory.path("a/d/g")); + TempDir _ah(TestDirectory.path("a/h")); + TempLink _ahi(_ad.path(), TestDirectory.path("a/h/i")); + TempLink _ahj("no_such_file", TestDirectory.path("a/h/j")); + + StringRef BaseDir(_a.path()); + + SmallVector Filenames; + IntrusiveRefCntPtr StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/d/g/g")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/b/e")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/h/i/f")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/h/j")), + Filenames.end()); + compareStatCacheToRealFS(StatCacheFS, Filenames); + + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, Filenames, + vfs::getRealFileSystem()); + const SmallString<128> PathsToTest[] = { + TestDirectory.path("a/h/i/../c"), + TestDirectory.path("a/b/../d"), + TestDirectory.path("a/g/g/../c"), + 
TestDirectory.path("a/b/.."), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} +#endif + +TEST_F(StatCacheFileSystemTest, Canonical) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + + StringRef BaseDir(_a.path()); + SmallVector Filenames; + IntrusiveRefCntPtr StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + + const SmallString<128> PathsToTest[] = { + TestDirectory.path("./a/b"), TestDirectory.path("a//./b"), + TestDirectory.path("a///b"), TestDirectory.path("a//c//d"), + TestDirectory.path("a//c/./d"), TestDirectory.path("a/./././b"), + TestDirectory.path("a/.//.//.//b"), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} + +TEST_F(StatCacheFileSystemTest, ValidityToken) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + + StringRef BaseDir(_a.path()); + IntrusiveRefCntPtr StatCacheFS; + { + SmallVector Filenames; + uint64_t ValidityToken = 0x1234567890abcfef; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames, new DummyFileSystem(), ValidityToken); + ASSERT_TRUE(StatCacheFS); + } + + uint64_t UpdatedValidityToken = 0xabcdef0123456789; + { + std::error_code EC; + raw_fd_ostream CacheFile(TestDirectory.path("stat.cache"), EC, + sys::fs::CD_OpenAlways); + ASSERT_FALSE(EC); + vfs::StatCacheFileSystem::updateValidityToken(CacheFile, + UpdatedValidityToken); + } + + loadCacheFile(TestDirectory.path("stat.cache"), 
UpdatedValidityToken, + new DummyFileSystem(), StatCacheFS); + EXPECT_TRUE(StatCacheFS); +}