diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -122,6 +122,32 @@
   return It.first->getValue();
 }
 
+/// Whitelist file extensions that should be minimized, treating no extension as
+/// a source file that should be minimized.
+///
+/// This is kinda hacky, it would be better if we knew what kind of file Clang
+/// was expecting instead.
+static bool shouldMinimize(StringRef Filename) {
+  StringRef Ext = llvm::sys::path::extension(Filename);
+  if (Ext.empty())
+    return true; // Extension-less headers, e.g. the C++ standard library.
+  return llvm::StringSwitch<bool>(Ext)
+      .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
+      .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
+      .CasesLower(".m", ".mm", true)
+      .CasesLower(".i", ".ii", ".mi", ".mmi", true)
+      .CasesLower(".def", ".inc", true)
+      .Default(false);
+}
+
+/// Whether a failed stat of \p Filename is safe to cache (source files only).
+static bool shouldCacheStatFailures(StringRef Filename) {
+  StringRef Ext = llvm::sys::path::extension(Filename);
+  if (Ext.empty())
+    return false; // This may be the module cache directory.
+  return shouldMinimize(Filename); // Only cache stat failures on source files.
+}
+
 llvm::ErrorOr<const CachedFileSystemEntry *>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     const StringRef Filename) {
@@ -132,7 +158,8 @@
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
 
-  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  bool KeepOriginalSource = IgnoredFiles.count(Filename) ||
+                            !shouldMinimize(Filename);
   DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
       &SharedCacheEntry = SharedCache.get(Filename);
   const CachedFileSystemEntry *Result;
@@ -143,9 +170,16 @@
     if (!CacheEntry.isValid()) {
       llvm::vfs::FileSystem &FS = getUnderlyingFS();
       auto MaybeStatus = FS.status(Filename);
-      if (!MaybeStatus)
-        CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
-      else if (MaybeStatus->isDirectory())
+      if (!MaybeStatus) {
+        if (!shouldCacheStatFailures(Filename))
+          // HACK: We need to always restat non-source files if the stat fails.
+          // This is because Clang first looks up the module cache and module
+          // files before building them, and then looks for them again. If we
+          // cache the stat failure, it won't see them the second time.
+          return MaybeStatus.getError();
+        else
+          CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
+      } else if (MaybeStatus->isDirectory())
         CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
             std::move(*MaybeStatus));
       else
diff --git a/clang/test/ClangScanDeps/Inputs/module.modulemap b/clang/test/ClangScanDeps/Inputs/module.modulemap
new file mode 100644
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/module.modulemap
@@ -0,0 +1,7 @@
+module header1 {
+  header "header.h"
+}
+
+module header2 {
+  header "header2.h"
+}
diff --git a/clang/test/ClangScanDeps/Inputs/modules_cdb.json b/clang/test/ClangScanDeps/Inputs/modules_cdb.json
new file mode 100644
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/modules_cdb.json
@@ -0,0 +1,13 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E -fsyntax-only DIR/modules_cdb_input2.cpp -IInputs -D INCLUDE_HEADER2 -MD -MF DIR/modules_cdb2.d -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -fimplicit-module-maps",
+  "file": "DIR/modules_cdb_input2.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/modules_cdb_input.cpp -IInputs -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -fimplicit-module-maps",
+  "file": "DIR/modules_cdb_input.cpp"
+}
+]
+
diff --git a/clang/test/ClangScanDeps/modules.cpp b/clang/test/ClangScanDeps/modules.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules.cpp
@@ -0,0 +1,41 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: rm -rf %t.module-cache
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/modules_cdb_input.cpp
+// RUN: cp %s %t.dir/modules_cdb_input2.cpp
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
+// RUN: cp %S/Inputs/module.modulemap %t.dir/Inputs/module.modulemap
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
+//
+// The output order is non-deterministic when using more than one thread,
+// so check the output using two runs. Note that the 'NOT' check is not used
+// as it might fail if the results for `modules_cdb_input.cpp` are reported before
+// `modules_cdb_input2.cpp`.
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
+
+#include "header.h"
+
+// CHECK1: modules_cdb_input2.cpp
+// CHECK1-NEXT: modules_cdb_input2.cpp
+// CHECK1-NEXT: Inputs{{/|\\}}module.modulemap
+// CHECK1-NEXT: Inputs{{/|\\}}header2.h
+// CHECK1: Inputs{{/|\\}}header.h
+
+// CHECK2: modules_cdb_input.cpp
+// CHECK2-NEXT: Inputs{{/|\\}}module.modulemap
+// CHECK2-NEXT: Inputs{{/|\\}}header.h
+// CHECK2NO-NOT: header2