diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -402,6 +402,9 @@ /// Record code for \#pragma diagnostic mappings. DIAG_PRAGMA_MAPPINGS, + + /// Record code for the indices of used header search entries. + HEADER_SEARCH_ENTRY_USAGE, }; /// Record code for extension blocks. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2287,6 +2287,10 @@ /// Loads comments ranges. void ReadComments() override; + /// Visit the indices of used header search directories. + void visitHeaderSearchEntryUsage(const serialization::ModuleFile &MF, + llvm::function_ref Visitor); + /// Visit all the input files of the given module file. void visitInputFiles(serialization::ModuleFile &MF, bool IncludeSystem, bool Complain, diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -173,6 +173,12 @@ /// unique module files based on AST contents. ASTFileSignature ASTBlockHash; + /// The number of header search entries for which the usage is tracked. + unsigned HeaderSearchUsageSize = 0; + + /// The bit vector denoting usage of each header search entry. + const char *HeaderSearchUsage = nullptr; + /// Whether this module has been directly imported by the /// user. 
bool DirectlyImported = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -48,7 +48,8 @@ public: DependencyScanningService(ScanningMode Mode, ScanningOutputFormat Format, bool ReuseFileManager = true, - bool SkipExcludedPPRanges = true); + bool SkipExcludedPPRanges = true, + bool OptimizeArgs = false); ScanningMode getMode() const { return Mode; } @@ -58,6 +59,8 @@ bool canSkipExcludedPPRanges() const { return SkipExcludedPPRanges; } + bool canOptimizeArgs() const { return OptimizeArgs; } + DependencyScanningFilesystemSharedCache &getSharedCache() { return SharedCache; } @@ -70,6 +73,8 @@ /// ranges by bumping the buffer pointer in the lexer instead of lexing the /// tokens in the range until reaching the corresponding directive. const bool SkipExcludedPPRanges; + /// Whether to optimize the modules' command-line arguments. + const bool OptimizeArgs; /// The global file system cache. DependencyScanningFilesystemSharedCache SharedCache; }; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -77,6 +77,8 @@ /// worker. If null, the file manager will not be reused. llvm::IntrusiveRefCntPtr Files; ScanningOutputFormat Format; + /// Whether to optimize the modules' command-line arguments. 
+ bool OptimizeArgs; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -158,7 +158,8 @@ class ModuleDepCollector final : public DependencyCollector { public: ModuleDepCollector(std::unique_ptr Opts, - CompilerInstance &I, DependencyConsumer &C); + CompilerInstance &I, DependencyConsumer &C, + bool OptimizeArgs); void attachToPreprocessor(Preprocessor &PP) override; void attachToASTReader(ASTReader &R) override; @@ -170,6 +171,8 @@ CompilerInstance &Instance; /// The consumer of collected dependency information. DependencyConsumer &Consumer; + /// Whether to optimize the modules' command-line arguments. + bool OptimizeArgs; /// Path to the main source file. std::string MainFile; /// Hash identifying the compilation conditions of the current TU. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4738,7 +4738,9 @@ // Read and process a record. Record.clear(); - Expected MaybeRecordType = Stream.readRecord(Entry.ID, Record); + StringRef Blob; + Expected MaybeRecordType = + Stream.readRecord(Entry.ID, Record, &Blob); if (!MaybeRecordType) { // FIXME this drops the error. 
return Failure; @@ -4770,6 +4772,12 @@ F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(), Record.begin(), Record.end()); break; + case HEADER_SEARCH_ENTRY_USAGE: + if (F) { + F->HeaderSearchUsageSize = Record[0]; + F->HeaderSearchUsage = Blob.data(); + } + break; } } } @@ -9114,6 +9122,18 @@ } } +void ASTReader::visitHeaderSearchEntryUsage( + const serialization::ModuleFile &MF, + llvm::function_ref Visitor) { + const char *byte = MF.HeaderSearchUsage; + for (unsigned i = 0, e = MF.HeaderSearchUsageSize; i < e;) { + for (unsigned b = 0; b < 8 && i < e; ++b, ++i) + if (*byte & (1 << b)) + Visitor(i); + ++byte; + } +} + void ASTReader::visitInputFiles(serialization::ModuleFile &MF, bool IncludeSystem, bool Complain, llvm::function_ref &v) { + std::string str; + str.reserve((v.size() + 7) / 8); // Round up to whole bytes. + for (unsigned i = 0, e = v.size(); i < e;) { + char byte = 0; + for (unsigned b = 0; b < 8 && i < e; ++b, ++i) + byte |= v[i] << b; + str += byte; + } + return str; +} + //===----------------------------------------------------------------------===// // Type serialization //===----------------------------------------------------------------------===// @@ -1050,6 +1062,8 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context) { + using namespace llvm; + // Flush first to prepare the PCM hash (signature). Stream.FlushToWord(); auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3; @@ -1093,10 +1107,24 @@ // Note: we don't serialize the log or serialization file names, because they // are generally transient files and will almost always be overridden. Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record); + Record.clear(); // Write out the diagnostic/pragma mappings. WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); + // Header search entry usage. 
+ auto HSEntryUsage = PP.getHeaderSearchInfo().ComputeUserEntryUsage(); + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. + unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + { + RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE, + HSEntryUsage.size()}; + Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage)); + } + // Leave the options block. Stream.ExitBlock(); return Signature; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp @@ -14,6 +14,6 @@ DependencyScanningService::DependencyScanningService( ScanningMode Mode, ScanningOutputFormat Format, bool ReuseFileManager, - bool SkipExcludedPPRanges) + bool SkipExcludedPPRanges, bool OptimizeArgs) : Mode(Mode), Format(Format), ReuseFileManager(ReuseFileManager), - SkipExcludedPPRanges(SkipExcludedPPRanges) {} + SkipExcludedPPRanges(SkipExcludedPPRanges), OptimizeArgs(OptimizeArgs) {} diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -52,10 +52,10 @@ StringRef WorkingDirectory, DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings, - ScanningOutputFormat Format) + ScanningOutputFormat Format, bool OptimizeArgs) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), - DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), - 
Format(Format) {} + DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), + OptimizeArgs(OptimizeArgs) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -121,7 +121,7 @@ break; case ScanningOutputFormat::Full: Compiler.addDependencyCollector(std::make_shared( - std::move(Opts), Compiler, Consumer)); + std::move(Opts), Compiler, Consumer, OptimizeArgs)); break; } @@ -145,13 +145,14 @@ llvm::IntrusiveRefCntPtr DepFS; ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings; ScanningOutputFormat Format; + bool OptimizeArgs; }; } // end anonymous namespace DependencyScanningWorker::DependencyScanningWorker( DependencyScanningService &Service) - : Format(Service.getFormat()) { + : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()) { DiagOpts = new DiagnosticOptions(); PCHContainerOps = std::make_shared(); RealFS = llvm::vfs::createPhysicalFileSystem(); @@ -194,7 +195,7 @@ Tool.setPrintErrorMessage(false); Tool.setDiagnosticConsumer(&DC); DependencyScanningAction Action(WorkingDirectory, Consumer, DepFS, - PPSkipMappings.get(), Format); + PPSkipMappings.get(), Format, OptimizeArgs); return !Tool.run(&Action); }); } diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -18,9 +18,18 @@ using namespace tooling; using namespace dependencies; -static CompilerInvocation -makeInvocationForModuleBuildWithoutPaths(const ModuleDeps &Deps, - const CompilerInvocation &Invocation) { +static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, + ASTReader &Reader, + const serialization::ModuleFile &MF) { + auto Entries = std::move(Opts.UserEntries); // Steal entries; no deep copy. + Opts.UserEntries.clear(); // Moved-from state is unspecified; reset. + Reader.visitHeaderSearchEntryUsage( + MF, [&](unsigned Idx) { Opts.UserEntries.push_back(Entries[Idx]); }); +} + +static 
CompilerInvocation makeInvocationForModuleBuildWithoutPaths( + const ModuleDeps &Deps, const CompilerInvocation &Invocation, + llvm::function_ref Optimize) { // Make a deep copy of the invocation. CompilerInvocation CI(Invocation); @@ -34,6 +43,8 @@ CI.getLangOpts()->ImplicitModules = false; + Optimize(CI); + return CI; } @@ -203,8 +214,12 @@ MD.FileDeps.insert(IF.getFile()->getName()); }); - MD.Invocation = - makeInvocationForModuleBuildWithoutPaths(MD, Instance.getInvocation()); + MD.Invocation = makeInvocationForModuleBuildWithoutPaths( + MD, Instance.getInvocation(), [&](CompilerInvocation &CI) { + if (MDC.OptimizeArgs) + optimizeHeaderSearchOpts(CI.getHeaderSearchOpts(), + *MDC.Instance.getASTReader(), *MF); + }); MD.ID.ContextHash = MD.Invocation.getModuleHash(); llvm::DenseSet AddedModules; @@ -236,8 +251,9 @@ ModuleDepCollector::ModuleDepCollector( std::unique_ptr Opts, CompilerInstance &I, - DependencyConsumer &C) - : Instance(I), Consumer(C), Opts(std::move(Opts)) {} + DependencyConsumer &C, bool OptimizeArgs) + : Instance(I), Consumer(C), OptimizeArgs(OptimizeArgs), + Opts(std::move(Opts)) {} void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { PP.addPPCallbacks(std::make_unique(Instance, *this)); diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/a/a.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/a/a.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/b/b.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/b/b.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/begin/begin.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/begin/begin.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json b/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json @@ -0,0 +1,7 @@ +[ + { + 
"directory": "DIR", + "command": "clang -E DIR/header-search-pruning.cpp -Ibegin -I1 -Ia -I3 -I4 -I5 -I6 -Ib -I8 -Iend DEFINES -fmodules -fcxx-modules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -fmodule-map-file=DIR/module.modulemap", + "file": "DIR/header-search-pruning.cpp" + } +] diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/end/end.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/end/end.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h @@ -0,0 +1,11 @@ +#include "begin.h" + +#ifdef INCLUDE_A +#include "a.h" +#endif + +#ifdef INCLUDE_B +#include "b.h" +#endif + +#include "end.h" diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap b/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap @@ -0,0 +1,4 @@ +module mod { + header "mod.h" + export * +} diff --git a/clang/test/ClangScanDeps/header-search-pruning.cpp b/clang/test/ClangScanDeps/header-search-pruning.cpp new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/header-search-pruning.cpp @@ -0,0 +1,85 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: cp -r %S/Inputs/header-search-pruning/* %t +// RUN: cp %S/header-search-pruning.cpp %t/header-search-pruning.cpp +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_A|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_a.json +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_B|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_b.json +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_A -DINCLUDE_B|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_ab.json +// +// RUN: clang-scan-deps -compilation-database %t/cdb_a.json -format 
experimental-full -optimize-args >> %t/result_a.json +// RUN: cat %t/result_a.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_A %s +// +// RUN: clang-scan-deps -compilation-database %t/cdb_b.json -format experimental-full -optimize-args >> %t/result_b.json +// RUN: cat %t/result_b.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_B %s +// +// RUN: clang-scan-deps -compilation-database %t/cdb_ab.json -format experimental-full -optimize-args >> %t/result_ab.json +// RUN: cat %t/result_ab.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_AB %s + +#include "mod.h" + +// CHECK_A: { +// CHECK_A-NEXT: "modules": [ +// CHECK_A-NEXT: { +// CHECK_A-NEXT: "clang-module-deps": [], +// CHECK_A-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_A-NEXT: "command-line": [ +// CHECK_A-NEXT: "-cc1" +// CHECK_A: "-I", +// CHECK_A-NEXT: "begin", +// CHECK_A-NEXT: "-I", +// CHECK_A-NEXT: "a", +// CHECK_A-NEXT: "-I", +// CHECK_A-NEXT: "end" +// CHECK_A: ], +// CHECK_A-NEXT: "context-hash": "{{.*}}", +// CHECK_A-NEXT: "file-deps": [ +// CHECK_A: ], +// CHECK_A-NEXT: "name": "mod" +// CHECK_A-NEXT: } +// CHECK_A-NEXT: ] +// CHECK_A: } + +// CHECK_B: { +// CHECK_B-NEXT: "modules": [ +// CHECK_B-NEXT: { +// CHECK_B-NEXT: "clang-module-deps": [], +// CHECK_B-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_B-NEXT: "command-line": [ +// CHECK_B-NEXT: "-cc1" +// CHECK_B: "-I", +// CHECK_B-NEXT: "begin", +// CHECK_B-NEXT: "-I", +// CHECK_B-NEXT: "b", +// CHECK_B-NEXT: "-I", +// CHECK_B-NEXT: "end" +// CHECK_B: ], +// CHECK_B-NEXT: "context-hash": "{{.*}}", +// CHECK_B-NEXT: "file-deps": [ +// CHECK_B: ], +// CHECK_B-NEXT: "name": "mod" +// CHECK_B-NEXT: } +// CHECK_B-NEXT: ] +// CHECK_B: } + +// CHECK_AB: { +// CHECK_AB-NEXT: "modules": [ +// CHECK_AB-NEXT: { +// CHECK_AB-NEXT: "clang-module-deps": [], +// CHECK_AB-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_AB-NEXT: "command-line": [ +// CHECK_AB-NEXT: "-cc1" +// CHECK_AB: "-I", +// CHECK_AB-NEXT: "begin", +// 
CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "a", +// CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "b", +// CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "end" +// CHECK_AB: ], +// CHECK_AB-NEXT: "context-hash": "{{.*}}", +// CHECK_AB-NEXT: "file-deps": [ +// CHECK_AB: ], +// CHECK_AB-NEXT: "name": "mod" +// CHECK_AB-NEXT: } +// CHECK_AB-NEXT: ] +// CHECK_AB: } diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -163,6 +163,11 @@ "'-fmodule-file=', '-o', '-fmodule-map-file='."), llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory)); +static llvm::cl::opt OptimizeArgs( + "optimize-args", + llvm::cl::desc("Whether to optimize command-line arguments of modules."), + llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory)); + llvm::cl::opt NumThreads("j", llvm::cl::Optional, llvm::cl::desc("Number of worker threads to use (default: use " @@ -357,7 +362,26 @@ private: StringRef lookupPCMPath(ModuleID MID) { - return Modules[IndexedModuleID{MID, 0}].ImplicitModulePCMPath; + auto PCMPath = PCMPaths.insert({IndexedModuleID{MID, 0}, ""}); + if (PCMPath.second) + PCMPath.first->second = constructPCMPath(lookupModuleDeps(MID)); + return PCMPath.first->second; + } + + /// Construct a path where to put the explicitly built PCM - essentially the + /// path to implicitly built PCM with the context hash replaced by the final + /// (potentially modified) context hash. 
+ std::string constructPCMPath(const ModuleDeps &MD) const { + const std::string &ImplicitPCMPath = MD.ImplicitModulePCMPath; + StringRef Filename = llvm::sys::path::filename(ImplicitPCMPath); + StringRef ImplicitContextHashPath = + llvm::sys::path::parent_path(ImplicitPCMPath); + StringRef ModuleCachePath = + llvm::sys::path::parent_path(ImplicitContextHashPath); + + SmallString<64> ExplicitPCMPath = ModuleCachePath; + llvm::sys::path::append(ExplicitPCMPath, MD.ID.ContextHash, Filename); + return std::string(ExplicitPCMPath); } const ModuleDeps &lookupModuleDeps(ModuleID MID) { @@ -395,6 +419,8 @@ std::mutex Lock; std::unordered_map Modules; + std::unordered_map + PCMPaths; std::vector Inputs; }; @@ -554,7 +580,7 @@ SharedStream DependencyOS(llvm::outs()); DependencyScanningService Service(ScanMode, Format, ReuseFileManager, - SkipExcludedPPRanges); + SkipExcludedPPRanges, OptimizeArgs); llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads)); std::vector> WorkerTools; for (unsigned I = 0; I < Pool.getThreadCount(); ++I)