diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -402,6 +402,9 @@ /// Record code for \#pragma diagnostic mappings. DIAG_PRAGMA_MAPPINGS, + + /// Record code for the indices of used header search entries. + HEADER_SEARCH_ENTRY_USAGE, }; /// Record code for extension blocks. diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -20,6 +20,7 @@ #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ContinuousRangeMap.h" #include "clang/Serialization/ModuleFileExtension.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" @@ -173,6 +174,9 @@ /// unique module files based on AST contents. ASTFileSignature ASTBlockHash; + /// The bit vector denoting usage of each header search entry (true = used). + llvm::BitVector SearchPathUsage; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -48,7 +48,8 @@ public: DependencyScanningService(ScanningMode Mode, ScanningOutputFormat Format, bool ReuseFileManager = true, - bool SkipExcludedPPRanges = true); + bool SkipExcludedPPRanges = true, + bool OptimizeArgs = false); ScanningMode getMode() const { return Mode; } @@ -58,6 +59,8 @@ bool canSkipExcludedPPRanges() const { return SkipExcludedPPRanges; } + bool canOptimizeArgs() const { return OptimizeArgs; } + DependencyScanningFilesystemSharedCache &getSharedCache() { return SharedCache; } @@ -70,6 +73,8 @@ /// ranges by bumping the buffer pointer in the lexer instead of lexing the /// tokens in the range until reaching the corresponding directive. const bool SkipExcludedPPRanges; + /// Whether to optimize the modules' command-line arguments. + const bool OptimizeArgs; /// The global file system cache. DependencyScanningFilesystemSharedCache SharedCache; }; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -83,6 +83,8 @@ /// worker. If null, the file manager will not be reused. llvm::IntrusiveRefCntPtr Files; ScanningOutputFormat Format; + /// Whether to optimize the modules' command-line arguments. + bool OptimizeArgs; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -194,7 +194,7 @@ public: ModuleDepCollector(std::unique_ptr Opts, CompilerInstance &I, DependencyConsumer &C, - CompilerInvocation &&OriginalCI); + CompilerInvocation &&OriginalCI, bool OptimizeArgs); void attachToPreprocessor(Preprocessor &PP) override; void attachToASTReader(ASTReader &R) override; @@ -219,6 +219,8 @@ std::unique_ptr Opts; /// The original Clang invocation passed to dependency scanner. CompilerInvocation OriginalInvocation; + /// Whether to optimize the modules' command-line arguments. + bool OptimizeArgs; /// Checks whether the module is known as being prebuilt. bool isPrebuiltModule(const Module *M); @@ -226,8 +228,9 @@ /// Constructs a CompilerInvocation that can be used to build the given /// module, excluding paths to discovered modular dependencies that are yet to /// be built. - CompilerInvocation - makeInvocationForModuleBuildWithoutPaths(const ModuleDeps &Deps) const; + CompilerInvocation makeInvocationForModuleBuildWithoutPaths( + const ModuleDeps &Deps, + llvm::function_ref Optimize) const; }; } // end namespace dependencies diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4726,7 +4726,9 @@ // Read and process a record. Record.clear(); - Expected MaybeRecordType = Stream.readRecord(Entry.ID, Record); + StringRef Blob; + Expected MaybeRecordType = + Stream.readRecord(Entry.ID, Record, &Blob); if (!MaybeRecordType) { // FIXME this drops the error. return Failure; @@ -4758,6 +4760,17 @@ F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(), Record.begin(), Record.end()); break; + case HEADER_SEARCH_ENTRY_USAGE: + if (!F) + break; + unsigned Count = Record[0]; + const char *Byte = Blob.data(); + F->SearchPathUsage = llvm::BitVector(Count, 0); + for (unsigned I = 0; I < Count; ++Byte) + for (unsigned Bit = 0; Bit < 8 && I < Count; ++Bit, ++I) + if (*Byte & (1 << Bit)) + F->SearchPathUsage[I] = 1; + break; } } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -132,6 +132,18 @@ sizeof(T) * v.size()); } +static std::string bytes(const std::vector &V) { + std::string Str; + Str.reserve(V.size() / 8); + for (unsigned I = 0, E = V.size(); I < E;) { + char Byte = 0; + for (unsigned Bit = 0; Bit < 8 && I < E; ++Bit, ++I) + Byte |= V[I] << Bit; + Str += Byte; + } + return Str; +} + //===----------------------------------------------------------------------===// // Type serialization //===----------------------------------------------------------------------===// @@ -1050,6 +1062,8 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context) { + using namespace llvm; + // Flush first to prepare the PCM hash (signature). Stream.FlushToWord(); auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3; @@ -1093,10 +1107,24 @@ // Note: we don't serialize the log or serialization file names, because they // are generally transient files and will almost always be overridden. Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record); + Record.clear(); // Write out the diagnostic/pragma mappings. WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); + // Header search entry usage. + auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage(); + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. + unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + { + RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE, + HSEntryUsage.size()}; + Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage)); + } + // Leave the options block. Stream.ExitBlock(); return Signature; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp @@ -15,9 +15,9 @@ DependencyScanningService::DependencyScanningService( ScanningMode Mode, ScanningOutputFormat Format, bool ReuseFileManager, - bool SkipExcludedPPRanges) + bool SkipExcludedPPRanges, bool OptimizeArgs) : Mode(Mode), Format(Format), ReuseFileManager(ReuseFileManager), - SkipExcludedPPRanges(SkipExcludedPPRanges) { + SkipExcludedPPRanges(SkipExcludedPPRanges), OptimizeArgs(OptimizeArgs) { // Initialize targets for object file support. llvm::InitializeAllTargets(); llvm::InitializeAllTargetMCs(); diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -151,10 +151,11 @@ StringRef WorkingDirectory, DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings, - ScanningOutputFormat Format, llvm::Optional ModuleName = None) + ScanningOutputFormat Format, bool OptimizeArgs, + llvm::Optional ModuleName = None) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), - ModuleName(ModuleName) {} + OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -243,15 +244,16 @@ break; case ScanningOutputFormat::Full: Compiler.addDependencyCollector(std::make_shared( - std::move(Opts), Compiler, Consumer, std::move(OriginalInvocation))); + std::move(Opts), Compiler, Consumer, std::move(OriginalInvocation), + OptimizeArgs)); break; } // Consider different header search and diagnostic options to create // different modules. This avoids the unsound aliasing of module PCMs. // - // TODO: Implement diagnostic bucketing and header search pruning to reduce - // the impact of strict context hashing. + // TODO: Implement diagnostic bucketing to reduce the impact of strict + // context hashing. Compiler.getHeaderSearchOpts().ModulesStrictContextHash = true; std::unique_ptr Action; @@ -273,6 +275,7 @@ llvm::IntrusiveRefCntPtr DepFS; ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings; ScanningOutputFormat Format; + bool OptimizeArgs; llvm::Optional ModuleName; }; @@ -280,7 +283,7 @@ DependencyScanningWorker::DependencyScanningWorker( DependencyScanningService &Service) - : Format(Service.getFormat()) { + : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()) { PCHContainerOps = std::make_shared(); PCHContainerOps->registerReader( std::make_unique()); @@ -352,7 +355,8 @@ [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) { DependencyScanningAction Action( WorkingDirectory, Consumer, DepFS, - PPSkipMappings.get(), Format, ModuleName); + PPSkipMappings.get(), Format, OptimizeArgs, + ModuleName); // Create an invocation that uses the underlying file // system to ensure that any file system requests that // are made by the driver do not go through the diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -17,8 +17,20 @@ using namespace tooling; using namespace dependencies; +static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, + ASTReader &Reader, + const serialization::ModuleFile &MF) { + // Only preserve search paths that were used during the dependency scan. + std::vector Entries = Opts.UserEntries; + Opts.UserEntries.clear(); + for (unsigned I = 0; I < Entries.size(); ++I) + if (MF.SearchPathUsage[I]) + Opts.UserEntries.push_back(Entries[I]); +} + CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( - const ModuleDeps &Deps) const { + const ModuleDeps &Deps, + llvm::function_ref Optimize) const { // Make a deep copy of the original Clang invocation. CompilerInvocation CI(OriginalInvocation); @@ -41,6 +53,8 @@ CI.getFrontendOpts().ModuleMapFiles.push_back(PrebuiltModule.ModuleMapFile); } + Optimize(CI); + return CI; } @@ -235,7 +249,12 @@ llvm::DenseSet SeenModules; addAllSubmodulePrebuiltDeps(M, MD, SeenModules); - MD.Invocation = MDC.makeInvocationForModuleBuildWithoutPaths(MD); + MD.Invocation = MDC.makeInvocationForModuleBuildWithoutPaths( + MD, [&](CompilerInvocation &CI) { + if (MDC.OptimizeArgs) + optimizeHeaderSearchOpts(CI.getHeaderSearchOpts(), + *MDC.Instance.getASTReader(), *MF); + }); MD.ID.ContextHash = MD.Invocation.getModuleHash(); llvm::DenseSet AddedModules; @@ -287,9 +306,9 @@ ModuleDepCollector::ModuleDepCollector( std::unique_ptr Opts, CompilerInstance &I, - DependencyConsumer &C, CompilerInvocation &&OriginalCI) + DependencyConsumer &C, CompilerInvocation &&OriginalCI, bool OptimizeArgs) : Instance(I), Consumer(C), Opts(std::move(Opts)), - OriginalInvocation(std::move(OriginalCI)) {} + OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { PP.addPPCallbacks(std::make_unique(Instance, *this)); diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/a/a.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/a/a.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/b/b.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/b/b.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/begin/begin.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/begin/begin.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json b/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/cdb.json @@ -0,0 +1,7 @@ +[ + { + "directory": "DIR", + "command": "clang -E DIR/header-search-pruning.cpp -Ibegin -I1 -Ia -I3 -I4 -I5 -I6 -Ib -I8 -Iend DEFINES -fmodules -fcxx-modules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -fmodule-map-file=DIR/module.modulemap", + "file": "DIR/header-search-pruning.cpp" + } +] diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/end/end.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/end/end.h new file mode 100644 diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h b/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/mod.h @@ -0,0 +1,11 @@ +#include "begin.h" + +#ifdef INCLUDE_A +#include "a.h" +#endif + +#ifdef INCLUDE_B +#include "b.h" +#endif + +#include "end.h" diff --git a/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap b/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/header-search-pruning/module.modulemap @@ -0,0 +1,4 @@ +module mod { + header "mod.h" + export * +} diff --git a/clang/test/ClangScanDeps/header-search-pruning.cpp b/clang/test/ClangScanDeps/header-search-pruning.cpp new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/header-search-pruning.cpp @@ -0,0 +1,85 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: cp -r %S/Inputs/header-search-pruning/* %t +// RUN: cp %S/header-search-pruning.cpp %t/header-search-pruning.cpp +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_A|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_a.json +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_B|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_b.json +// RUN: sed -e "s|DIR|%/t|g" -e "s|DEFINES|-DINCLUDE_A -DINCLUDE_B|g" %S/Inputs/header-search-pruning/cdb.json > %t/cdb_ab.json +// +// RUN: clang-scan-deps -compilation-database %t/cdb_a.json -format experimental-full -optimize-args >> %t/result_a.json +// RUN: cat %t/result_a.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_A %s +// +// RUN: clang-scan-deps -compilation-database %t/cdb_b.json -format experimental-full -optimize-args >> %t/result_b.json +// RUN: cat %t/result_b.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_B %s +// +// RUN: clang-scan-deps -compilation-database %t/cdb_ab.json -format experimental-full -optimize-args >> %t/result_ab.json +// RUN: cat %t/result_ab.json | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK_AB %s + +#include "mod.h" + +// CHECK_A: { +// CHECK_A-NEXT: "modules": [ +// CHECK_A-NEXT: { +// CHECK_A-NEXT: "clang-module-deps": [], +// CHECK_A-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_A-NEXT: "command-line": [ +// CHECK_A-NEXT: "-cc1" +// CHECK_A: "-I", +// CHECK_A-NEXT: "begin", +// CHECK_A-NEXT: "-I", +// CHECK_A-NEXT: "a", +// CHECK_A-NEXT: "-I", +// CHECK_A-NEXT: "end" +// CHECK_A: ], +// CHECK_A-NEXT: "context-hash": "{{.*}}", +// CHECK_A-NEXT: "file-deps": [ +// CHECK_A: ], +// CHECK_A-NEXT: "name": "mod" +// CHECK_A-NEXT: } +// CHECK_A-NEXT: ] +// CHECK_A: } + +// CHECK_B: { +// CHECK_B-NEXT: "modules": [ +// CHECK_B-NEXT: { +// CHECK_B-NEXT: "clang-module-deps": [], +// CHECK_B-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_B-NEXT: "command-line": [ +// CHECK_B-NEXT: "-cc1" +// CHECK_B: "-I", +// CHECK_B-NEXT: "begin", +// CHECK_B-NEXT: "-I", +// CHECK_B-NEXT: "b", +// CHECK_B-NEXT: "-I", +// CHECK_B-NEXT: "end" +// CHECK_B: ], +// CHECK_B-NEXT: "context-hash": "{{.*}}", +// CHECK_B-NEXT: "file-deps": [ +// CHECK_B: ], +// CHECK_B-NEXT: "name": "mod" +// CHECK_B-NEXT: } +// CHECK_B-NEXT: ] +// CHECK_B: } + +// CHECK_AB: { +// CHECK_AB-NEXT: "modules": [ +// CHECK_AB-NEXT: { +// CHECK_AB-NEXT: "clang-module-deps": [], +// CHECK_AB-NEXT: "clang-modulemap-file": "{{.*}}", +// CHECK_AB-NEXT: "command-line": [ +// CHECK_AB-NEXT: "-cc1" +// CHECK_AB: "-I", +// CHECK_AB-NEXT: "begin", +// CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "a", +// CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "b", +// CHECK_AB-NEXT: "-I", +// CHECK_AB-NEXT: "end" +// CHECK_AB: ], +// CHECK_AB-NEXT: "context-hash": "{{.*}}", +// CHECK_AB-NEXT: "file-deps": [ +// CHECK_AB: ], +// CHECK_AB-NEXT: "name": "mod" +// CHECK_AB-NEXT: } +// CHECK_AB-NEXT: ] +// CHECK_AB: } diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -170,6 +170,11 @@ "specified directory instead the module cache directory."), llvm::cl::cat(DependencyScannerCategory)); +static llvm::cl::opt OptimizeArgs( + "optimize-args", + llvm::cl::desc("Whether to optimize command-line arguments of modules."), + llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory)); + llvm::cl::opt NumThreads("j", llvm::cl::Optional, llvm::cl::desc("Number of worker threads to use (default: use " @@ -507,7 +512,7 @@ SharedStream DependencyOS(llvm::outs()); DependencyScanningService Service(ScanMode, Format, ReuseFileManager, - SkipExcludedPPRanges); + SkipExcludedPPRanges, OptimizeArgs); llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads)); std::vector> WorkerTools; for (unsigned I = 0; I < Pool.getThreadCount(); ++I)