diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
--- a/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/clang/include/clang/Frontend/CompilerInvocation.h
@@ -183,6 +183,11 @@
   /// identifying the conditions under which the module was built.
   std::string getModuleHash() const;
 
+  /// Retrieve a module hash string computed as if strict context hashing
+  /// were enabled (\p UseStrictContextHash is true) or disabled, regardless
+  /// of the ModulesStrictContextHash header search option.
+  std::string getModuleHash(bool UseStrictContextHash) const;
+
   /// @}
   /// @name Option Subgroups
   /// @{
diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -11,6 +11,7 @@
 #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H
 
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/Module.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/Utils.h"
 #include "clang/Lex/HeaderSearch.h"
@@ -49,6 +50,16 @@
   /// treated as separate modules for the purpose of a build.
   std::string ContextHash;
 
+  /// The relaxed context hash of a module represents the \c ContextHash that
+  /// would have been used with strict context hashing disabled. It is only
+  /// kept track of in order to compute the module aliasing efficiency of
+  /// strict context hashing w.r.t. relaxed context hashing.
+  std::string RelaxedContextHash;
+
+  /// The hash of the AST represented by the module, taken from the signature
+  /// of its AST file. Only used to keep track of module aliasing efficiency.
+  ASTFileSignature ModuleSignature;
+
   /// The path to the modulemap file which defines this module.
   ///
   /// This can be used to explicitly build this module. This file will
@@ -153,6 +164,7 @@
   DependencyConsumer &Consumer;
   std::string MainFile;
   std::string ContextHash;
+  std::string RelaxedContextHash;
   std::vector<std::string> MainDeps;
   std::unordered_map<std::string, ModuleDeps> Deps;
   std::unique_ptr<DependencyOutputOptions> Opts;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3715,6 +3715,10 @@
 }
 
 std::string CompilerInvocation::getModuleHash() const {
+  return getModuleHash(getHeaderSearchOpts().ModulesStrictContextHash);
+}
+
+std::string CompilerInvocation::getModuleHash(bool UseStrictContextHash) const {
   // Note: For QoI reasons, the things we use as a hash here should all be
   // dumped via the -module-info flag.
   using llvm::hash_code;
@@ -3780,7 +3784,7 @@
                       hsOpts.ModulesValidateDiagnosticOptions);
   code = hash_combine(code, hsOpts.ResourceDir);
 
-  if (hsOpts.ModulesStrictContextHash) {
+  if (UseStrictContextHash) {
     hash_code SHPC = hash_combine_range(hsOpts.SystemHeaderPrefixes.begin(),
                                         hsOpts.SystemHeaderPrefixes.end());
     hash_code UEC = hash_combine_range(hsOpts.UserEntries.begin(),
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -64,11 +64,13 @@
                                  FileID PrevFID) {
   if (Reason != PPCallbacks::EnterFile)
     return;
-  
+
   // This has to be delayed as the context hash can change at the start of
   // `CompilerInstance::ExecuteAction`.
   if (MDC.ContextHash.empty()) {
     MDC.ContextHash = Instance.getInvocation().getModuleHash();
+    MDC.RelaxedContextHash = Instance.getInvocation().getModuleHash(
+        /*UseStrictContextHash=*/false);
     MDC.Consumer.handleContextHash(MDC.ContextHash);
   }
 
@@ -152,8 +154,10 @@
   MD.ModuleName = M->getFullModuleName();
   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
   MD.ContextHash = MDC.ContextHash;
+  MD.RelaxedContextHash = MDC.RelaxedContextHash;
   serialization::ModuleFile *MF =
       MDC.Instance.getASTReader()->getModuleManager().lookup(M->getASTFile());
+  MD.ModuleSignature = MF->ASTSignature;
   MDC.Instance.getASTReader()->visitInputFiles(
       *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
         MD.FileDeps.insert(IF.getFile()->getName());
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -6,12 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Basic/Module.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/InitLLVM.h"
@@ -20,6 +22,7 @@
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
 #include <mutex>
 #include <thread>
 
@@ -165,6 +168,13 @@
     llvm::cl::init(false),
     llvm::cl::cat(DependencyScannerCategory));
 
+llvm::cl::opt<bool> GatherMetrics(
+    "metrics", llvm::cl::Optional,
+    llvm::cl::desc(
+        "Gather metrics during tool operation to measure overhead of modules "
+        "built in explicit mode with respect to implicit modules."),
+    llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory));
+
 } // end anonymous namespace
 
 /// \returns object-file path derived from source-file path.
@@ -327,6 +337,73 @@
     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
   }
 
+  /// Print to \p OS how many more modules get built with strict context
+  /// hashing than with relaxed context hashing, then list every relaxed hash
+  /// module that maps to more than one strict context hash module.
+  void computeAndPrintBuildMetrics(raw_ostream &OS) {
+    std::unordered_map<std::string, BuiltModulesEfficiencyInfo>
+        RelaxedHashesEfficiencyInfo;
+    std::set<ASTFileSignature> UniqueModuleSignatures;
+    // Group the modules by the name and context hash they would have had
+    // with relaxed context hashing.
+    for (auto &&M : Modules) {
+      std::string RelaxedHashModule =
+          M.second.ModuleName + '-' + M.second.RelaxedContextHash;
+      auto &BuiltModulesInfo = RelaxedHashesEfficiencyInfo[RelaxedHashModule];
+      BuiltModulesInfo.BuiltModules.push_back(&M.second);
+      auto InsertionResult =
+          UniqueModuleSignatures.insert(M.second.ModuleSignature);
+      if (InsertionResult.second)
+        BuiltModulesInfo.UniqueASTFileSignatures++;
+    }
+
+    size_t TotalStrictContextHashModules = Modules.size();
+    size_t TotalRelaxedHashModules = RelaxedHashesEfficiencyInfo.size();
+    // Multiply before dividing so that the integer division does not truncate
+    // the ratio to zero, and do not divide by zero when there are no modules.
+    size_t PercentageModuleNumberIncrease =
+        TotalRelaxedHashModules == 0
+            ? 0
+            : 100 * (TotalStrictContextHashModules - TotalRelaxedHashModules) /
+                  TotalRelaxedHashModules;
+
+    OS << "Total relaxed hash modules: " << TotalRelaxedHashModules << "\n";
+    OS << "Total unique AST signatures: " << UniqueModuleSignatures.size()
+       << "\n";
+    OS << "Total strict context hash modules: " << TotalStrictContextHashModules
+       << "\n";
+    OS << "Module number increase: " << PercentageModuleNumberIncrease
+       << "%\n\n";
+
+    OS << "Details:\n";
+    for (auto &&It : RelaxedHashesEfficiencyInfo) {
+      auto &Duplicates = It.second.BuiltModules;
+      size_t NumDuplicates = Duplicates.size();
+
+      assert(NumDuplicates > 0 && "Cannot have a relaxed hash module that "
+                                  "doesn't map to at least a strict one!");
+
+      if (NumDuplicates > 1) {
+        OS << "Relaxed hash module: " << It.first << " gets duplicated as ("
+           << It.second.UniqueASTFileSignatures << "/" << NumDuplicates
+           << ") modules:\n\n";
+
+        auto OutputModuleDep = [](raw_ostream &OS, ModuleDeps *MD) {
+          OS << MD->ModuleName << "-" << MD->ContextHash;
+        };
+
+        auto DuplicatesIt = Duplicates.begin();
+        OutputModuleDep(OS, *DuplicatesIt);
+        ++DuplicatesIt;
+        for (auto End = Duplicates.end(); DuplicatesIt != End; ++DuplicatesIt) {
+          OS << "; ";
+          OutputModuleDep(OS, *DuplicatesIt);
+        }
+        OS << "\n\n";
+      }
+    }
+  }
+
 private:
   StringRef lookupPCMPath(ClangModuleDep CMD) {
     return Modules[ContextModulePair{CMD.ContextHash, CMD.ModuleName, 0}]
@@ -340,6 +417,15 @@
     return I->second;
   };
 
+  /// Aliasing information about the modules that share a module name and a
+  /// relaxed context hash.
+  struct BuiltModulesEfficiencyInfo {
+    /// The strict context hash modules mapping to this relaxed hash module.
+    std::vector<ModuleDeps *> BuiltModules;
+    /// The number of AST file signatures first seen among \c BuiltModules.
+    size_t UniqueASTFileSignatures = 0;
+  };
+
   struct ContextModulePair {
     std::string ContextHash;
     std::string ModuleName;
@@ -543,5 +629,8 @@
   if (Format == ScanningOutputFormat::Full)
     FD.printFullOutput(llvm::outs());
 
+  if (GatherMetrics)
+    FD.computeAndPrintBuildMetrics(llvm::errs());
+
   return HadErrors;
 }