Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h =================================================================== --- clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -23,6 +23,10 @@ namespace tooling { namespace dependencies { +/// A callback to lookup module outputs for "-fmodule-file=", "-o" etc. +using LookupModuleOutputCallback = + llvm::function_ref; + /// The full dependencies and module graph for a specific input. struct FullDependencies { /// The identifier of the C++20 module this translation unit exports. @@ -45,17 +49,8 @@ /// determined that the differences are benign for this compilation. std::vector ClangModuleDeps; - /// The original command line of the TU (excluding the compiler executable). - std::vector OriginalCommandLine; - - /// Get the full command line. - /// - /// \param LookupModuleOutput This function is called to fill in - /// "-fmodule-file=", "-o" and other output - /// arguments for dependencies. - std::vector getCommandLine( - llvm::function_ref - LookupModuleOutput) const; + /// The command line of the TU (excluding the compiler executable). + std::vector CommandLine; }; struct FullDependenciesResult { @@ -92,12 +87,16 @@ /// function for a single \c DependencyScanningTool in a /// single build. Use a different one for different tools, /// and clear it between builds. + /// \param LookupModuleOutput This function is called to fill in + /// "-fmodule-file=", "-o" and other output + /// arguments for dependencies. /// /// \returns a \c StringError with the diagnostic output if clang errors /// occurred, \c FullDependencies otherwise. llvm::Expected getFullDependencies(const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput, llvm::Optional ModuleName = None); private: @@ -106,8 +105,9 @@ class FullDependencyConsumer : public DependencyConsumer { public: - FullDependencyConsumer(const llvm::StringSet<> &AlreadySeen) - : AlreadySeen(AlreadySeen) {} + FullDependencyConsumer(const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput) + : AlreadySeen(AlreadySeen), LookupModuleOutput(LookupModuleOutput) {} void handleDependencyOutputOpts(const DependencyOutputOptions &) override {} @@ -127,6 +127,11 @@ ContextHash = std::move(Hash); } + std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) override { + return LookupModuleOutput(ID, Kind); + } + FullDependenciesResult getFullDependencies( const std::vector &OriginalCommandLine) const; @@ -138,6 +143,7 @@ std::string ContextHash; std::vector OutputPaths; const llvm::StringSet<> &AlreadySeen; + LookupModuleOutputCallback LookupModuleOutput; }; } // end namespace dependencies Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h =================================================================== --- clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -42,6 +42,9 @@ virtual void handleModuleDependency(ModuleDeps MD) = 0; virtual void handleContextHash(std::string Hash) = 0; + + virtual std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) = 0; }; /// An individual dependency scanning worker that is able to run on its own Index: clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h =================================================================== --- clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -119,25 +119,11 @@ // the primary TU. bool ImportedByMainFile = false; - /// Whether the TU had a dependency file. The path in \c BuildInvocation is - /// cleared to avoid leaking the specific path from the TU into the module. - bool HadDependencyFile = false; - - /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation - /// is cleared to avoid leaking the specific path from the TU into the module. - bool HadSerializedDiagnostics = false; - /// Compiler invocation that can be used to build this module (without paths). CompilerInvocation BuildInvocation; /// Gets the canonical command line suitable for passing to clang. - /// - /// \param LookupModuleOutput This function is called to fill in - /// "-fmodule-file=", "-o" and other output - /// arguments. - std::vector getCanonicalCommandLine( - llvm::function_ref - LookupModuleOutput) const; + std::vector getCanonicalCommandLine() const; }; class ModuleDepCollector; @@ -237,9 +223,12 @@ /// Constructs a CompilerInvocation that can be used to build the given /// module, excluding paths to discovered modular dependencies that are yet to /// be built. - CompilerInvocation makeInvocationForModuleBuildWithoutPaths( + CompilerInvocation makeInvocationForModuleBuildWithoutOutputs( const ModuleDeps &Deps, llvm::function_ref Optimize) const; + + /// Add paths that require looking up outputs to the given dependencies. + void addOutputPaths(ModuleDeps &Deps); }; } // end namespace dependencies Index: clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp =================================================================== --- clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -13,18 +13,12 @@ using namespace tooling; using namespace dependencies; -std::vector FullDependencies::getCommandLine( - llvm::function_ref - LookupModuleOutput) const { +static std::vector +makeTUCommandLineWithoutPaths(ArrayRef OriginalCommandLine) { std::vector Args = OriginalCommandLine; Args.push_back("-fno-implicit-modules"); Args.push_back("-fno-implicit-module-maps"); - for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps) - Args.push_back("-fmodule-file=" + PMD.PCMFile); - for (ModuleID MID : ClangModuleDeps) - Args.push_back("-fmodule-file=" + - LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); // These arguments are unused in explicit compiles. llvm::erase_if(Args, [](StringRef Arg) { @@ -72,6 +66,11 @@ void handleContextHash(std::string Hash) override {} + std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) override { + llvm::report_fatal_error("unexpected call to lookupModuleOutput"); + } + void printDependencies(std::string &S) { assert(Opts && "Handled dependency output options."); @@ -113,8 +112,9 @@ DependencyScanningTool::getFullDependencies( const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput, llvm::Optional ModuleName) { - FullDependencyConsumer Consumer(AlreadySeen); + FullDependencyConsumer Consumer(AlreadySeen, LookupModuleOutput); llvm::Error Result = Worker.computeDependencies(CWD, CommandLine, Consumer, ModuleName); if (Result) @@ -126,16 +126,24 @@ const std::vector &OriginalCommandLine) const { FullDependencies FD; - FD.OriginalCommandLine = ArrayRef(OriginalCommandLine).slice(1); + FD.CommandLine = makeTUCommandLineWithoutPaths( + ArrayRef(OriginalCommandLine).slice(1)); FD.ID.ContextHash = std::move(ContextHash); FD.FileDeps.assign(Dependencies.begin(), Dependencies.end()); + for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps) + FD.CommandLine.push_back("-fmodule-file=" + PMD.PCMFile); + for (auto &&M : ClangModuleDeps) { auto &MD = M.second; - if (MD.ImportedByMainFile) + if (MD.ImportedByMainFile) { FD.ClangModuleDeps.push_back(MD.ID); + FD.CommandLine.push_back( + "-fmodule-file=" + + LookupModuleOutput(MD.ID, ModuleOutputKind::ModuleFile)); + } } FD.PrebuiltModuleDeps = std::move(PrebuiltModuleDeps); Index: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp =================================================================== --- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -42,7 +42,47 @@ Opts.UserEntries.push_back(Entries[Idx]); } -CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( +static std::vector splitString(std::string S, char Separator) { + SmallVector Segments; + StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); + std::vector Result; + Result.reserve(Segments.size()); + for (StringRef Segment : Segments) + Result.push_back(Segment.str()); + return Result; +} + +void ModuleDepCollector::addOutputPaths(ModuleDeps &Deps) { + CompilerInvocation &CI = Deps.BuildInvocation; + for (ModuleID MID : Deps.ClangModuleDeps) + CI.getFrontendOpts().ModuleFiles.push_back( + Consumer.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); + + CI.getFrontendOpts().OutputFile = + Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile); + if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty()) + CI.getDiagnosticOpts().DiagnosticSerializationFile = + Consumer.lookupModuleOutput( + Deps.ID, ModuleOutputKind::DiagnosticSerializationFile); + if (!CI.getDependencyOutputOpts().OutputFile.empty()) { + CI.getDependencyOutputOpts().OutputFile = + Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::DependencyFile); + CI.getDependencyOutputOpts().Targets = + splitString(Consumer.lookupModuleOutput( + Deps.ID, ModuleOutputKind::DependencyTargets), + '\0'); + if (!CI.getDependencyOutputOpts().OutputFile.empty() && + CI.getDependencyOutputOpts().Targets.empty()) { + // Fallback to -o as dependency target, as in the driver. + SmallString<128> Target; + quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target); + CI.getDependencyOutputOpts().Targets.push_back(std::string(Target)); + } + } +} + +CompilerInvocation +ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs( const ModuleDeps &Deps, llvm::function_ref Optimize) const { // Make a deep copy of the original Clang invocation. @@ -58,8 +98,12 @@ CI.getFrontendOpts().OutputFile.clear(); CI.getCodeGenOpts().MainFileName.clear(); CI.getCodeGenOpts().DwarfDebugFlags.clear(); - CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); - CI.getDependencyOutputOpts().OutputFile.clear(); + // Map output paths that affect behaviour to "-" so their existence is in the + // context hash. The final path will be computed in addOutputPaths. + if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty()) + CI.getDiagnosticOpts().DiagnosticSerializationFile = "-"; + if (!CI.getDependencyOutputOpts().OutputFile.empty()) + CI.getDependencyOutputOpts().OutputFile = "-"; CI.getDependencyOutputOpts().Targets.clear(); CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; @@ -78,6 +122,17 @@ CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; + // Inputs + InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(), + InputKind::Format::ModuleMap); + CI.getFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile, + ModuleMapInputKind); + CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; + + // Report the prebuilt modules this module uses. + for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) + CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); + // Remove any macro definitions that are explicitly ignored. if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) { llvm::erase_if( @@ -91,12 +146,6 @@ CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear(); } - // Report the prebuilt modules this module uses. - for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) - CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); - - CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; - Optimize(CI); // The original invocation probably didn't have strict context hash enabled. @@ -125,49 +174,8 @@ return std::vector{Args.begin(), Args.end()}; } -static std::vector splitString(std::string S, char Separator) { - SmallVector Segments; - StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); - std::vector Result; - Result.reserve(Segments.size()); - for (StringRef Segment : Segments) - Result.push_back(Segment.str()); - return Result; -} - -std::vector ModuleDeps::getCanonicalCommandLine( - llvm::function_ref - LookupModuleOutput) const { - CompilerInvocation CI(BuildInvocation); - FrontendOptions &FrontendOpts = CI.getFrontendOpts(); - - InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), - InputKind::Format::ModuleMap); - FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); - FrontendOpts.OutputFile = - LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); - if (HadSerializedDiagnostics) - CI.getDiagnosticOpts().DiagnosticSerializationFile = - LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); - if (HadDependencyFile) { - DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts(); - DepOpts.OutputFile = - LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); - DepOpts.Targets = splitString( - LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); - if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) { - // Fallback to -o as dependency target, as in the driver. - SmallString<128> Target; - quoteMakeTarget(FrontendOpts.OutputFile, Target); - DepOpts.Targets.push_back(std::string(Target)); - } - } - - for (ModuleID MID : ClangModuleDeps) - FrontendOpts.ModuleFiles.push_back( - LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); - - return serializeCompilerInvocation(CI); +std::vector ModuleDeps::getCanonicalCommandLine() const { + return serializeCompilerInvocation(BuildInvocation); } static std::string getModuleContextHash(const ModuleDeps &MD) { @@ -190,23 +198,16 @@ return ""; }); - // Hash the input file paths and module dependencies. These paths may differ - // even if the invocation is identical if they depend on the contents of the - // files in the TU -- for example, case-insensitive paths to modulemap files. - // Usually such a case would indicate a missed optimization to canonicalize, - // but it may be difficult to canonicalize all cases when there is a VFS. - HashBuilder.add(MD.ClangModuleMapFile); - for (const auto &Dep : MD.PrebuiltModuleDeps) - HashBuilder.add(Dep.PCMFile); + // Hash the module dependencies. These paths may differ even if the invocation + // is identical if they depend on the contents of the files in the TU -- for + // example, case-insensitive paths to modulemap files. Usually such a case + // would indicate a missed optimization to canonicalize, but it may be + // difficult to canonicalize all cases when there is a VFS. for (const auto &ID : MD.ClangModuleDeps) { HashBuilder.add(ID.ModuleName); HashBuilder.add(ID.ContextHash); } - // Hash options that affect which callbacks are made for outputs. - HashBuilder.add(MD.HadDependencyFile); - HashBuilder.add(MD.HadSerializedDiagnostics); - llvm::BLAKE3Result<16> Hash = HashBuilder.final(); std::array Words; static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words"); @@ -387,22 +388,20 @@ llvm::DenseSet SeenModules; addAllSubmodulePrebuiltDeps(M, MD, SeenModules); - MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( + MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutOutputs( MD, [&](CompilerInvocation &BuildInvocation) { if (MDC.OptimizeArgs) optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), *MDC.ScanInstance.getASTReader(), *MF); }); - MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() - .DiagnosticSerializationFile.empty(); - MD.HadDependencyFile = - !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); llvm::DenseSet AddedModules; addAllSubmoduleDeps(M, MD, AddedModules); - // Do this last since it requires the dependencies. + // Compute the context hash from the inputs. Requires dependencies. MD.ID.ContextHash = getModuleContextHash(MD); + // Finish the compiler invocation. Requires dependencies and the context hash. + MDC.addOutputPaths(MD); return MD.ID; } Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp =================================================================== --- clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Driver/Driver.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" @@ -268,10 +269,7 @@ Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)}); } - ID.CommandLine = - FD.getCommandLine([&](const ModuleID &MID, ModuleOutputKind MOK) { - return lookupModuleOutput(MID, MOK); - }); + ID.CommandLine = FD.CommandLine; Inputs.push_back(std::move(ID)); } @@ -301,10 +299,7 @@ {"file-deps", toJSONSorted(MD.FileDeps)}, {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)}, {"clang-modulemap-file", MD.ClangModuleMapFile}, - {"command-line", MD.getCanonicalCommandLine( - [&](const ModuleID &MID, ModuleOutputKind MOK) { - return lookupModuleOutput(MID, MOK); - })}, + {"command-line", MD.getCanonicalCommandLine()}, }; OutModules.push_back(std::move(O)); } @@ -330,42 +325,6 @@ } private: - std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK) { - // Cache the PCM path, since it will be queried repeatedly for each module. - // The other outputs are only queried once during getCanonicalCommandLine. - auto PCMPath = PCMPaths.insert({MID, ""}); - if (PCMPath.second) - PCMPath.first->second = constructPCMPath(MID); - switch (MOK) { - case ModuleOutputKind::ModuleFile: - return PCMPath.first->second; - case ModuleOutputKind::DependencyFile: - return PCMPath.first->second + ".d"; - case ModuleOutputKind::DependencyTargets: - // Null-separate the list of targets. - return join(ModuleDepTargets, StringRef("\0", 1)); - case ModuleOutputKind::DiagnosticSerializationFile: - return PCMPath.first->second + ".diag"; - } - llvm_unreachable("Fully covered switch above!"); - } - - /// Construct a path for the explicitly built PCM. - std::string constructPCMPath(ModuleID MID) const { - auto MDIt = Modules.find(IndexedModuleID{MID, 0}); - assert(MDIt != Modules.end()); - const ModuleDeps &MD = MDIt->second; - - StringRef Filename = llvm::sys::path::filename(MD.ImplicitModulePCMPath); - StringRef ModuleCachePath = llvm::sys::path::parent_path( - llvm::sys::path::parent_path(MD.ImplicitModulePCMPath)); - - SmallString<256> ExplicitPCMPath(!ModuleFilesDir.empty() ? ModuleFilesDir - : ModuleCachePath); - llvm::sys::path::append(ExplicitPCMPath, MD.ID.ContextHash, Filename); - return std::string(ExplicitPCMPath); - } - struct IndexedModuleID { ModuleID ID; mutable size_t InputIndex; @@ -395,7 +354,6 @@ std::mutex Lock; std::unordered_map Modules; - std::unordered_map PCMPaths; std::vector Inputs; }; @@ -417,6 +375,42 @@ return false; } +/// Construct a path for the explicitly built PCM. +static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) { + SmallString<256> ExplicitPCMPath(OutputDir); + llvm::sys::path::append(ExplicitPCMPath, MID.ContextHash, + MID.ModuleName + "-" + MID.ContextHash + ".pcm"); + return std::string(ExplicitPCMPath); +} + +static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK, + StringRef OutputDir) { + std::string PCMPath = constructPCMPath(MID, OutputDir); + switch (MOK) { + case ModuleOutputKind::ModuleFile: + return PCMPath; + case ModuleOutputKind::DependencyFile: + return PCMPath + ".d"; + case ModuleOutputKind::DependencyTargets: + // Null-separate the list of targets. + return join(ModuleDepTargets, StringRef("\0", 1)); + case ModuleOutputKind::DiagnosticSerializationFile: + return PCMPath + ".diag"; + } + llvm_unreachable("Fully covered switch above!"); +} + +static std::string getModuleCachePath(ArrayRef Args) { + for (StringRef Arg : llvm::reverse(Args)) { + Arg.consume_front("/clang:"); + if (Arg.consume_front("-fmodules-cache-path=")) + return std::string(Arg); + } + SmallString<128> Path; + driver::Driver::getDefaultModuleCachePath(Path); + return std::string(Path); +} + int main(int argc, const char **argv) { llvm::InitLLVM X(argc, argv); llvm::cl::HideUnrelatedOptions(DependencyScannerCategory); @@ -545,6 +539,14 @@ Optional MaybeModuleName; if (!ModuleName.empty()) MaybeModuleName = ModuleName; + + std::string OutputDir(ModuleFilesDir); + if (OutputDir.empty()) + OutputDir = getModuleCachePath(Input->CommandLine); + auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) { + return ::lookupModuleOutput(MID, MOK, OutputDir); + }; + // Run the tool on it. if (Format == ScanningOutputFormat::Make) { auto MaybeFile = WorkerTools[I]->getDependencyFile( @@ -554,7 +556,8 @@ HadErrors = true; } else { auto MaybeFullDeps = WorkerTools[I]->getFullDependencies( - Input->CommandLine, CWD, AlreadySeenModules, MaybeModuleName); + Input->CommandLine, CWD, AlreadySeenModules, LookupOutput, + MaybeModuleName); if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD, LocalIndex, DependencyOS, Errs)) HadErrors = true;