diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -12,6 +12,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" #include "clang/Driver/Action.h" +#include "clang/Driver/InputInfo.h" #include "clang/Driver/Options.h" #include "clang/Driver/Phases.h" #include "clang/Driver/ToolChain.h" @@ -38,13 +39,14 @@ namespace driver { - class Command; - class Compilation; - class InputInfo; - class JobList; - class JobAction; - class SanitizerArgs; - class ToolChain; +typedef SmallVector InputInfoList; + +class Command; +class Compilation; +class JobList; +class JobAction; +class SanitizerArgs; +class ToolChain; /// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options. enum LTOKind { @@ -171,9 +173,11 @@ /// The file to log CC_LOG_DIAGNOSTICS output to, if enabled. std::string CCLogDiagnosticsFilename; + /// An input type and its arguments. + using InputTy = std::pair; + /// A list of inputs and their types for the given arguments. - typedef SmallVector, 16> - InputList; + using InputList = SmallVector; /// Whether the driver should follow g++ like behavior. bool CCCIsCXX() const { return Mode == GXXMode; } @@ -413,6 +417,18 @@ void BuildUniversalActions(Compilation &C, const ToolChain &TC, const InputList &BAInputs) const; + /// BuildOffloadingActions - Construct the list of actions to perform for the + /// offloading toolchain that will be embedded in the host. + /// + /// \param C - The compilation that is being built. + /// \param Args - The input arguments. + /// \param Input - The input type and arguments + /// \param HostAction - The host action used in the offloading toolchain. + Action *BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const; + /// Check that the file referenced by Value exists. 
If it doesn't, /// issue a diagnostic and return false. /// If TypoCorrect is true and the file does not exist, see if it looks @@ -503,13 +519,12 @@ /// BuildJobsForAction - Construct the jobs to perform for the action \p A and /// return an InputInfo for the result of running \p A. Will only construct /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once. - InputInfo - BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC, - StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, - const char *LinkingOutput, - std::map, InputInfo> - &CachedResults, - Action::OffloadKind TargetDeviceOffloadKind) const; + InputInfoList BuildJobsForAction( + Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, + std::map, InputInfoList> + &CachedResults, + Action::OffloadKind TargetDeviceOffloadKind) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; @@ -617,10 +632,10 @@ /// Helper used in BuildJobsForAction. Doesn't use the cache when building /// jobs specifically for the given action, but will use the cache when /// building jobs for the Action's inputs. 
- InputInfo BuildJobsForActionNoCache( + InputInfoList BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map, InputInfo> + std::map, InputInfoList> &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2472,6 +2472,8 @@ PosFlag, NegFlag, BothFlags<[NoArgumentUnused, HelpHidden]>>; def static_openmp: Flag<["-"], "static-openmp">, HelpText<"Use the static host OpenMP runtime while linking.">; +def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group, + HelpText<"Use the new driver for OpenMP offloading.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls", diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3830,6 +3830,11 @@ // Builder to be used to build offloading actions. OffloadingActionBuilder OffloadBuilder(C, Args, Inputs); + // Offload kinds active for this compilation. + unsigned OffloadKinds = Action::OFK_None; + if (C.hasOffloadToolChain()) + OffloadKinds |= Action::OFK_OpenMP; + // Construct the actions to perform. HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr; ActionList LinkerInputs; @@ -3850,14 +3855,16 @@ // Use the current host action in any of the offloading actions, if // required. 
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; for (phases::ID Phase : PL) { // Add any offload action the host action depends on. - Current = OffloadBuilder.addDeviceDependencesToHostAction( - Current, InputArg, Phase, PL.back(), FullPL); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = OffloadBuilder.addDeviceDependencesToHostAction( + Current, InputArg, Phase, PL.back(), FullPL); if (!Current) break; @@ -3890,6 +3897,11 @@ break; } + // Try to build the offloading actions and add the result as a dependency + // to the host. + if (Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = BuildOffloadingActions(C, Args, I, Current); + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -3907,8 +3919,9 @@ // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; if (Current->getType() == types::TY_Nothing) break; @@ -3919,7 +3932,11 @@ Actions.push_back(Current); // Add any top level actions generated for offloading. - OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + else if (Current) + Current->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } // Add a link action if necessary. 
@@ -3931,8 +3948,9 @@ } if (!LinkerInputs.empty()) { - if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) - LinkerInputs.push_back(Wrapper); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA; // Check if this Linker Job should emit a static library. if (ShouldEmitStaticLibrary(Args)) { @@ -3940,7 +3958,11 @@ } else { LA = C.MakeAction(LinkerInputs, types::TY_Image); } - LA = OffloadBuilder.processHostLinkAction(LA); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + LA = OffloadBuilder.processHostLinkAction(LA); + if (Args.hasArg(options::OPT_fopenmp_new_driver)) + LA->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); Actions.push_back(LA); } @@ -4026,6 +4048,68 @@ Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); } +Action *Driver::BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const { + if (!isa(HostAction)) + return HostAction; + + SmallVector ToolChains; + ActionList DeviceActions; + + types::ID InputType = Input.first; + const Arg *InputArg = Input.second; + + auto OpenMPTCRange = C.getOffloadToolChains(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; ++TI) + ToolChains.push_back(TI->second); + + for (unsigned I = 0; I < ToolChains.size(); ++I) + DeviceActions.push_back(C.MakeAction(*InputArg, InputType)); + + if (DeviceActions.empty()) + return HostAction; + + auto PL = types::getCompilationPhases(*this, Args, InputType); + + for (phases::ID Phase : PL) { + if (Phase == phases::Link) { + assert(Phase == PL.back() && "linking must be final compilation step."); + break; + } + + auto TC = ToolChains.begin(); + for (Action *&A : DeviceActions) { + A = ConstructPhaseAction(C, Args, Phase, A); + + if (isa(A)) { + HostAction->setCannotBeCollapsedWithNextDependentAction(); + OffloadAction::HostDependence HDep( + *HostAction, 
*C.getSingleOffloadToolChain(), + /*BoundArch=*/nullptr, Action::OFK_OpenMP); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + A = C.MakeAction(HDep, DDep); + } + ++TC; + } + } + + OffloadAction::DeviceDependences DDeps; + + auto TC = ToolChains.begin(); + for (Action *A : DeviceActions) { + DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + TC++; + } + + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain(), + /*BoundArch=*/nullptr, DDeps); + return C.MakeAction(HDep, DDeps); +} + Action *Driver::ConstructPhaseAction( Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input, Action::OffloadKind TargetDeviceOffloadKind) const { @@ -4188,7 +4272,7 @@ ArchNames.insert(A->getValue()); // Set of (Action, canonical ToolChain triple) pairs we've built jobs for. - std::map, InputInfo> CachedResults; + std::map, InputInfoList> CachedResults; for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output . 
Unfortunately, this @@ -4645,10 +4729,11 @@ return TriplePlusArch; } -InputInfo Driver::BuildJobsForAction( +InputInfoList Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map, InputInfo> &CachedResults, + std::map, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; @@ -4656,17 +4741,18 @@ if (CachedResult != CachedResults.end()) { return CachedResult->second; } - InputInfo Result = BuildJobsForActionNoCache( + InputInfoList Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } -InputInfo Driver::BuildJobsForActionNoCache( +InputInfoList Driver::BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map, InputInfo> &CachedResults, + std::map, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); @@ -4704,7 +4790,7 @@ // If there is a single device option, just generate the job for it. 
if (OA->hasSingleDeviceDependence()) { - InputInfo DevA; + InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { DevA = @@ -4722,7 +4808,7 @@ OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4731,6 +4817,17 @@ A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); + + // We may have already built this action as a part of the offloading + // toolchain, return the cached input if so. + std::pair ActionTC = { + OA->getHostDependence(), + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + if (CachedResults.find(ActionTC) != CachedResults.end()) { + InputInfoList Inputs = CachedResults[ActionTC]; + Inputs.append(OffloadDependencesInputInfo); + return Inputs; + } } if (const InputAction *IA = dyn_cast(A)) { @@ -4740,9 +4837,9 @@ Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - return InputInfo(A, Name, /* _BaseInput = */ Name); + return {InputInfo(A, Name, /* _BaseInput = */ Name)}; } - return InputInfo(A, &Input, /* _BaseInput = */ ""); + return {InputInfo(A, &Input, /* _BaseInput = */ "")}; } if (const BindArchAction *BAA = dyn_cast(A)) { @@ -4772,7 +4869,7 @@ const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); if (!T) - return InputInfo(); + return {InputInfo()}; if (BuildingForOffloadDevice && A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { @@ -4799,7 +4896,7 @@ cast(OA)->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char 
*DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4813,7 +4910,7 @@ // FIXME: Clean this up. bool SubJobAtTopLevel = AtTopLevel && (isa(A) || isa(A)); - InputInfos.push_back(BuildJobsForAction( + InputInfos.append(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } @@ -4897,8 +4994,8 @@ Arch = BoundArch; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, - UI.DependentOffloadKind)}] = - CurI; + UI.DependentOffloadKind)}] = { + CurI}; } // Now that we have all the results generated, select the one that should be @@ -4907,9 +5004,9 @@ A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); - Result = CachedResults[ActionTC]; + Result = CachedResults[ActionTC].front(); } else if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A, BaseInput); + Result = {InputInfo(A, BaseInput)}; else { // We only have to generate a prefix for the host if this is not a top-level // action. 
@@ -4962,7 +5059,7 @@ C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); } - return Result; + return {Result}; } const char *Driver::getDefaultImageName() const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4351,6 +4351,7 @@ bool IsHIP = JA.isOffloading(Action::OFK_HIP); bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); + bool IsOpenMPHost = JA.isHostOffloading(Action::OFK_OpenMP); bool IsHeaderModulePrecompile = isa(JA); bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || JA.isDeviceOffloading(Action::OFK_Host)); @@ -4371,6 +4372,7 @@ InputInfoList ModuleHeaderInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; + const InputInfo *OpenMPHostInput = nullptr; for (const InputInfo &I : Inputs) { if (&I == &Input) { // This is the primary input. 
@@ -4387,6 +4389,8 @@ CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { OpenMPDeviceInput = &I; + } else if (IsOpenMPHost && !OpenMPHostInput) { + OpenMPHostInput = &I; } else { llvm_unreachable("unexpectedly given multiple inputs"); } diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -350,3 +350,13 @@ // TRIPLE: "-triple" "nvptx64-nvidia-cuda" // TRIPLE: "-target-cpu" "sm_35" + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -fopenmp-new-driver -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=NEW_DRIVER %s + +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// NEW_DRIVER: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[DEVICE_INPUT:.+]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.+]]" +// NEW_DRIVER: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_OBJ:.+]]" +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.+]]" +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "[[LINKER:.+]]", inputs: ["[[HOST_OBJ]]"], output: "openmp-offload-gpu"