Index: include/clang/Driver/Compilation.h
===================================================================
--- include/clang/Driver/Compilation.h
+++ include/clang/Driver/Compilation.h
@@ -116,6 +116,12 @@
     return OrderedOffloadingToolchains.equal_range(Kind);
   }
 
+  /// Return true if an offloading tool chain of a given kind exists.
+  template <Action::OffloadKind Kind> bool hasOffloadToolChain() const {
+    return OrderedOffloadingToolchains.find(Kind) !=
+           OrderedOffloadingToolchains.end();
+  }
+
   /// Return an offload toolchain of the provided kind. Only one is expected to
   /// exist.
   template <Action::OffloadKind Kind>
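For illustration only (not part of the patch): the new accessor is meant to
pair with the existing getSingleOffloadToolChain<>(), which expects exactly
one tool chain of the requested kind to exist. A hypothetical caller would
first probe and then fetch:

  // Sketch of intended usage; CudaTC is a hypothetical local.
  if (C.hasOffloadToolChain<Action::OFK_Cuda>()) {
    const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
    // ... build CUDA device actions against CudaTC ...
  }

CudaActionBuilder::initialize() in the Driver.cpp changes below follows this
same pattern.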
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -1387,131 +1387,521 @@
   }
 }
 
-// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE
-// input action and then wraps each in CudaDeviceAction paired with
-// appropriate GPU arch name. In case of partial (i.e preprocessing
-// only) or device-only compilation, each device action is added to /p
-// Actions and /p Current is released. Otherwise the function creates
-// and returns a new CudaHostAction which wraps /p Current and device
-// side actions.
-static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
-                                const Arg *InputArg, Action *HostAction,
-                                ActionList &Actions) {
-  Arg *PartialCompilationArg = Args.getLastArg(
-      options::OPT_cuda_host_only, options::OPT_cuda_device_only,
-      options::OPT_cuda_compile_host_device);
-  bool CompileHostOnly =
-      PartialCompilationArg &&
-      PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only);
-  bool CompileDeviceOnly =
-      PartialCompilationArg &&
-      PartialCompilationArg->getOption().matches(options::OPT_cuda_device_only);
-
-  if (CompileHostOnly) {
+namespace {
+/// Provides a convenient interface for different programming models to
+/// generate the required device actions.
+class OffloadingActionBuilder final {
+  /// Flag used to trace errors in the builder.
+  bool IsValid = false;
+
+  /// The compilation that is using this builder.
+  Compilation &C;
+
+  /// The derived arguments associated with this builder.
+  DerivedArgList &Args;
+
+  /// Map between an input argument and the offload kinds used to process it.
+  std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
+
+  /// Builder interface. It doesn't build anything or keep any state.
+  class DeviceActionBuilder {
+  public:
+    typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy;
+
+    enum ActionBuilderReturnCode {
+      // The builder acted successfully on the current action.
+      ABRT_Success,
+      // The builder didn't have to act on the current action.
+      ABRT_Inactive,
+      // The builder was successful and requested the host action to not be
+      // generated.
+      ABRT_Ignore_Host,
+    };
+
+  protected:
+    /// Compilation associated with this builder.
+    Compilation &C;
+
+    /// Tool chains associated with this builder. The same programming
+    /// model may have associated one or more tool chains.
+    SmallVector<const ToolChain *, 4> ToolChains;
+
+    /// The derived arguments associated with this builder.
+    DerivedArgList &Args;
+
+    /// The inputs associated with this builder.
+    const Driver::InputList &Inputs;
+
+    /// The associated offload kind.
+    Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
+
+  public:
+    DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
+                        const Driver::InputList &Inputs,
+                        Action::OffloadKind AssociatedOffloadKind)
+        : C(C), Args(Args), Inputs(Inputs),
+          AssociatedOffloadKind(AssociatedOffloadKind) {}
+    virtual ~DeviceActionBuilder() {}
+
+    /// Fill up the array \a DA with all the device dependences that should be
+    /// added to the provided host action. By default it is inactive.
+    virtual ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) {
+      return ABRT_Inactive;
+    }
+
+    /// Update the state to include the provided host action \a HostAction as
+    /// a dependency of the current device action. By default it is inactive.
+    virtual ActionBuilderReturnCode addDeviceDependences(Action *HostAction) {
+      return ABRT_Inactive;
+    }
+
+    /// Append top level actions generated by the builder.
+    virtual void appendTopLevelActions(ActionList &AL) {}
+
+    /// Append linker actions generated by the builder.
+    virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
+
+    /// Initialize the builder. Return true if any initialization errors are
+    /// found.
+    virtual bool initialize() { return false; }
+
+    /// Return true if this builder is valid. We have a valid builder if we
+    /// have associated device tool chains.
+    bool isValid() { return !ToolChains.empty(); }
+
+    /// Return the associated offload kind.
+    Action::OffloadKind getAssociatedOffloadKind() {
+      return AssociatedOffloadKind;
+    }
+  };
+
+  /// \brief CUDA action builder. It injects device code in the host backend
+  /// action.
+  class CudaActionBuilder final : public DeviceActionBuilder {
+    /// Flags to signal if the user requested host-only or device-only
+    /// compilation.
+    bool CompileHostOnly = false;
+    bool CompileDeviceOnly = false;
+
+    /// List of GPU architectures to use in this compilation.
+    SmallVector<const char *, 4> GpuArchList;
+
+    /// The CUDA actions for the current input.
+    ActionList CudaDeviceActions;
+
+    /// The CUDA fat binary if it was generated for the current input.
+    Action *CudaFatBinary = nullptr;
+
+    /// Flag that is set to true if this builder acted on the current input.
+    bool IsActive = false;
+
+  public:
+    CudaActionBuilder(Compilation &C, DerivedArgList &Args,
+                      const Driver::InputList &Inputs)
+        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
+
+    ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
+      if (!IsActive)
+        return ABRT_Inactive;
+
+      // If we don't have more CUDA actions, we don't have any dependences to
+      // create for the host.
+      if (CudaDeviceActions.empty())
+        return ABRT_Success;
+
+      assert(CudaDeviceActions.size() == GpuArchList.size() &&
+             "Expecting one action per GPU architecture.");
+      assert(!CompileHostOnly &&
+             "Not expecting CUDA actions in host-only compilation.");
+
+      // If we are generating code for the device or we are in a backend
+      // phase, we attempt to generate the fat binary. We compile each arch
+      // to ptx and assemble to cubin, then feed the cubin *and* the ptx into
+      // a device "link" action, which uses fatbinary to combine these cubins
+      // into one fatbin. The fatbin is then an input to the host action if
+      // not in device-only mode.
+      if (CompileDeviceOnly || CurPhase == phases::Backend) {
+        ActionList DeviceActions;
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+          // Produce the device action from the current phase up to the
+          // assemble phase.
+          for (auto Ph : Phases) {
+            // Skip the phases that were already dealt with.
+            if (Ph < CurPhase)
+              continue;
+            // We have to be consistent with the host final phase.
+            if (Ph > FinalPhase)
+              break;
+
+            CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
+                C, Args, Ph, CudaDeviceActions[I]);
+
+            if (Ph == phases::Assemble)
+              break;
+          }
+
+          // If we didn't reach the assemble phase, we can't generate the fat
+          // binary.
+          if (!isa<AssembleJobAction>(CudaDeviceActions[I]))
+            continue;
+
+          Action *AssembleAction = CudaDeviceActions[I];
+          assert(AssembleAction->getType() == types::TY_Object);
+          assert(AssembleAction->getInputs().size() == 1);
+
+          Action *BackendAction = AssembleAction->getInputs()[0];
+          assert(BackendAction->getType() == types::TY_PP_Asm);
+
+          for (auto &A : {AssembleAction, BackendAction}) {
+            OffloadAction::DeviceDependences DDep;
+            DDep.add(*A, *ToolChains.front(), GpuArchList[I],
+                     Action::OFK_Cuda);
+            DeviceActions.push_back(
+                C.MakeAction<OffloadAction>(DDep, A->getType()));
+          }
+        }
+
+        // We generate the fat binary if we have device input actions.
+        if (!DeviceActions.empty()) {
+          CudaFatBinary = C.MakeAction<LinkJobAction>(DeviceActions,
+                                                      types::TY_CUDA_FATBIN);
+
+          if (!CompileDeviceOnly) {
+            DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+                   Action::OFK_Cuda);
+            // Clear the fat binary, it is already a dependence of a host
+            // action.
+            CudaFatBinary = nullptr;
+          }
+
+          // Remove the CUDA actions as they are already connected to a host
+          // action or fat binary.
+          CudaDeviceActions.clear();
+        }
+
+        // We avoid creating host action in device-only mode.
+        return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
+      }
+
+      assert(CurPhase < phases::Backend && "Generating single CUDA "
+                                           "instructions should only occur "
+                                           "before the backend phase!");
+
+      // By default, we produce an action for each device arch.
+      for (Action *&A : CudaDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+      return ABRT_Success;
+    }
+
+    ActionBuilderReturnCode addDeviceDependences(Action *HostAction) override {
+      // While generating code for CUDA, we only depend on the host input
+      // action to trigger the creation of all the CUDA device actions.
+
+      // If we are dealing with an input action, replicate it for each GPU
+      // architecture. If we are in host-only mode we return 'success' so
+      // that the host uses the CUDA offload kind.
+      if (auto *IA = dyn_cast<InputAction>(HostAction)) {
+        assert(!GpuArchList.empty() &&
+               "We should have at least one GPU architecture.");
+
+        // If the host input is not CUDA, we don't need to bother about this
+        // input.
+        if (IA->getType() != types::TY_CUDA) {
+          // The builder will ignore this input.
+          IsActive = false;
+          return ABRT_Inactive;
+        }
+
+        // Set the flag to true, so that the builder acts on the current
+        // input.
+        IsActive = true;
+
+        if (CompileHostOnly)
+          return ABRT_Success;
+
+        // Replicate inputs for each GPU architecture.
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+          CudaDeviceActions.push_back(C.MakeAction<InputAction>(
+              IA->getInputArg(), types::TY_CUDA_DEVICE));
+
+        return ABRT_Success;
+      }
+
+      return IsActive ? ABRT_Success : ABRT_Inactive;
+    }
+
+    void appendTopLevelActions(ActionList &AL) override {
+      // Utility to append actions to the top level list.
+      auto AddTopLevel = [&](Action *A, const char *BoundArch) {
+        OffloadAction::DeviceDependences Dep;
+        Dep.add(*A, *ToolChains.front(), BoundArch, Action::OFK_Cuda);
+        AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
+      };
+
+      // If we have a fat binary, add it to the list.
+      if (CudaFatBinary) {
+        AddTopLevel(CudaFatBinary, /*BoundArch=*/nullptr);
+        CudaDeviceActions.clear();
+        CudaFatBinary = nullptr;
+        return;
+      }
+
+      if (CudaDeviceActions.empty())
+        return;
+
+      // If we have CUDA actions at this point, that's because we have a
+      // partial compilation, so we should have an action for each GPU
+      // architecture.
+      assert(CudaDeviceActions.size() == GpuArchList.size() &&
+             "Expecting one action per GPU architecture.");
+      assert(ToolChains.size() == 1 &&
+             "Expecting to have a single CUDA toolchain.");
+      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+        AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
+
+      CudaDeviceActions.clear();
+    }
+
+    bool initialize() override {
+      // We don't need to support CUDA.
+      if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
+        return false;
+
+      ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
+
+      Arg *PartialCompilationArg = Args.getLastArg(
+          options::OPT_cuda_host_only, options::OPT_cuda_device_only,
+          options::OPT_cuda_compile_host_device);
+      CompileHostOnly = PartialCompilationArg &&
+                        PartialCompilationArg->getOption().matches(
+                            options::OPT_cuda_host_only);
+      CompileDeviceOnly = PartialCompilationArg &&
+                          PartialCompilationArg->getOption().matches(
+                              options::OPT_cuda_device_only);
+
+      // Collect all cuda_gpu_arch parameters, removing duplicates.
+      llvm::StringSet<> GpuArchNames;
+      bool Error = false;
+      for (Arg *A : Args) {
+        if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+          continue;
+        A->claim();
+
+        const auto &Arch = A->getValue();
+        if (!toolchains::CudaToolChain::GpuArchToComputeName(Arch)) {
+          C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
+          Error = true;
+        } else if (GpuArchNames.insert(Arch).second)
+          GpuArchList.push_back(Arch);
+      }
+
+      // Default to sm_20 which is the lowest common denominator for supported
+      // GPUs.
+      // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
+      if (GpuArchList.empty())
+        GpuArchList.push_back("sm_20");
+
+      return Error;
+    }
+  };
+
+  /// Add the implementation for other specialized builders here.
+
+  /// Specialized builders being used by this offloading action builder.
+  SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
+
+public:
+  OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
+                          const Driver::InputList &Inputs)
+      : C(C), Args(Args) {
+    // Create a specialized builder for each device toolchain.
+
+    IsValid = true;
+
+    // Create a specialized builder for CUDA.
+    SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
+
+    //
+    // TODO: Build other specialized builders here.
+    //
+
+    // Initialize all the builders, keeping track of errors.
+    for (auto *SB : SpecializedBuilders)
+      IsValid = IsValid && !SB->initialize();
+  }
+
+  ~OffloadingActionBuilder() {
+    for (auto *SB : SpecializedBuilders)
+      delete SB;
+  }
+
+  /// Generate an action that adds device dependences (if any) to a host
+  /// action. If no device dependence actions exist, just return the host
+  /// action \a HostAction. If an error is found or if no builder requires
+  /// the host action to be generated, return nullptr.
+  Action *
+  addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
+                                   phases::ID CurPhase, phases::ID FinalPhase,
+                                   DeviceActionBuilder::PhasesTy &Phases) {
+    if (!IsValid)
+      return nullptr;
+
+    if (SpecializedBuilders.empty())
+      return HostAction;
+
+    assert(HostAction && "Invalid host action!");
+
+    OffloadAction::DeviceDependences DDeps;
+    // Check if all the programming models agree we should not emit the host
+    // action. Also, keep track of the offloading kinds employed.
+    auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
+    unsigned InactiveBuilders = 0u;
+    unsigned IgnoringBuilders = 0u;
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid()) {
+        ++InactiveBuilders;
+        continue;
+      }
+
+      auto RetCode =
+          SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
+
+      // If the builder explicitly says the host action should be ignored,
+      // increment the count of builders that request the host object to be
+      // ignored.
+      if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
+        ++IgnoringBuilders;
+
+      // Unless the builder was inactive for this action, we have to record
+      // the offload kind because the host will have to use it.
+      if (RetCode != DeviceActionBuilder::ABRT_Inactive)
+        OffloadKind |= SB->getAssociatedOffloadKind();
+    }
+
+    // If all builders agree that the host object should be ignored, just
+    // return nullptr.
+    if (IgnoringBuilders &&
+        SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
+      return nullptr;
+
+    if (DDeps.getActions().empty())
+      return HostAction;
+
+    // We have dependences we need to bundle together. We use an offload
+    // action for that.
     OffloadAction::HostDependence HDep(
         *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
-        /*BoundArch=*/nullptr, Action::OFK_Cuda);
-    return C.MakeAction<OffloadAction>(HDep);
+        /*BoundArch=*/nullptr, DDeps);
+    return C.MakeAction<OffloadAction>(HDep, DDeps);
   }
 
-  // Collect all cuda_gpu_arch parameters, removing duplicates.
-  SmallVector<const char *, 4> GpuArchList;
-  llvm::StringSet<> GpuArchNames;
-  for (Arg *A : Args) {
-    if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
-      continue;
-    A->claim();
+  /// Generate an action that adds a host dependence to a device action. The
+  /// results will be kept in this action builder. Return true if an error
+  /// was found.
+  bool addHostDependenceToDeviceActions(Action *HostAction,
+                                        const Arg *InputArg) {
+    if (!IsValid)
+      return true;
 
-    const auto& Arch = A->getValue();
-    if (!toolchains::CudaToolChain::GpuArchToComputeName(Arch))
-      C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
-    else if (GpuArchNames.insert(Arch).second)
-      GpuArchList.push_back(Arch);
-  }
-
-  // Default to sm_20 which is the lowest common denominator for supported GPUs.
-  // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
-  if (GpuArchList.empty())
-    GpuArchList.push_back("sm_20");
-
-  // Replicate inputs for each GPU architecture.
-  Driver::InputList CudaDeviceInputs;
-  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
-    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
-
-  // Build actions for all device inputs.
-  ActionList CudaDeviceActions;
-  C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
-  assert(GpuArchList.size() == CudaDeviceActions.size() &&
-         "Failed to create actions for all devices");
-
-  // Check whether any of device actions stopped before they could generate PTX.
-  bool PartialCompilation =
-      llvm::any_of(CudaDeviceActions, [](const Action *a) {
-        return a->getKind() != Action::AssembleJobClass;
-      });
+    assert(HostAction && "Invalid host action!");
+
+    // Register the offload kinds that are used.
+    auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+
+      auto RetCode = SB->addDeviceDependences(HostAction);
 
-  const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+      // Host dependences for device actions are not compatible with that
+      // same action being ignored.
+      assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
+             "Host dependence not expected to be ignored!");
 
-  // Figure out what to do with device actions -- pass them as inputs to the
-  // host action or run each of them independently.
-  if (PartialCompilation || CompileDeviceOnly) {
-    // In case of partial or device-only compilation results of device actions
-    // are not consumed by the host action device actions have to be added to
-    // top-level actions list with AtTopLevel=true and run independently.
+      // Unless the builder was inactive for this action, we have to record
+      // the offload kind because the host will have to use it.
+      if (RetCode != DeviceActionBuilder::ABRT_Inactive)
+        OffloadKind |= SB->getAssociatedOffloadKind();
+    }
+
+    return false;
+  }
 
-    // -o is ambiguous if we have more than one top-level action.
-    if (Args.hasArg(options::OPT_o) &&
-        (!CompileDeviceOnly || GpuArchList.size() > 1)) {
+  /// Add the offloading top level actions to the provided action list.
+  bool appendTopLevelActions(ActionList &AL, Action *HostAction,
+                             const Arg *InputArg) {
+    auto NumActions = AL.size();
+
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+      SB->appendTopLevelActions(AL);
+    }
+
+    assert(NumActions <= AL.size() && "Expecting more actions, not less!");
+
+    // Propagate to the current host action (if any) the offload information
+    // associated with the current input.
+    if (HostAction)
+      HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
+                                           /*BoundArch=*/nullptr);
+
+    // If any action is added by the builders, -o is ambiguous if we have
+    // more than one top-level action.
+    if (NumActions < AL.size() && Args.hasArg(options::OPT_o) &&
+        AL.size() > 1) {
       C.getDriver().Diag(
           clang::diag::err_drv_output_argument_with_multiple_files);
-      return nullptr;
+      return true;
     }
 
-    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-      OffloadAction::DeviceDependences DDep;
-      DDep.add(*CudaDeviceActions[I], *CudaTC, GpuArchList[I],
-               Action::OFK_Cuda);
-      Actions.push_back(
-          C.MakeAction<OffloadAction>(DDep, CudaDeviceActions[I]->getType()));
+    return false;
+  }
+
+  /// Processes the host linker action. This currently consists of replacing
+  /// it with an offload action if there are device link objects and
+  /// propagating to the host action all the offload kinds used in the
+  /// current compilation. The resulting action is returned.
+  Action *processHostLinkAction(Action *HostAction) {
+    // Add all the dependences from the device linking actions.
+    OffloadAction::DeviceDependences DDeps;
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+
+      SB->appendLinkDependences(DDeps);
     }
 
-    // Kill host action in case of device-only compilation.
-    if (CompileDeviceOnly)
-      return nullptr;
-    return HostAction;
-  }
-
-  // If we're not a partial or device-only compilation, we compile each arch to
-  // ptx and assemble to cubin, then feed the cubin *and* the ptx into a device
-  // "link" action, which uses fatbinary to combine these cubins into one
-  // fatbin. The fatbin is then an input to the host compilation.
-  ActionList DeviceActions;
-  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-    Action* AssembleAction = CudaDeviceActions[I];
-    assert(AssembleAction->getType() == types::TY_Object);
-    assert(AssembleAction->getInputs().size() == 1);
-
-    Action* BackendAction = AssembleAction->getInputs()[0];
-    assert(BackendAction->getType() == types::TY_PP_Asm);
-
-    for (auto &A : {AssembleAction, BackendAction}) {
-      OffloadAction::DeviceDependences DDep;
-      DDep.add(*A, *CudaTC, GpuArchList[I], Action::OFK_Cuda);
-      DeviceActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));
-    }
-  }
-  auto FatbinAction =
-      C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
-
-  // Return a new host action that incorporates original host action and all
-  // device actions.
-  OffloadAction::HostDependence HDep(
-      *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
-      /*BoundArch=*/nullptr, Action::OFK_Cuda);
-  OffloadAction::DeviceDependences DDep;
-  DDep.add(*FatbinAction, *CudaTC, /*BoundArch=*/nullptr, Action::OFK_Cuda);
-  return C.MakeAction<OffloadAction>(HDep, DDep);
-}
+
+    // Calculate all the offload kinds used in the current compilation.
+    unsigned ActiveOffloadKinds = 0u;
+    for (auto &I : InputArgToOffloadKindMap)
+      ActiveOffloadKinds |= I.second;
+
+    // If we don't have device dependencies, we don't have to create an
+    // offload action.
+    if (DDeps.getActions().empty()) {
+      // Propagate all the active kinds to the host action. Given that it is
+      // a link action it is assumed to depend on all actions generated so
+      // far.
+      HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
+                                           /*BoundArch=*/nullptr);
+      return HostAction;
+    }
+
+    // Create the offload action with all dependences. When an offload action
+    // is created the kinds are propagated to the host action, so we don't
+    // have to do that explicitly here.
+    OffloadAction::HostDependence HDep(
+        *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+        /*BoundArch=*/nullptr, ActiveOffloadKinds);
+    return C.MakeAction<OffloadAction>(HDep, DDeps);
+  }
+};
+} // anonymous namespace.
 
 void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
@@ -1619,8 +2009,8 @@
     YcArg = YuArg = nullptr;
   }
 
-  // Track the host offload kinds used on this compilation.
-  unsigned CompilationActiveOffloadHostKinds = 0u;
+  // Builder to be used to build offloading actions.
+  OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
 
   // Construct the actions to perform.
   ActionList LinkerInputs;
@@ -1684,17 +2074,14 @@
       }
     }
 
-    phases::ID CudaInjectionPhase =
-        (phases::Compile < FinalPhase &&
-         llvm::find(PL, phases::Compile) != PL.end())
-            ? phases::Compile
-            : FinalPhase;
-
-    // Track the host offload kinds used on this input.
-    unsigned InputActiveOffloadHostKinds = 0u;
-
     // Build the pipeline for this file.
     Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
+
+    // Use the current host action in any of the offloading actions, if
+    // required.
+    if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+      break;
+
     for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
          i != e; ++i) {
       phases::ID Phase = *i;
@@ -1703,6 +2090,12 @@
       if (Phase > FinalPhase)
        break;
 
+      // Add any offload action the host action depends on.
+      Current = OffloadBuilder.addDeviceDependencesToHostAction(
+          Current, InputArg, Phase, FinalPhase, PL);
+      if (!Current)
+        break;
+
       // Queue linker inputs.
       if (Phase == phases::Link) {
         assert((i + 1) == e && "linking must be final compilation step.");
@@ -1711,48 +2104,37 @@
         break;
       }
 
-      // Some types skip the assembler phase (e.g., llvm-bc), but we can't
-      // encode this in the steps because the intermediate type depends on
-      // arguments. Just special case here.
-      if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm)
-        continue;
-
       // Otherwise construct the appropriate action.
-      Current = ConstructPhaseAction(C, Args, Phase, Current);
+      auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
 
-      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
-        Current = buildCudaActions(C, Args, InputArg, Current, Actions);
-        if (!Current)
-          break;
+      // We didn't create a new action, so we will just move to the next
+      // phase.
+      if (NewCurrent == Current)
+        continue;
 
-        // We produced a CUDA action for this input, so the host has to support
-        // CUDA.
-        InputActiveOffloadHostKinds |= Action::OFK_Cuda;
-        CompilationActiveOffloadHostKinds |= Action::OFK_Cuda;
-      }
+      Current = NewCurrent;
+
+      // Use the current host action in any of the offloading actions, if
+      // required.
+      if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+        break;
 
       if (Current->getType() == types::TY_Nothing)
        break;
     }
 
-    // If we ended with something, add to the output list. Also, propagate the
-    // offload information to the top-level host action related with the current
-    // input.
-    if (Current) {
-      if (InputActiveOffloadHostKinds)
-        Current->propagateHostOffloadInfo(InputActiveOffloadHostKinds,
-                                          /*BoundArch=*/nullptr);
+    // If we ended with something, add to the output list.
+    if (Current)
       Actions.push_back(Current);
-    }
+
+    // Add any top level actions generated for offloading.
+    OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
   }
 
-  // Add a link action if necessary and propagate the offload information for
-  // the current compilation.
+  // Add a link action if necessary.
   if (!LinkerInputs.empty()) {
-    Actions.push_back(
-        C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image));
-    Actions.back()->propagateHostOffloadInfo(CompilationActiveOffloadHostKinds,
-                                             /*BoundArch=*/nullptr);
+    Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
+    LA = OffloadBuilder.processHostLinkAction(LA);
+    Actions.push_back(LA);
  }
 
   // If we are linking, claim any options which are obviously only used for
@@ -1774,6 +2156,13 @@
 Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
                                      phases::ID Phase, Action *Input) const {
   llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
+
+  // Some types skip the assembler phase (e.g., llvm-bc), but we can't
+  // encode this in the steps because the intermediate type depends on
+  // arguments. Just special case here.
+  if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
+    return Input;
+
   // Build the appropriate action.
   switch (Phase) {
   case phases::Link:
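To make the builder protocol easier to review, here is a small self-contained
sketch (toy types and phase numbers, not Clang code) of the voting scheme that
addDeviceDependencesToHostAction() applies to the builders' return codes; only
the names mirror the patch, everything else is mocked:

  #include <cstdio>
  #include <vector>

  // Mirrors DeviceActionBuilder::ActionBuilderReturnCode in the patch.
  enum ActionBuilderReturnCode { ABRT_Success, ABRT_Inactive, ABRT_Ignore_Host };

  // Toy stand-in for a specialized builder such as CudaActionBuilder.
  struct ToyBuilder {
    bool HasToolChain; // isValid(): a device tool chain was configured.
    bool DeviceOnly;   // e.g. --cuda-device-only was passed.

    ActionBuilderReturnCode getDeviceDependences(int Phase, int Backend) const {
      if (!HasToolChain)
        return ABRT_Inactive;
      // At the backend phase the CUDA builder emits the fat binary; in
      // device-only mode it also asks the driver to drop the host action.
      return (Phase == Backend && DeviceOnly) ? ABRT_Ignore_Host : ABRT_Success;
    }
  };

  int main() {
    // One active device-only builder plus one builder with no tool chain.
    std::vector<ToyBuilder> Builders = {{true, true}, {false, false}};
    const int Backend = 3; // Toy phase order; not Clang's phases::ID values.

    for (int Phase = 0; Phase <= 4; ++Phase) {
      unsigned NumInactive = 0, NumIgnoring = 0;
      for (const ToyBuilder &B : Builders) {
        ActionBuilderReturnCode RC = B.getDeviceDependences(Phase, Backend);
        NumInactive += (RC == ABRT_Inactive);
        NumIgnoring += (RC == ABRT_Ignore_Host);
      }
      // Same rule as addDeviceDependencesToHostAction(): drop the host
      // action only if at least one builder asked to ignore it and every
      // builder is either inactive or ignoring.
      bool DropHost =
          NumIgnoring && NumInactive + NumIgnoring == Builders.size();
      std::printf("phase %d: host action %s\n", Phase,
                  DropHost ? "dropped" : "kept");
    }
    return 0;
  }

This is what makes --cuda-device-only suppress the host-side action once the
backend phase is reached, while a mixed compilation keeps the host action and
attaches the device dependences to it instead.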