diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -427,10 +427,11 @@ /// \param Args - The input arguments. /// \param Input - The input type and arguments /// \param HostAction - The host action used in the offloading toolchain. + /// \param DeviceAction - The leftover offload action if not merged. Action *BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, - const InputTy &Input, - Action *HostAction) const; + const InputTy &Input, Action *HostAction, + Action *&DeviceAction) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3905,6 +3905,9 @@ // Build the pipeline for this file. Action *Current = C.MakeAction(*InputArg, InputType); + // The leftover offloading action not merged into the host, if any. + Action *CurrentOffload = nullptr; + // Use the current host action in any of the offloading actions, if // required. if (!Args.hasArg(options::OPT_fopenmp_new_driver)) @@ -3958,7 +3961,7 @@ // Try to build the offloading actions and add the result as a dependency // to the host. if (Args.hasArg(options::OPT_fopenmp_new_driver)) - Current = BuildOffloadingActions(C, Args, I, Current); + Current = BuildOffloadingActions(C, Args, I, Current, CurrentOffload); // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -3987,6 +3990,12 @@ break; } + // Bundle any leftover device output with the host action. + if (Current && CurrentOffload) { + ActionList AL{CurrentOffload, Current}; + Current = C.MakeAction(AL); + } + // If we ended with something, add to the output list. 
if (Current) Actions.push_back(Current); @@ -4112,8 +4121,8 @@ Action *Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, - const InputTy &Input, - Action *HostAction) const { + const InputTy &Input, Action *HostAction, + Action *&DeviceAction) const { if (!isa(HostAction)) return HostAction; @@ -4124,6 +4133,8 @@ const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP}; + auto PL = types::getCompilationPhases(*this, Args, InputType); + for (Action::OffloadKind Kind : OffloadKinds) { SmallVector ToolChains; ActionList DeviceActions; @@ -4141,8 +4152,6 @@ if (DeviceActions.empty()) return HostAction; - auto PL = types::getCompilationPhases(*this, Args, InputType); - for (phases::ID Phase : PL) { if (Phase == phases::Link) { assert(Phase == PL.back() && "linking must be final compilation step."); @@ -4173,6 +4182,14 @@ } } + // We shouldn't embed the device action in the host if we are targeting a + // textual output format. + if (PL.back() != phases::Assemble && PL.back() != phases::Link) { + DeviceAction = C.MakeAction( + DDeps, DDeps.getActions().back()->getType()); + return HostAction; + } + OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch=*/nullptr, DDeps); diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -346,3 +346,10 @@ // RUN: | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s // NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp.nvptx64-nvidia-cuda.sm_70 + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_70 \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \ +// RUN: -fopenmp-new-driver -no-canonical-prefixes -S -emit-llvm -nogpulib %s 2>&1 \ +// RUN: | FileCheck -check-prefix=NEW_DRIVER_BUNDLING %s + +// 
NEW_DRIVER_BUNDLING: clang-offload-bundler" "-type=ll"