diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -906,14 +906,6 @@ def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">, InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating " "'-aux-target-cpu' and '-aux-target-feature'.">; -def cuda_device_only : Flag<["--"], "cuda-device-only">, - HelpText<"Compile CUDA code for device only">; -def cuda_host_only : Flag<["--"], "cuda-host-only">, - HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA " - "compilations.">; -def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, - HelpText<"Compile CUDA code for both host and device (default). Has no " - "effect on non-CUDA compilations.">; def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>, HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, @@ -2538,6 +2530,19 @@ HelpText<"Use the new driver for offloading compilation.">; def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group, HelpText<"Don't Use the new driver for offloading compilation.">; +def offload_device_only : Flag<["--"], "offload-device-only">, + HelpText<"Only compile for the offloading device.">; +def offload_host_only : Flag<["--"], "offload-host-only">, + HelpText<"Only compile for the offloading host.">; +def offload_host_device : Flag<["--"], "offload-host-device">, + HelpText<"Compile for both the offloading host and device (default).">; +def cuda_device_only : Flag<["--"], "cuda-device-only">, Alias, + HelpText<"Compile CUDA code for device only">; +def cuda_host_only : Flag<["--"], "cuda-host-only">, Alias, + HelpText<"Compile CUDA code for host only. 
Has no effect on non-CUDA compilations.">; +def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, Alias, + HelpText<"Compile CUDA code for both host and device (default). Has no " + "effect on non-CUDA compilations.">; def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group, HelpText<"Use the new driver for OpenMP offloading.">; def fno_openmp_new_driver : Flag<["-"], "fno-openmp-new-driver">, Flags<[CC1Option]>, Group, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2868,14 +2868,14 @@ : C.getSingleOffloadToolChain()); Arg *PartialCompilationArg = Args.getLastArg( - options::OPT_cuda_host_only, options::OPT_cuda_device_only, - options::OPT_cuda_compile_host_device); - CompileHostOnly = PartialCompilationArg && - PartialCompilationArg->getOption().matches( - options::OPT_cuda_host_only); - CompileDeviceOnly = PartialCompilationArg && - PartialCompilationArg->getOption().matches( - options::OPT_cuda_device_only); + options::OPT_offload_host_only, options::OPT_offload_device_only, + options::OPT_offload_host_device); + CompileHostOnly = + PartialCompilationArg && PartialCompilationArg->getOption().matches( + options::OPT_offload_host_only); + CompileDeviceOnly = + PartialCompilationArg && PartialCompilationArg->getOption().matches( + options::OPT_offload_device_only); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ); @@ -4055,11 +4055,6 @@ break; } - // Try to build the offloading actions and add the result as a dependency - // to the host. - if (UseNewOffloadingDriver) - Current = BuildOffloadingActions(C, Args, I, Current); - // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? 
@@ -4083,6 +4078,11 @@ if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) break; + // Try to build the offloading actions and add the result as a dependency + // to the host. + if (UseNewOffloadingDriver) + Current = BuildOffloadingActions(C, Args, I, Current); + if (Current->getType() == types::TY_Nothing) break; } @@ -4204,10 +4204,10 @@ // Claim ignored clang-cl options. Args.ClaimAllArgs(options::OPT_cl_ignored_Group); - // Claim --cuda-host-only and --cuda-compile-host-device, which may be passed - // to non-CUDA compilations and should not trigger warnings there. - Args.ClaimAllArgs(options::OPT_cuda_host_only); - Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); + // Claim --offload-host-only and --offload-host-device, which may be + // passed to non-CUDA compilations and should not trigger warnings there. + Args.ClaimAllArgs(options::OPT_offload_host_only); + Args.ClaimAllArgs(options::OPT_offload_host_device); } /// Returns the canonical name for the offloading architecture when using HIP or @@ -4309,14 +4309,22 @@ llvm::opt::DerivedArgList &Args, const InputTy &Input, Action *HostAction) const { - if (!isa(HostAction)) + const Arg *Mode = Args.getLastArg(options::OPT_offload_host_only, + options::OPT_offload_device_only, + options::OPT_offload_host_device); + const bool HostOnly = + Mode && Mode->getOption().matches(options::OPT_offload_host_only); + const bool DeviceOnly = + Mode && Mode->getOption().matches(options::OPT_offload_device_only); + + // Don't build offloading actions if explicitly disabled or we do not have a + // compile action to embed it in. If preprocessing only ignore embedding. 
+ if (HostOnly || !(isa(HostAction) || + getFinalPhase(Args) == phases::Preprocess)) return HostAction; OffloadAction::DeviceDependences DDeps; - types::ID InputType = Input.first; - const Arg *InputArg = Input.second; - const Action::OffloadKind OffloadKinds[] = { Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP}; @@ -4331,6 +4339,9 @@ if (ToolChains.empty()) continue; + types::ID InputType = Input.first; + const Arg *InputArg = Input.second; + // Get the product of all bound architectures and toolchains. SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) @@ -4355,7 +4366,8 @@ for (Action *&A : DeviceActions) { A = ConstructPhaseAction(C, Args, Phase, A, Kind); - if (isa(A) && Kind == Action::OFK_OpenMP) { + if (isa(A) && isa(HostAction) && + Kind == Action::OFK_OpenMP) { // OpenMP offloading has a dependency on the host compile action to // identify which declarations need to be emitted. This shouldn't be // collapsed with any other actions so we can use it in the device. 
@@ -4389,6 +4401,9 @@ } } + if (DeviceOnly) + return C.MakeAction(DDeps, types::TY_Nothing); + OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch=*/nullptr, DDeps); diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu --- a/clang/test/Driver/cuda-openmp-driver.cu +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -16,3 +16,18 @@ // RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s // RDC: error: Using '--offload-new-driver' requires '-fgpu-rdc' + +// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \ +// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \ +// RUN: | FileCheck -check-prefix BINDINGS-HOST %s + +// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[OUTPUT:.+]]" +// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out" + +// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \ +// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \ +// RUN: | FileCheck -check-prefix BINDINGS-DEVICE %s + +// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]" +// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]" +// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin" diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c --- a/clang/test/Driver/openmp-offload-gpu-new.c +++ b/clang/test/Driver/openmp-offload-gpu-new.c @@ -3,7 +3,6 @@ /// // REQUIRES: x86-registered-target -// REQUIRES: powerpc-registered-target // REQUIRES: nvptx-registered-target // REQUIRES: amdgpu-registered-target @@ -50,3 +49,18 @@ // RUN: | FileCheck 
-check-prefix=DRIVER_EMBEDDING %s // DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70 + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY +// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]" +// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY +// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" +// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]" +// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP +// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"