diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -906,14 +906,6 @@
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
                               "'-aux-target-cpu' and '-aux-target-feature'.">;
-def cuda_device_only : Flag<["--"], "cuda-device-only">,
-  HelpText<"Compile CUDA code for device only">;
-def cuda_host_only : Flag<["--"], "cuda-host-only">,
-  HelpText<"Compile CUDA code for host only.  Has no effect on non-CUDA "
-           "compilations.">;
-def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
-  HelpText<"Compile CUDA code for both host and device (default).  Has no "
-           "effect on non-CUDA compilations.">;
 def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>,
   HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
 def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
@@ -2532,6 +2524,19 @@
   PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
 def static_openmp: Flag<["-"], "static-openmp">,
   HelpText<"Use the static host OpenMP runtime while linking.">;
+def offload_device_only : Flag<["--"], "offload-device-only">,
+  HelpText<"Only compile for the offloading device.">;
+def offload_host_only : Flag<["--"], "offload-host-only">,
+  HelpText<"Only compile for the offloading host.">;
+def offload_host_device : Flag<["--"], "offload-host-device">,
+  HelpText<"Compile for both the offloading host and device (default).">;
+def cuda_device_only : Flag<["--"], "cuda-device-only">, Alias<offload_device_only>,
+  HelpText<"Compile CUDA code for device only">;
+def cuda_host_only : Flag<["--"], "cuda-host-only">, Alias<offload_host_only>,
+  HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA compilations.">;
+def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, Alias<offload_host_device>,
+  HelpText<"Compile CUDA code for both host and device (default). Has no "
+           "effect on non-CUDA compilations.">;
 def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
   HelpText<"Use the new driver for OpenMP offloading.">;
 def fno_openmp_new_driver : Flag<["-"], "fno-openmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2868,14 +2868,14 @@
               : C.getSingleOffloadToolChain<Action::OFK_HIP>());
 
       Arg *PartialCompilationArg = Args.getLastArg(
-          options::OPT_cuda_host_only, options::OPT_cuda_device_only,
-          options::OPT_cuda_compile_host_device);
-      CompileHostOnly = PartialCompilationArg &&
-                        PartialCompilationArg->getOption().matches(
-                            options::OPT_cuda_host_only);
-      CompileDeviceOnly = PartialCompilationArg &&
-                          PartialCompilationArg->getOption().matches(
-                              options::OPT_cuda_device_only);
+          options::OPT_offload_host_only, options::OPT_offload_device_only,
+          options::OPT_offload_host_device);
+      CompileHostOnly =
+          PartialCompilationArg && PartialCompilationArg->getOption().matches(
+                                       options::OPT_offload_host_only);
+      CompileDeviceOnly =
+          PartialCompilationArg && PartialCompilationArg->getOption().matches(
+                                       options::OPT_offload_device_only);
       EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
       EmitAsm = Args.getLastArg(options::OPT_S);
       FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
@@ -4052,11 +4052,6 @@
         break;
       }
 
-      // Try to build the offloading actions and add the result as a dependency
-      // to the host.
-      if (UseNewOffloadingDriver)
-        Current = BuildOffloadingActions(C, Args, I, Current);
-
       // FIXME: Should we include any prior module file outputs as inputs of
       // later actions in the same command line?
 
@@ -4080,6 +4075,11 @@
         if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
           break;
 
+      // Try to build the offloading actions and add the result as a dependency
+      // to the host.
+      if (UseNewOffloadingDriver)
+        Current = BuildOffloadingActions(C, Args, I, Current);
+
       if (Current->getType() == types::TY_Nothing)
         break;
     }
@@ -4202,24 +4202,37 @@
   // Claim ignored clang-cl options.
   Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
 
-  // Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
-  // to non-CUDA compilations and should not trigger warnings there.
-  Args.ClaimAllArgs(options::OPT_cuda_host_only);
-  Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
+  // Claim --offload-host-only and --offload-host-device, which may be
+  // passed to non-CUDA compilations and should not trigger warnings there.
+  Args.ClaimAllArgs(options::OPT_offload_host_only);
+  Args.ClaimAllArgs(options::OPT_offload_host_device);
 }
 
 Action *Driver::BuildOffloadingActions(Compilation &C,
                                        llvm::opt::DerivedArgList &Args,
                                        const InputTy &Input,
                                        Action *HostAction) const {
-  if (!isa<CompileJobAction>(HostAction))
-    return HostAction;
-
-  OffloadAction::DeviceDependences DDeps;
+  const Arg *Mode = Args.getLastArg(options::OPT_offload_host_only,
+                                    options::OPT_offload_device_only,
+                                    options::OPT_offload_host_device);
+  const bool HostOnly =
+      Mode && Mode->getOption().matches(options::OPT_offload_host_only);
+  const bool DeviceOnly =
+      Mode && Mode->getOption().matches(options::OPT_offload_device_only);
 
   types::ID InputType = Input.first;
   const Arg *InputArg = Input.second;
 
+  auto PL = types::getCompilationPhases(*this, Args, InputType);
+
+  // Don't build offloading actions if explicitly disabled or we do not have a
+  // compile action to embed it in. If preprocessing only ignore embedding.
+  if ((!isa<CompileJobAction>(HostAction) && PL.back() != phases::Preprocess) ||
+      HostOnly)
+    return HostAction;
+
+  OffloadAction::DeviceDependences DDeps;
+
   const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP};
 
   for (Action::OffloadKind Kind : OffloadKinds) {
@@ -4239,8 +4252,6 @@
     if (DeviceActions.empty())
       return HostAction;
 
-    auto PL = types::getCompilationPhases(*this, Args, InputType);
-
     for (phases::ID Phase : PL) {
       if (Phase == phases::Link) {
         assert(Phase == PL.back() && "linking must be final compilation step.");
@@ -4251,7 +4262,8 @@
       for (Action *&A : DeviceActions) {
         A = ConstructPhaseAction(C, Args, Phase, A, Kind);
 
-        if (isa<CompileJobAction>(A) && Kind == Action::OFK_OpenMP) {
+        if (isa<CompileJobAction>(A) && isa<CompileJobAction>(HostAction) &&
+            Kind == Action::OFK_OpenMP) {
           HostAction->setCannotBeCollapsedWithNextDependentAction();
           OffloadAction::HostDependence HDep(
               *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
@@ -4271,6 +4283,9 @@
     }
   }
 
+  if (DeviceOnly)
+    return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing);
+
   OffloadAction::HostDependence HDep(
       *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
       /*BoundArch=*/nullptr, DDeps);
diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c
--- a/clang/test/Driver/openmp-offload-gpu-new.c
+++ b/clang/test/Driver/openmp-offload-gpu-new.c
@@ -3,7 +3,6 @@
 ///
 
 // REQUIRES: x86-registered-target
-// REQUIRES: powerpc-registered-target
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
@@ -50,3 +49,18 @@
 // RUN:   | FileCheck -check-prefix=DRIVER_EMBEDDING %s
 
 // DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
+// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
+// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
+// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
+// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:     --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
+// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"