diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2661,6 +2661,13 @@
     getDeviceDependences(OffloadAction::DeviceDependences &DA,
                          phases::ID CurPhase, phases::ID FinalPhase,
                          PhasesTy &Phases) override {
+      // If the final phase cannot produce output that bundles both device and
+      // host objects, treat the compilation as host-only unless device-only
+      // compilation was explicitly requested.
+      if (!CompileDeviceOnly && FinalPhase < phases::Backend) {
+        CudaDeviceActions.clear();
+        return ABRT_Inactive;
+      }
       // amdgcn does not support linking of object files, therefore we skip
       // backend and assemble phases to output LLVM IR. Except for generating
       // non-relocatable device coee, where we generate fat binary for device
diff --git a/clang/test/Driver/hip-pre-backend-phases.hip b/clang/test/Driver/hip-pre-backend-phases.hip
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/hip-pre-backend-phases.hip
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -M %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -E %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-NOT: clang-offload-bundler"