diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -921,6 +921,8 @@
            "specified more than once.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[NoXarchOption]>,
   Alias<offload_arch_EQ>;
+def cuda_device_triple: Joined<["--"], "cuda-device-triple=">,
+  HelpText<"Override the triple passed to CUDA device compilation phase">;
 def hip_link : Flag<["--"], "hip-link">,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, Flags<[NoXarchOption]>,
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -706,8 +706,13 @@
     const llvm::Triple &HostTriple = HostTC->getTriple();
     StringRef DeviceTripleStr;
     auto OFK = Action::OFK_Cuda;
-    DeviceTripleStr =
-        HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
+    if (auto *CudaOverrideDeviceTriple =
+            C.getInputArgs().getLastArg(options::OPT_cuda_device_triple)) {
+      DeviceTripleStr = CudaOverrideDeviceTriple->getValue();
+    } else {
+      DeviceTripleStr =
+          HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
+    }
     llvm::Triple CudaTriple(DeviceTripleStr);
     // Use the CUDA and host triples as the key into the ToolChains map,
     // because the device toolchain we create depends on both.
diff --git a/clang/test/Driver/cuda-device-triple.cu b/clang/test/Driver/cuda-device-triple.cu
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/cuda-device-triple.cu
@@ -0,0 +1,8 @@
+// REQUIRES: clang-driver
+
+// RUN: %clang -### -emit-llvm --cuda-device-only \
+// RUN:   -nocudalib -nocudainc --cuda-device-triple=spirv32-unknown-unknown -c %s 2>&1 | FileCheck %s
+
+// CHECK: clang{{.*}}" "-cc1" "-triple" "spirv32-unknown-unknown" {{.*}} "-fcuda-is-device" {{.*}}
+
+