diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6643,11 +6643,17 @@
   }
 
   if (WholeProgramVTables) {
-    if (!IsUsingLTO)
+    // Propagate -fwhole-program-vtables if this is an LTO compile.
+    if (IsUsingLTO)
+      CmdArgs.push_back("-fwhole-program-vtables");
+    // Check if we passed LTO options but they were suppressed because this is a
+    // device offloading action, or we passed device offload LTO options which
+    // were suppressed because this is not the device offload action.
+    // Otherwise, issue an error.
+    else if (!D.isUsingLTO(!IsDeviceOffloadAction))
       D.Diag(diag::err_drv_argument_only_allowed_with)
           << "-fwhole-program-vtables"
           << "-flto";
-    CmdArgs.push_back("-fwhole-program-vtables");
   }
 
   bool DefaultsSplitLTOUnit =
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -183,6 +183,12 @@
 // RUN:   -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
 
+// Verify -flto=thin -fwhole-program-vtables handling. This should result in
+// both options being passed to the host compilation, with neither passed to
+// the device compilation.
+// RUN: %clang -### -target x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
+// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 
 // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
 // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
@@ -206,8 +212,10 @@
 // Match the job that produces PTX assembly.
 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// THINLTOWPD-NOT: "-flto=thin"
 // DEVICE-SAME: "-fcuda-is-device"
 // DEVICE-SM30-SAME: "-target-cpu" "sm_30"
+// THINLTOWPD-NOT: "-fwhole-program-vtables"
 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
 // DEVICE-NOSAVE-SAME: "-x" "cuda"
 // DEVICE-SAVE-SAME: "-x" "ir"
@@ -252,12 +260,14 @@
 // Match host-side compilation.
 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
+// THINLTOWPD-SAME: "-flto=thin"
 // HOST-NOT: "-fcuda-is-device"
 // There is only one GPU binary after combining it with fatbinary!
 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
 // There is only one GPU binary after combining it with fatbinary.
 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
+// THINLTOWPD-SAME: "-fwhole-program-vtables"
 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 // HOST-NOSAVE-SAME: "-x" "cuda"
 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -60,13 +60,30 @@
 // Check -foffload-lto=thin translated correctly.
 
 // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=THINLTO %s
+// RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
+
+// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
+// Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
+// HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
+// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
+// HIPTHINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
+// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
+// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
+
+// Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables.
+//
 // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin %s 2>&1 \
+// RUN:   --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=THINLTO %s
 
-// THINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
-// THINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit"
-// THINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
+// Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should
+// drop -fwhole-program-vtables for the device offload compile and pass it through for the
+// non-device offload compile along with -flto=thin.
+// THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
+// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
+// THINLTO: clang{{.*}}" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
+// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"