diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6643,11 +6643,17 @@ } if (WholeProgramVTables) { - if (!IsUsingLTO) + // Propagate -fwhole-program-vtables if this is an LTO compile. + if (IsUsingLTO) + CmdArgs.push_back("-fwhole-program-vtables"); + // Check if we passed LTO options but they were suppressed because this is a + // device offloading action, or we passed device offload LTO options which + // were suppressed because this is not the device offload action. + // Otherwise, issue an error. + else if (!D.isUsingLTO(!IsDeviceOffloadAction)) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; - CmdArgs.push_back("-fwhole-program-vtables"); } bool DefaultsSplitLTOUnit = diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -183,6 +183,12 @@ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s +// Verify -flto=thin -fwhole-program-vtables handling. This should result in +// both options being passed to the host compilation, with neither passed to +// the device compilation. +// RUN: %clang -### -target x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s +// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" @@ -206,8 +212,10 @@ // Match the job that produces PTX assembly. // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// THINLTOWPD-NOT: "-flto=thin" // DEVICE-SAME: "-fcuda-is-device" // DEVICE-SM30-SAME: "-target-cpu" "sm_30" +// THINLTOWPD-NOT: "-fwhole-program-vtables" // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" // DEVICE-NOSAVE-SAME: "-x" "cuda" // DEVICE-SAVE-SAME: "-x" "ir" @@ -252,12 +260,14 @@ // Match host-side compilation. // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu" // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" +// THINLTOWPD-SAME: "-flto=thin" // HOST-NOT: "-fcuda-is-device" // There is only one GPU binary after combining it with fatbinary! // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" // There is only one GPU binary after combining it with fatbinary. // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" +// THINLTOWPD-SAME: "-fwhole-program-vtables" // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" // HOST-NOSAVE-SAME: "-x" "cuda" // HOST-SAVE-SAME: "-x" "cuda-cpp-output" diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -60,13 +60,30 @@ // Check -foffload-lto=thin translated correctly. // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin %s 2>&1 \ -// RUN: | FileCheck -check-prefix=THINLTO %s +// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s +// Ensure we don't error about -fwhole-program-vtables for the non-device offload compile. +// HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" +// HIPTHINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables" +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" +// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" + +// Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables. +// // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin %s 2>&1 \ +// RUN: --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \ // RUN: | FileCheck -check-prefix=THINLTO %s -// THINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" -// THINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" -// THINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" +// Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should +// drop -fwhole-program-vtables for the device offload compile and pass it through for the +// non-device offload compile along with -flto=thin. +// THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables" +// THINLTO: clang{{.*}}" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables" +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"