diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -897,9 +897,13 @@ HelpText<"Ignore environment variables to detect CUDA installation">; def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group, HelpText<"Path to ptxas (used for compiling CUDA code)">; +def fgpu_flush_denormals_to_zero : Flag<["-"], "fgpu-flush-denormals-to-zero">, + HelpText<"Flush denormal floating point values to zero in CUDA/HIP device mode.">; +def fno_gpu_flush_denormals_to_zero : Flag<["-"], "fno-gpu-flush-denormals-to-zero">; def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">, - HelpText<"Flush denormal floating point values to zero in CUDA device mode.">; -def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">; + Alias; +def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">, + Alias; defm gpu_rdc : BoolFOption<"gpu-rdc", LangOpts<"GPURelocatableDeviceCode">, DefaultFalse, PosFlag, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -643,8 +643,8 @@ auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && - DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, - options::OPT_fno_cuda_flush_denormals_to_zero, + DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, getDefaultDenormsAreZeroForTarget(Kind))) return llvm::DenormalMode::getPreserveSign(); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -763,9 +763,8 @@ const llvm::fltSemantics *FPType) const { if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { if (FPType && FPType == &llvm::APFloat::IEEEsingle() && - DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, - options::OPT_fno_cuda_flush_denormals_to_zero, - false)) + DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, false)) return llvm::DenormalMode::getPreserveSign(); } diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -400,8 +400,8 @@ // If --hip-device-lib is not set, add the default bitcode libraries. // TODO: There are way too many flags that change this. Do we need to check // them all? - bool DAZ = DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, - options::OPT_fno_cuda_flush_denormals_to_zero, + bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, getDefaultDenormsAreZeroForTarget(Kind)); // TODO: Check standard C++ flags? bool FiniteOnly = false; diff --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu --- a/clang/test/CodeGenCUDA/flush-denormals.cu +++ b/clang/test/CodeGenCUDA/flush-denormals.cu @@ -27,7 +27,7 @@ // Checks that device function calls get emitted with the "denormal-fp-math-f32" // attribute set when we compile CUDA device code with // -fdenormal-fp-math-f32. Further, check that we reflect the presence or -// absence of -fcuda-flush-denormals-to-zero in a module flag. +// absence of -fgpu-flush-denormals-to-zero in a module flag. // AMDGCN targets always have f64/f16 denormals enabled. // @@ -36,7 +36,7 @@ // // For AMDGCN target with fast FMAF (e.g. gfx900), it has ieee denormals by // default and preserve-sign when there with the option -// -fcuda-flush-denormals-to-zero. +// -fgpu-flush-denormals-to-zero. // CHECK-LABEL: define void @foo() #0 extern "C" __device__ void foo() {} diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -1,22 +1,26 @@ // Checks that cuda compilation does the right thing when passed -// -fcuda-flush-denormals-to-zero. This should be translated to +// -fgpu-flush-denormals-to-zero. This should be translated to // -fdenormal-fp-math-f32=preserve-sign +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s + +// Test alias options -f[no-]cuda-flush-denormals-to-zero // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s -// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test explicit argument, with CUDA offload kind -// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test explicit argument, with HIP offload kind -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-gpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test the default changing with no argument based on the subtarget in HIP mode // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s @@ -27,8 +31,8 @@ // Test multiple offload archs with different defaults. // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fcuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-cuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fgpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-gpu-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // CPUFTZ-NOT: -fdenormal-fp-math diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -24,7 +24,7 @@ // Test explicit flag, opposite of target default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ -// RUN: -fcuda-flush-denormals-to-zero \ +// RUN: -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD @@ -33,7 +33,7 @@ // Test explicit flag, opposite of target default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 \ -// RUN: -fno-cuda-flush-denormals-to-zero \ +// RUN: -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD @@ -42,7 +42,7 @@ // Test explicit flag, same as target default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ -// RUN: -fno-cuda-flush-denormals-to-zero \ +// RUN: -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD @@ -51,7 +51,7 @@ // Test explicit flag, same as target default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 \ -// RUN: -fcuda-flush-denormals-to-zero \ +// RUN: -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD @@ -60,7 +60,7 @@ // Test last flag wins, not flushing // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 \ -// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD @@ -68,7 +68,7 @@ // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ -// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD @@ -76,7 +76,7 @@ // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ -// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD @@ -84,7 +84,7 @@ // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 \ -// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD