Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -164,10 +164,10 @@ std::string FloatABI; /// The floating-point denormal mode to use. - llvm::DenormalMode FPDenormalMode; + llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE(); - /// The floating-point subnormal mode to use, for float. - llvm::DenormalMode FP32DenormalMode; + /// The floating-point denormal mode to use, for float. + llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE(); /// The float precision limit to use, if non-empty. std::string LimitFloatPrecision; Index: clang/include/clang/Driver/ToolChain.h =================================================================== --- clang/include/clang/Driver/ToolChain.h +++ clang/include/clang/Driver/ToolChain.h @@ -623,8 +623,7 @@ const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind, const llvm::fltSemantics *FPType = nullptr) const { - // FIXME: This should be IEEE when default handling is fixed. - return llvm::DenormalMode::getInvalid(); + return llvm::DenormalMode::getIEEE(); } }; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2548,8 +2548,13 @@ ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - DenormalFPMath = DefaultDenormalFPMath; FPContract = ""; + DenormalFPMath = DefaultDenormalFPMath; + + // FIXME: The target may have picked a non-IEEE default mode here based on + // -cl-denorms-are-zero. Should the target consider -ffp-model interaction? + DenormalFP32Math = DefaultDenormalFP32Math; + StringRef Val = A->getValue(); if (OFastEnabled && !Val.equals("fast")) { // Only -ffp-model=fast is compatible with OFast, ignore. 
@@ -2726,7 +2731,9 @@ FPExceptionBehavior = "strict"; // -fno_unsafe_math_optimizations restores default denormal handling DenormalFPMath = DefaultDenormalFPMath; - DenormalFP32Math = DefaultDenormalFP32Math; + + // The target may have opted to flush just f32 by default, so force IEEE. + DenormalFP32Math = llvm::DenormalMode::getIEEE(); break; case options::OPT_Ofast: @@ -2767,11 +2774,12 @@ if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. - // TODO: How should this interact with DenormalFP32Math? if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && SignedZeros && TrappingMath && RoundingFPMath && - (FPContract.equals("off") || FPContract.empty())) + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { @@ -2825,7 +2833,8 @@ CmdArgs.push_back(Args.MakeArgString(ArgStr.str())); } - if (DenormalFP32Math.isValid()) { + // Add f32 specific denormal mode flag if it's different. 
+ if (DenormalFP32Math != DenormalFPMath) { llvm::SmallString<64> DenormFlag; llvm::raw_svector_ostream ArgStr(DenormFlag); ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math; Index: clang/test/CodeGenCUDA/flush-denormals.cu =================================================================== --- clang/test/CodeGenCUDA/flush-denormals.cu +++ clang/test/CodeGenCUDA/flush-denormals.cu @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fcuda-is-device \ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=DEFAULT %s +// RUN: FileCheck -check-prefix=NOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ @@ -10,10 +10,9 @@ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ // RUN: FileCheck -check-prefix=FTZ %s -// FIXME: Unspecified should default to ieee // RUN: %clang_cc1 -fcuda-is-device -x hip \ // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=AMDFTZ %s +// RUN: FileCheck -check-prefix=AMDNOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -x hip \ // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \ @@ -42,10 +41,6 @@ // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee" - -// FIXME: This should be removed -// DEFAULT-NOT: "denormal-fp-math-f32" - // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals Index: clang/test/CodeGenCUDA/propagate-metadata.cu =================================================================== --- clang/test/CodeGenCUDA/propagate-metadata.cu +++ clang/test/CodeGenCUDA/propagate-metadata.cu @@ -15,7 +15,7 @@ // RUN: %s -o %t.bc -triple nvptx-unknown-unknown // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \ -// 
RUN: -fno-trapping-math -fcuda-is-device -fdenormal-fp-math-f32=ieee -triple nvptx-unknown-unknown \ +// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \ @@ -60,8 +60,7 @@ // CHECK-SAME: convergent // CHECK-SAME: norecurse -// FTZ-NOT: "denormal-fp-math" - +// FTZ: "denormal-fp-math"="ieee,ieee" // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign" // NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee" @@ -76,7 +75,8 @@ // CHECK-SAME: convergent // CHECK-NOT: norecurse -// FTZ-NOT: "denormal-fp-math" +// FTZ-SAME: "denormal-fp-math"="ieee,ieee" +// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee" // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign" // NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee" Index: clang/test/CodeGenOpenCL/amdgpu-features.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-features.cl +++ clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -14,13 +14,13 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals" -// GFX1010: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime,-fp32-denormals" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime" +// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1012: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime" // GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals" // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu =================================================================== --- clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -9,5 +9,9 @@ // CPUFTZ-NOT: -fdenormal-fp-math +// FTZ-NOT: -fdenormal-fp-math-f32= // FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign" -// NOFTZ: "-fdenormal-fp-math=ieee,ieee" + +// The default of ieee is omitted +// NOFTZ-NOT: "-fdenormal-fp-math" +// NOFTZ-NOT: "-fdenormal-fp-math-f32" Index: clang/test/Driver/denormal-fp-math.c =================================================================== --- clang/test/Driver/denormal-fp-math.c +++ clang/test/Driver/denormal-fp-math.c @@ -8,7 +8,8 @@ // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s -// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee +// TODO: ieee is the implied default, and the flag is not passed. 
+// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee" // CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign" // CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero" // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee" Index: clang/test/Driver/fp-model.c =================================================================== --- clang/test/Driver/fp-model.c +++ clang/test/Driver/fp-model.c @@ -63,6 +63,10 @@ // RUN: | FileCheck --check-prefix=WARNf %s // WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option] +// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN10 %s +// WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option] + // RUN: %clang -### -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOROUND %s // CHECK-NOROUND: "-cc1"