Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -21,6 +21,12 @@ "Assuming f32 fma is at least as fast as mul + add" >; +def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32", + "FastDenormalF32", + "true", + "Enabling denormals does not cause f32 instructions to run at f64 rates" +>; + def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", "MIMG_R128", "true", @@ -431,7 +437,7 @@ FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, - FeatureIntClamp + FeatureIntClamp, FeatureFastDenormalF32 ] >; @@ -444,7 +450,7 @@ FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, - FeatureAddNoCarryInsts, FeatureScalarAtomics + FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureFastDenormalF32 ] >; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -310,6 +310,7 @@ // Possibly statically set by tablegen, but may want to be overridden. bool FastFMAF32; + bool FastDenormalF32; bool HalfRate64Ops; // Dynamially set bits that enable features. Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -154,6 +154,7 @@ MaxPrivateElementSize(0), FastFMAF32(false), + FastDenormalF32(false), HalfRate64Ops(false), FP64FP16Denormals(false),