Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -194,12 +194,6 @@ // Subtarget Features (options and debugging) //===------------------------------------------------------------===// -def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals", - "FP16Denormals", - "true", - "Enable half precision denormal handling" ->; - // Some instructions do not support denormals despite this flag. Using // fp32 denormals also causes instructions to run at the double // precision rate for the device. @@ -209,13 +203,30 @@ "Enable single precision denormal handling" >; -def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", - "FP64Denormals", +// Denormal handling for fp64 and fp16 is controlled by the same +// config register when fp16 is supported. +// TODO: Do we need a separate f16 setting when f16 is not legal? +def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals", + "FP64FP16Denormals", "true", - "Enable double precision denormal handling", + "Enable double and half precision denormal handling", [FeatureFP64] >; +def : SubtargetFeature<"fp64-denormals", + "FP64FP16Denormals", + "true", + "Enable double and half precision denormal handling", + [FeatureFP64] +>; + +def : SubtargetFeature<"fp16-denormals", + "FP64FP16Denormals", + "true", + "Enable half precision denormal handling" +>; + + def FeatureFPExceptions : SubtargetFeature<"fp-exceptions", "FPExceptions", "true", Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -81,9 +81,8 @@ bool HalfRate64Ops; // Dynamially set bits that enable features.
- bool FP16Denormals; bool FP32Denormals; - bool FP64Denormals; + bool FP64FP16Denormals; bool FPExceptions; bool FlatForGlobal; bool UnalignedScratchAccess; @@ -280,7 +279,7 @@ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; bool hasFP16Denormals() const { - return FP16Denormals; + return FP64FP16Denormals; } bool hasFP32Denormals() const { @@ -288,7 +287,7 @@ } bool hasFP64Denormals() const { - return FP64Denormals; + return FP64FP16Denormals; } bool hasFPExceptions() const { Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -41,9 +41,10 @@ // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); + SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-for-global,+unaligned-buffer-access,"; + FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); @@ -52,9 +53,8 @@ // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - FP16Denormals = false; + FP64FP16Denormals = false; FP32Denormals = false; - FP64Denormals = false; } // Set defaults if needed.
@@ -78,9 +78,8 @@ FastFMAF32(false), HalfRate64Ops(false), - FP16Denormals(false), FP32Denormals(false), - FP64Denormals(false), + FP64FP16Denormals(false), FPExceptions(false), FlatForGlobal(false), UnalignedScratchAccess(false), Index: test/CodeGen/AMDGPU/default-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/default-fp-mode.ll +++ test/CodeGen/AMDGPU/default-fp-mode.ll @@ -54,6 +54,34 @@ ret void } +; GCN-LABEL: {{^}}test_f16_f64_denormals: +; GCN: FloatMode: 192 +; GCN: IeeeMode: 1 +define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 { + store half 0.0, half addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_no_f16_f64_denormals: +; GCN: FloatMode: 0 +; GCN: IeeeMode: 1 +define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 { + store half 0.0, half addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_f32_f16_f64_denormals: +; GCN: FloatMode: 240 +; GCN: IeeeMode: 1 +define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 { + store half 0.0, half addrspace(1)* %out0 + store float 0.0, float addrspace(1)* %out1 + store double 0.0, double addrspace(1)* %out2 + ret void +} + ; GCN-LABEL: {{^}}kill_gs_const: ; GCN: IeeeMode: 0 define amdgpu_gs void @kill_gs_const() { @@ -87,4 +115,7 @@ attributes #2 = { nounwind "target-features"="+fp64-denormals" } attributes #3 = { nounwind "target-features"="+fp32-denormals" } attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } -attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } +attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } +attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" } +attributes #7 = { nounwind
"target-features"="-fp64-fp16-denormals" } +attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }