Index: lib/Target/R600/AMDGPU.td =================================================================== --- lib/Target/R600/AMDGPU.td +++ lib/Target/R600/AMDGPU.td @@ -32,11 +32,22 @@ "false", "Disable the if conversion pass">; -def FeatureFP64 : SubtargetFeature<"fp64", +def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", "Enable 64bit double precision operations">; +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", + "FP64Denormals", + "true", + "Enable double precision denormal handling", + [FeatureFP64]>; + +def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", + "FP32Denormals", + "true", + "Enable single precision denormal handling">; + def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", Index: lib/Target/R600/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/R600/AMDGPUAsmPrinter.cpp +++ lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -53,11 +53,20 @@ // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32, // and sin_f32, cos_f32 on most parts). We want to be able to use these so it's // probably best to just report no single precision denormals. -static uint32_t getFPMode(MachineFunction &) { +static uint32_t getFPMode(const MachineFunction &F) { + const AMDGPUSubtarget& ST = F.getTarget().getSubtarget(); + // TODO: Is there any real use for the flush in only / flush out only modes? + + uint32_t FP32Denormals = + ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + + uint32_t FP64Denormals = + ST.hasFP64Denormals() ? 
FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | - FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) | - FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); + FP_DENORM_MODE_SP(FP32Denormals) | + FP_DENORM_MODE_DP(FP64Denormals); } static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, Index: lib/Target/R600/AMDGPUInstructions.td =================================================================== --- lib/Target/R600/AMDGPUInstructions.td +++ lib/Target/R600/AMDGPUInstructions.td @@ -35,6 +35,8 @@ } def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">; +def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">; def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; Index: lib/Target/R600/AMDGPUSubtarget.h =================================================================== --- lib/Target/R600/AMDGPUSubtarget.h +++ lib/Target/R600/AMDGPUSubtarget.h @@ -51,6 +51,8 @@ short TexVTXClauseSize; enum Generation Gen; bool FP64; + bool FP64Denormals; + bool FP32Denormals; bool CaymanISA; bool EnableIRStructurizer; bool EnableIfCvt; @@ -76,6 +78,14 @@ bool hasHWFP64() const; bool hasCaymanISA() const; + bool hasFP32Denormals() const { + return FP32Denormals; + } + + bool hasFP64Denormals() const { + return FP64Denormals; + } + bool hasBFE() const { return (getGeneration() >= EVERGREEN); } Index: lib/Target/R600/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/R600/AMDGPUSubtarget.cpp +++ lib/Target/R600/AMDGPUSubtarget.cpp @@ -15,6 +15,7 @@ #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" #include "SIInstrInfo.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -36,17 +37,36 @@ TexVTXClauseSize = 0; Gen = AMDGPUSubtarget::R600; FP64 = false; + FP64Denormals = false; + FP32Denormals = false; CaymanISA = false; 
EnableIRStructurizer = true; EnableIfCvt = true; WavefrontSize = 0; CFALUBug = false; LocalMemorySize = 0; - ParseSubtargetFeatures(GPU, FS); + + // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be + // enabled, but some instructions do not respect them, so don't enable by + // default. + // + // We want to be able to turn these off, but making this a subtarget feature + // for SI has the unhelpful behavior that it unsets everything else if you + // disable it. + SmallString<128> FullFS("+fp64-denormals,"); + FullFS += FS; + + ParseSubtargetFeatures(GPU, FullFS); DevName = GPU; if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { InstrInfo.reset(new R600InstrInfo(*this)); + + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere if + // someone tries to enable these? + FP32Denormals = false; + FP64Denormals = false; } else { InstrInfo.reset(new SIInstrInfo(*this)); } Index: test/CodeGen/R600/default-fp-mode.ll =================================================================== --- test/CodeGen/R600/default-fp-mode.ll +++ test/CodeGen/R600/default-fp-mode.ll @@ -1,8 +1,27 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | 
FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; SI-LABEL: @test_kernel -; SI: FloatMode: 192 -; SI: IeeeMode: 0 +; FUNC-LABEL: @test_kernel + +; DEFAULT: FloatMode: 192 +; DEFAULT: IeeeMode: 0 + +; FP64-DENORMAL: FloatMode: 192 +; FP64-DENORMAL: IeeeMode: 0 + +; FP32-DENORMAL: FloatMode: 48 +; FP32-DENORMAL: IeeeMode: 0 + +; BOTH-DENORMAL: FloatMode: 240 +; BOTH-DENORMAL: IeeeMode: 0 + +; NO-DENORMAL: FloatMode: 0 +; NO-DENORMAL: IeeeMode: 0 define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1