Index: lib/Target/R600/AMDGPU.td
===================================================================
--- lib/Target/R600/AMDGPU.td
+++ lib/Target/R600/AMDGPU.td
@@ -32,11 +32,22 @@
         "false",
         "Disable the if conversion pass">;
 
-def FeatureFP64     : SubtargetFeature<"fp64",
+def FeatureFP64 : SubtargetFeature<"fp64",
         "FP64",
         "true",
         "Enable 64bit double precision operations">;
 
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+        "FP64Denormals",
+        "true",
+        "Enable double precision denormal handling",
+        [FeatureFP64]>;
+
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+        "FP32Denormals",
+        "true",
+        "Enable single precision denormal handling">;
+
 def Feature64BitPtr : SubtargetFeature<"64BitPtr",
         "Is64bit",
         "true",
Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -53,11 +53,20 @@
 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
 // and sin_f32, cos_f32 on most parts). We want to be able to use these so it's
 // probably best to just report no single precision denormals.
-static uint32_t getFPMode(MachineFunction &) {
+static uint32_t getFPMode(const MachineFunction &MF) {
+  const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+
+  // Per precision: keep denormals if the subtarget enables them, else flush
+  // both inputs and outputs. TODO: Are the one-sided flush modes ever useful?
+  const uint32_t SPDenormMode =
+    ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+  const uint32_t DPDenormMode =
+    ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
   return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
          FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
-         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
-         FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+         FP_DENORM_MODE_SP(SPDenormMode) |
+         FP_DENORM_MODE_DP(DPDenormMode);
 }
 
 static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
Index: lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- lib/Target/R600/AMDGPUInstructions.td
+++ lib/Target/R600/AMDGPUInstructions.td
@@ -35,6 +35,8 @@
 }
 
 def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
 
 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
Index: lib/Target/R600/AMDGPUSubtarget.h
===================================================================
--- lib/Target/R600/AMDGPUSubtarget.h
+++ lib/Target/R600/AMDGPUSubtarget.h
@@ -51,6 +51,8 @@
   short TexVTXClauseSize;
   enum Generation Gen;
   bool FP64;
+  bool FP64Denormals;
+  bool FP32Denormals;
   bool CaymanISA;
   bool EnableIRStructurizer;
   bool EnableIfCvt;
@@ -76,6 +78,14 @@
   bool hasHWFP64() const;
   bool hasCaymanISA() const;
 
+  bool hasFP32Denormals() const {
+    return FP32Denormals;
+  }
+
+  bool hasFP64Denormals() const {
+    return FP64Denormals;
+  }
+
   bool hasBFE() const {
     return (getGeneration() >= EVERGREEN);
   }
Index: lib/Target/R600/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/R600/AMDGPUSubtarget.cpp
+++ lib/Target/R600/AMDGPUSubtarget.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPUSubtarget.h"
 #include "R600InstrInfo.h"
 #include "SIInstrInfo.h"
+#include "llvm/ADT/SmallString.h"
 
 using namespace llvm;
 
@@ -36,17 +37,36 @@
   TexVTXClauseSize = 0;
   Gen = AMDGPUSubtarget::R600;
   FP64 = false;
+  FP64Denormals = false;
+  FP32Denormals = false;
   CaymanISA = false;
   EnableIRStructurizer = true;
   EnableIfCvt = true;
   WavefrontSize = 0;
   CFALUBug = false;
   LocalMemorySize = 0;
-  ParseSubtargetFeatures(GPU, FS);
+
+  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
+  // enabled, but some instructions do not respect them, so don't enable by
+  // default.
+  //
+  // We want to be able to turn these off, but making this a subtarget feature
+  // for SI has the unhelpful behavior that it unsets everything else if you
+  // disable it.
+  SmallString<128> FullFS("+fp64-denormals,");
+  FullFS += FS;
+
+  ParseSubtargetFeatures(GPU, FullFS);
   DevName = GPU;
 
   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     InstrInfo.reset(new R600InstrInfo(*this));
+
+    // FIXME: I don't think Evergreen has any useful support for denormals,
+    // but this should be checked. Should we issue a warning somewhere if
+    // someone tries to enable these?
+    FP32Denormals = false;
+    FP64Denormals = false;
   } else {
     InstrInfo.reset(new SIInstrInfo(*this));
   }
Index: test/CodeGen/R600/default-fp-mode.ll
===================================================================
--- test/CodeGen/R600/default-fp-mode.ll
+++ test/CodeGen/R600/default-fp-mode.ll
@@ -1,8 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
 
-; SI-LABEL: @test_kernel
-; SI: FloatMode: 192
-; SI: IeeeMode: 0
+; FUNC-LABEL: @test_kernel
+
+; DEFAULT: FloatMode: 192
+; DEFAULT: IeeeMode: 0
+
+; FP64-DENORMAL: FloatMode: 192
+; FP64-DENORMAL: IeeeMode: 0
+
+; FP32-DENORMAL: FloatMode: 48
+; FP32-DENORMAL: IeeeMode: 0
+
+; BOTH-DENORMAL: FloatMode: 240
+; BOTH-DENORMAL: IeeeMode: 0
+
+; NO-DENORMAL: FloatMode: 0
+; NO-DENORMAL: IeeeMode: 0
 define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
   store float 0.0, float addrspace(1)* %out0
   store double 0.0, double addrspace(1)* %out1