Index: llvm/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -494,30 +494,6 @@
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
-// Denormal handling for fp64 and fp16 is controlled by the same
-// config register when fp16 supported.
-// TODO: Do we need a separate f16 setting when not legal?
-def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable double and half precision denormal handling",
-  [FeatureFP64]
->;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable double and half precision denormal handling",
-  [FeatureFP64, FeatureFP64FP16Denormals]
->;
-
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable half precision denormal handling",
-  [FeatureFP64FP16Denormals]
->;
-
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
   "FPExceptions",
   "true",
Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ConstantFolding.h"
@@ -1387,7 +1388,9 @@
   DT = DTWP ? &DTWP->getDomTree() : nullptr;
 
   HasUnsafeFPMath = hasUnsafeFPMath(F);
-  HasFP32Denormals = ST->hasFP32Denormals(F);
+
+  AMDGPU::SIModeRegisterDefaults Mode(F);
+  HasFP32Denormals = Mode.allFP32Denormals();
 
   bool MadeChange = false;
 
Index: llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,15 +18,6 @@
   "Enable single precision FMA (not as fast as mul+add, but fused)"
 >;
 
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
-  "FP32Denormals",
-  "true",
-  "Enable single precision denormal handling"
->;
-
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
   "localmemorysize"#Value,
   "LocalMemorySize",
Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -402,7 +402,7 @@
   }
 #endif
   Subtarget = &MF.getSubtarget<GCNSubtarget>();
-  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
+  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
   return SelectionDAGISel::runOnMachineFunction(MF);
 }
 
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1644,9 +1644,10 @@
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
 
   // float fr = mad(fqneg, fb, fa);
-  unsigned OpCode = MFI->getMode().allFP32Denormals() ?
-                    (unsigned)AMDGPUISD::FMAD_FTZ :
-                    (unsigned)ISD::FMAD;
+  unsigned OpCode = !MFI->getMode().allFP32Denormals() ?
+                    (unsigned)ISD::FMAD :
+                    (unsigned)AMDGPUISD::FMAD_FTZ;
+
   SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
 
   // int iq = (int)fq;
@@ -1729,9 +1730,9 @@
     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
     // Compute denominator reciprocal.
-    unsigned FMAD = MFI->getMode().allFP32Denormals() ?
-                    (unsigned)AMDGPUISD::FMAD_FTZ :
-                    (unsigned)ISD::FMAD;
+    unsigned FMAD = !MFI->getMode().allFP32Denormals() ?
+                    (unsigned)ISD::FMAD :
+                    (unsigned)AMDGPUISD::FMAD_FTZ;
 
     SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
     SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -807,3 +807,10 @@
   [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
    (fmaxnum_oneuse node:$src0, node:$src1)]
 >;
+
+// Matches either the generic fmad node or the AMDGPUfmad_ftz node, so that a
+// single selection pattern can accept both forms.
+def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+  [(fmad node:$src0, node:$src1, node:$src2),
+   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
+>;
Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -18,7 +18,7 @@
   LocalMemoryObjects(),
   ExplicitKernArgSize(0),
   LDSSize(0),
-  Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
+  Mode(MF.getFunction()),
   IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
   NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
   MemoryBound(false),
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -66,7 +66,6 @@
 protected:
   bool Has16BitInsts;
   bool HasMadMixInsts;
-  bool FP32Denormals;
   bool FPExceptions;
   bool HasSDWA;
   bool HasVOP3PInsts;
@@ -149,15 +148,6 @@
     return HasMadMixInsts;
   }
 
-  bool hasFP32Denormals(const Function &F) const {
-    // FIXME: This should not be a property of the subtarget. This should be a
-    // property with a default set by the calling convention which can be
-    // overridden by attributes. For now, use the subtarget feature as a
-    // placeholder attribute. The function arguments only purpose is to
-    // discourage use without a function context until this is removed.
-    return FP32Denormals;
-  }
-
   bool hasFPExceptions() const {
     return FPExceptions;
   }
@@ -304,7 +294,6 @@
   bool HalfRate64Ops;
 
   // Dynamially set bits that enable features.
-  bool FP64FP16Denormals;
   bool FlatForGlobal;
   bool AutoWaitcntBeforeBarrier;
   bool CodeObjectV3;
@@ -636,20 +625,6 @@
   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                            const Function &) const;
 
-  /// Alias for hasFP64FP16Denormals
-  bool hasFP16Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
-  /// Alias for hasFP64FP16Denormals
-  bool hasFP64Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
-  bool hasFP64FP16Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
   bool supportsMinMaxDenormModes() const {
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -59,13 +59,6 @@
   FullFS += FS;
   ParseSubtargetFeatures(GPU, FullFS);
 
-  // FIXME: I don't think think Evergreen has any useful support for
-  // denormals, but should be checked. Should we issue a warning somewhere
-  // if someone tries to enable these?
-  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    FP32Denormals = false;
-  }
-
   HasMulU24 = getGeneration() >= EVERGREEN;
   HasMulI24 = hasCaymanISA();
 
@@ -76,9 +69,6 @@
 GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                               StringRef GPU, StringRef FS) {
   // Determine default and user-specified characteristics
-  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
-  // enabled, but some instructions do not respect them and they run at the
-  // double precision rate, so don't enable by default.
   //
   // We want to be able to turn these off, but making this a subtarget feature
   // for SI has the unhelpful behavior that it unsets everything else if you
@@ -93,15 +83,6 @@
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
     FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
 
-  // FIXME: I don't think think Evergreen has any useful support for
-  // denormals, but should be checked. Should we issue a warning somewhere
-  // if someone tries to enable these?
-  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
-  } else {
-    FullFS += "-fp32-denormals,";
-  }
-
   FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
 
   // Disable mutually exclusive bits.
@@ -172,7 +153,6 @@
   TargetTriple(TT),
   Has16BitInsts(false),
   HasMadMixInsts(false),
-  FP32Denormals(false),
   FPExceptions(false),
   HasSDWA(false),
   HasVOP3PInsts(false),
@@ -200,7 +180,6 @@
     FastFMAF32(false),
     HalfRate64Ops(false),
 
-    FP64FP16Denormals(false),
     FlatForGlobal(false),
     AutoWaitcntBeforeBarrier(false),
     CodeObjectV3(false),
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -133,7 +133,7 @@
       TLI(ST->getTargetLowering()),
       CommonTTI(TM, F),
       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
-      HasFP32Denormals(ST->hasFP32Denormals(F)) { }
+      HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {}
 
   bool hasBranchDivergence() { return true; }
   bool useGPUDivergenceAnalysis() const;
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -941,8 +941,8 @@
 
   // FIXME: dx10_clamp can just take the caller setting, but there seems to be
   // no way to support merge for backend defined attributes.
-  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
-  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
+  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
   return CallerMode.isInlineCompatible(CalleeMode);
 }
 
Index: llvm/lib/Target/AMDGPU/R600Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/R600Instructions.td
+++ llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1006,7 +1006,7 @@
 
 class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
   inst, "MULADD_IEEE",
-  [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
+  [(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
 >;
 
 class FMA_Common <bits<5> inst> : R600_3OP <
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -695,19 +695,13 @@
     FP64FP16InputDenormals(true),
     FP64FP16OutputDenormals(true) {}
 
-  // FIXME: Should not depend on the subtarget
-  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+  SIModeRegisterDefaults(const Function &F);
 
   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
     const bool IsCompute = AMDGPU::isCompute(CC);
 
     SIModeRegisterDefaults Mode;
-    Mode.DX10Clamp = true;
     Mode.IEEE = IsCompute;
-    Mode.FP32InputDenormals = true;
-    Mode.FP32OutputDenormals = true;
-    Mode.FP64FP16InputDenormals = true;
-    Mode.FP64FP16OutputDenormals = true;
     return Mode;
   }
 
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1333,8 +1333,7 @@
   return true;
 }
 
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
-                                               const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
   *this = getDefaultForCallingConv(F.getCallingConv());
 
   StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1346,11 +1345,30 @@
   if (!DX10ClampAttr.empty())
     DX10Clamp = DX10ClampAttr == "true";
 
-  // FIXME: Split this when denormal-fp-math is used
-  FP32InputDenormals = ST.hasFP32Denormals(F);
-  FP32OutputDenormals = FP32InputDenormals;
-  FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F);
-  FP64FP16OutputDenormals = FP64FP16InputDenormals;
+  // "denormal-fp-math-f32" only overrides the FP32 denormal settings.
+  StringRef DenormF32Attr =
+      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+  if (!DenormF32Attr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
+
+  // "denormal-fp-math" overrides the FP64/FP16 settings, and also the FP32
+  // settings when the f32-specific attribute above is absent.
+  StringRef DenormAttr =
+      F.getFnAttribute("denormal-fp-math").getValueAsString();
+  if (!DenormAttr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+    if (DenormF32Attr.empty()) {
+      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+    }
+
+    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
 }
 
 namespace {
Index: llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
===================================================================
--- llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -278,5 +278,5 @@
   ret void
 }
 
-attributes #0 = { nounwind "target-features"="+fp32-denormals" }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -150,7 +150,6 @@
   ret void
 }
 
-; FIXME: Should have denormals off by default.
 ; GCN-LABEL: {{^}}ps_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -338,8 +338,8 @@
 }
 
 attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
 
 !0 = !{float 2.500000e+00}
 !1 = !{float 5.000000e-01}
Index: llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -389,10 +389,10 @@
 
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2, !3}
Index: llvm/test/CodeGen/AMDGPU/clamp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/clamp.ll
+++ llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -767,8 +767,8 @@
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
Index: llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -82,7 +82,15 @@
   ret void
 }
 
-; FIXME: Denormals should be off by default
+; GCN-LABEL: {{^}}test_just_f32_attr_flush
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 1
+define amdgpu_kernel void @test_just_f32_attr_flush(float addrspace(1)* %out0, double addrspace(1)* %out1) #9 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
 ; GCN-LABEL: {{^}}kill_gs_const:
 ; GCN: FloatMode: 240
 ; GCN: IeeeMode: 0
@@ -110,10 +118,11 @@
 
 attributes #0 = { nounwind "target-cpu"="tahiti" }
 attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-features"="+fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
-attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
-attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #9 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
 
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s
 
 ; FIXME: This should also fold when fma is actually fast if an FMA
 ; exists in the original program.
Index: llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp-exceptions -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
 
 ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
 ; GCN-FLUSH:   v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -739,6 +739,6 @@
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -625,9 +625,9 @@
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
-attributes #5 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="gfx900" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="tonga" }
+attributes #5 = { nounwind "denormal-fp-math"="ieee,ieee" "target-cpu"="gfx900" }
+attributes #6 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="gfx900" }
Index: llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
 
 ; Make sure fdiv is promoted to f32.
 
@@ -263,9 +263,9 @@
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.sqrt.f16(half) #1
-declare half @llvm.fabs.f16(half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+declare half @llvm.sqrt.f16(half) #2
+declare half @llvm.fabs.f16(half) #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/fdiv.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -345,9 +345,8 @@
   ret void
 }
 
-
-attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,+fp64-fp16-denormals,-flat-for-global" }
-attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
-attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
+attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }
 
 !0 = !{float 2.500000e+00}
Index: llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
 
 ; GCN-LABEL: {{^}}div_1_by_x_25ulp:
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
Index: llvm/test/CodeGen/AMDGPU/fdot2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
 ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
 
 ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
Index: llvm/test/CodeGen/AMDGPU/fma-combine.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
 
 ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
 
Index: llvm/test/CodeGen/AMDGPU/fmaxnum.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@@ -205,7 +205,7 @@
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN-NOT: v_max_f32
-define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
+define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
   %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
   ret <3 x float> %val
 }
@@ -218,5 +218,5 @@
 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
 declare double @llvm.maxnum.f64(double, double)
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/fminnum.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@@ -225,5 +225,5 @@
 declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
 declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -1,8 +1,8 @@
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
+; XUN: llc -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
 
 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
 ; make add an instruction if the fadd has more than one use.
Index: llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
 
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
 
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare half @llvm.fmuladd.f16(half, half, half) #1
Index: llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -1,21 +1,21 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
 
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
 
 
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
 
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
 
 ; FIXME: Should probably test this, but sometimes selecting fmac is painful to match.
-; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
+; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
 
 
 ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
Index: llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -1,12 +1,12 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=-fp32-denormals,+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=-fp32-denormals,+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
 
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
 
 ; --------------------------------------------------------------------------------
 ; fadd tests
@@ -2562,6 +2562,6 @@
 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
Index: llvm/test/CodeGen/AMDGPU/fpext-free.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
 
 ;  fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
 
Index: llvm/test/CodeGen/AMDGPU/frem.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/frem.ll
+++ llvm/test/CodeGen/AMDGPU/frem.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}frem_f32:
 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
@@ -109,5 +109,5 @@
    ret void
 }
 
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -92,10 +92,10 @@
 
 attributes #0 = { nounwind "target-cpu"="kaveri" "target-features"="-code-object-v3" }
 attributes #1 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" }
-attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" }
-attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" }
+attributes #2 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #4 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
 attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
 attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
Index: llvm/test/CodeGen/AMDGPU/known-never-snan.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; Mostly overlaps with fmed3.ll to stress specific cases of
 ; isKnownNeverSNaN.
@@ -667,7 +667,7 @@
 declare float @llvm.amdgcn.fract.f32(float) #1
 declare float @llvm.amdgcn.cubeid(float, float, float) #0
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
 
 !0 = !{float 2.500000e+00}
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -139,9 +139,9 @@
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
-attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }
 
 !0 = !{float 2.500000e+00}
Index: llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -1,9 +1,9 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
 
 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
Index: llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
 
 declare half @llvm.maxnum.f16(half %a, half %b)
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -88,7 +88,7 @@
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
     half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
   %a.val = load volatile half, half addrspace(1)* %a
   %b.val = load volatile half, half addrspace(1)* %b
@@ -157,7 +157,7 @@
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
   %b.val = load half, half addrspace(1)* %b
   %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
@@ -225,7 +225,7 @@
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    half addrspace(1)* %a) #0 {
 entry:
   %a.val = load half, half addrspace(1)* %a
   %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
@@ -308,7 +308,7 @@
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -376,7 +376,7 @@
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -443,7 +443,7 @@
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    <2 x half> addrspace(1)* %a) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
   %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -542,7 +542,7 @@
 ; GFX9-NEXT:    s_endpgm
     <3 x half> addrspace(1)* %r,
     <3 x half> addrspace(1)* %a,
-    <3 x half> addrspace(1)* %b) {
+    <3 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
   %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@@ -655,7 +655,7 @@
 ; GFX9-NEXT:    s_endpgm
     <4 x half> addrspace(1)* %r,
     <4 x half> addrspace(1)* %a,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
   %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@@ -746,10 +746,12 @@
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
   %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
   %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
   store <4 x half> %r.val, <4 x half> addrspace(1)* %r
   ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
 
 declare half @llvm.minnum.f16(half %a, half %b)
 declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -88,7 +88,7 @@
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
     half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
   %a.val = load volatile half, half addrspace(1)* %a
   %b.val = load volatile half, half addrspace(1)* %b
@@ -97,7 +97,7 @@
   ret void
 }
 
-define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) {
+define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 {
 ; SI-LABEL: minnum_f16_no_ieee:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -180,7 +180,7 @@
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
   %b.val = load half, half addrspace(1)* %b
   %r.val = call half @llvm.minnum.f16(half 3.0, half %b.val)
@@ -248,7 +248,7 @@
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    half addrspace(1)* %a) #0 {
 entry:
   %a.val = load half, half addrspace(1)* %a
   %r.val = call half @llvm.minnum.f16(half %a.val, half 4.0)
@@ -331,7 +331,7 @@
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -340,7 +340,7 @@
   ret void
 }
 
-define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) {
+define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) #0 {
 ; SI-LABEL: minnum_v2f16_no_ieee:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
@@ -429,7 +429,7 @@
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -496,7 +496,7 @@
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    <2 x half> addrspace(1)* %a) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
   %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -595,7 +595,7 @@
 ; GFX9-NEXT:    s_endpgm
     <3 x half> addrspace(1)* %r,
     <3 x half> addrspace(1)* %a,
-    <3 x half> addrspace(1)* %b) {
+    <3 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
   %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@@ -708,7 +708,7 @@
 ; GFX9-NEXT:    s_endpgm
     <4 x half> addrspace(1)* %r,
     <4 x half> addrspace(1)* %a,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
   %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
   %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@@ -799,10 +799,12 @@
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
     <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
   %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
   %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
   store <4 x half> %r.val, <4 x half> addrspace(1)* %r
   ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/mad-combine.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -1,14 +1,14 @@
 ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
 
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
 
 ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
 
 ; Make sure we don't form mad with denormals
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare float @llvm.fabs.f32(float) #0
Index: llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -143,5 +143,5 @@
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
Index: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -310,5 +310,5 @@
 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
Index: llvm/test/CodeGen/AMDGPU/mad-mix.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -557,6 +557,6 @@
 declare float @llvm.fmuladd.f32(float, float, float) #2
 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
-attributes #1 = { nounwind "target-features"="+fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
 attributes #2 = { nounwind readnone speculatable }
Index: llvm/test/CodeGen/AMDGPU/madak.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/madak.ll
+++ llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 declare float @llvm.fabs.f32(float) nounwind readnone
@@ -19,7 +19,7 @@
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; GFX10-MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -53,7 +53,7 @@
 ; MAD-DAG:      v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; FMA-DAG:      v_fmac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; GCN:          s_endpgm
-define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -82,7 +82,7 @@
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
+define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -110,7 +110,7 @@
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; GFX10-MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -134,7 +134,7 @@
 ; GFX6_8_9:     v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 ; GFX10-MAD:    v_mad_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
 ; FMA:          v_fma_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
-define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
+define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -155,7 +155,7 @@
 ; GFX6_8_9:      v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 ; GFX10-MAD:     v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
 ; FMA:           v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
-define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -173,7 +173,7 @@
 ; GFX8_9:  v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
 ; FMA:       v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
   store float %madak, float addrspace(1)* %out, align 4
@@ -189,7 +189,7 @@
 ; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
 ; FMA:       v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -215,7 +215,7 @@
 ; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
 ; FMA:       v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -265,4 +265,4 @@
   ret void
 }
 
-attributes #0 = { nounwind}
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Index: llvm/test/CodeGen/AMDGPU/madmk.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/madmk.ll
+++ llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
  ; FIXME: None of these trigger madmk emission anymore. It is still
  ; possible, but requires the correct registers to be used which is
@@ -12,7 +12,7 @@
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
-define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -35,7 +35,7 @@
 ; GCN-DAG: v_mac_f32_e32 [[VB]], [[SK]], [[VA]]
 ; GCN-DAG: v_mac_f32_e32 [[VC]], [[SK]], [[VA]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -64,7 +64,7 @@
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -83,7 +83,7 @@
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
+define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
@@ -99,7 +99,7 @@
 ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
 ; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
+define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -115,7 +115,7 @@
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
+define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -132,7 +132,7 @@
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]]
-define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -153,7 +153,7 @@
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -174,7 +174,7 @@
 ; GCN: buffer_load_dword [[A:v[0-9]+]]
 ; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
-define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -191,7 +191,7 @@
 ; SI: s_or_b64
 ; SI: s_xor_b64
 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
+define amdgpu_kernel void @kill_madmk_verifier_error() #0 {
 bb:
   br label %bb2
 
@@ -214,4 +214,5 @@
 
 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
 
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; f32 denormals are flushed (sign kept) for both results and inputs
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -254,7 +254,7 @@
 declare void @foo(i32) #0
 declare float @llvm.fmuladd.f32(float, float, float) #1
 
-attributes #0 = { nounwind willreturn }
+attributes #0 = { nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; flush f32 denormals (results and inputs); #0 is also attached to the @foo declaration
 attributes #1 = { nounwind readnone speculatable }
 
 !0 = !{float 2.500000e+00}
Index: llvm/test/CodeGen/AMDGPU/omod.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/omod.ll
+++ llvm/test/CodeGen/AMDGPU/omod.ll
@@ -275,10 +275,10 @@
 declare half @llvm.maxnum.f16(half, half) #1
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { nounwind "target-features"="-fp32-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" } ; f32 denormals flushed on results and inputs
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" "no-signed-zeros-fp-math"="true" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" } ; f32 denormals honored (IEEE) on results and inputs
+attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" } ; generic (non-f32-specific) mode flushed; NOTE(review): with no -f32 override this presumably flushes f32 too - confirm
 attributes #4 = { nounwind "no-signed-zeros-fp-math"="false" }
 
 !llvm.dbg.cu = !{!0}
Index: llvm/test/CodeGen/AMDGPU/operand-folding.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK-LABEL: {{^}}fold_sgpr:
 ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
-define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) #1 {
 entry:
   %tmp0 = icmp ne i32 %fold, 0
   br i1 %tmp0, label %if, label %endif
@@ -20,7 +20,7 @@
 
 ; CHECK-LABEL: {{^}}fold_imm:
 ; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) #1 {
 entry:
   %fold = add i32 3, 2
   %tmp0 = icmp ne i32 %cmp, 0
@@ -46,7 +46,7 @@
 ; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
 
-define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) #1 {
 entry:
   %tmp0 = add i64 %val, 1
   store i64 %tmp0, i64 addrspace(1)* %out
@@ -61,7 +61,7 @@
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
 
-define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) #1 {
 entry:
   %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp0, 1
@@ -80,7 +80,7 @@
 ; CHECK-LABEL: {{^}}imm_one_use:
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
 
-define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) #1 {
 entry:
   %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = xor i32 %tmp0, 100
@@ -94,7 +94,7 @@
 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
 
-define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) #1 {
 entry:
   %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp0, 1
@@ -114,7 +114,7 @@
 ; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
 ; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
 ; CHECK: buffer_store_dword v[[LO]]
-define amdgpu_kernel void @no_fold_tied_subregister() {
+define amdgpu_kernel void @no_fold_tied_subregister() #1 {
   %tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
   %tmp2 = extractelement <2 x float> %tmp1, i32 0
   %tmp3 = extractelement <2 x float> %tmp1, i32 1
@@ -128,7 +128,7 @@
 ; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @no_extra_fold_on_same_opnd() {
+define void @no_extra_fold_on_same_opnd() #1 {
 entry:
   %s0 = load i32, i32 addrspace(5)* undef, align 4
   %s0.i64= zext i32 %s0 to i64
@@ -151,3 +151,4 @@
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; per-function f32 denormal flush; the llc invocation no longer passes -mattr=-fp32-denormals
Index: llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
@@ -150,8 +150,8 @@
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.sqrt.f32(float) #1
 
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; safe-math variant, f32 denormals flushed on results and inputs
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; unsafe-math variant, f32 denormals flushed on results and inputs
 
 !0 = !{float 2.500000e+00}
Index: llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}rcp_uint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
   %load = load i32, i32 addrspace(1)* %in, align 4
   %cvt = uitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -12,7 +12,7 @@
 
 ; GCN-LABEL: {{^}}rcp_sint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
   %load = load i32, i32 addrspace(1)* %in, align 4
   %cvt = sitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -20,4 +20,27 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}rcp_uint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_uint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 { ; #1 keeps f32 denormals (ieee,ieee); v_rcp_iflag_f32 must not be selected here
+  %load = load i32, i32 addrspace(1)* %in, align 4
+  %cvt = uitofp i32 %load to float ; unsigned integer source, same shape as @rcp_uint
+  %div = fdiv float 1.000000e+00, %cvt ; unlike @rcp_uint, this fdiv carries no !fpmath metadata
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}rcp_sint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_sint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 { ; #1 keeps f32 denormals (ieee,ieee); v_rcp_iflag_f32 must not be selected here
+  %load = load i32, i32 addrspace(1)* %in, align 4
+  %cvt = sitofp i32 %load to float ; signed integer source, same shape as @rcp_sint
+  %div = fdiv float 1.000000e+00, %cvt ; unlike @rcp_sint, this fdiv carries no !fpmath metadata
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
 !0 = !{float 2.500000e+00}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; used by @rcp_uint / @rcp_sint: f32 denormals flushed on results and inputs
+attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" } ; used by the *_denorm kernels: f32 denormals honored on results and inputs
Index: llvm/test/CodeGen/AMDGPU/rsq.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/rsq.ll
+++ llvm/test/CodeGen/AMDGPU/rsq.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 declare float @llvm.sqrt.f32(float) nounwind readnone
@@ -8,7 +8,7 @@
 ; SI-LABEL: {{^}}rsq_f32:
 ; SI: v_rsq_f32_e32
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %val = load float, float addrspace(1)* %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt, !fpmath !0
@@ -20,7 +20,7 @@
 ; SI-UNSAFE: v_rsq_f64_e32
 ; SI-SAFE: v_sqrt_f64_e32
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
   %val = load double, double addrspace(1)* %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
   %div = fdiv double 1.0, %sqrt
@@ -31,7 +31,7 @@
 ; SI-LABEL: {{^}}rsq_f32_sgpr:
 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) #0 {
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt, !fpmath !0
   store float %div, float addrspace(1)* %out, align 4
@@ -57,7 +57,7 @@
 ; SI-SAFE-NOT: v_rsq_f32
 
 ; SI: s_endpgm
-define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -83,7 +83,7 @@
 ; SI-UNSAFE: v_sqrt_f32_e32 [[SQRT:v[0-9]+]], v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %val = load float, float addrspace(1)* %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val)
   %div = fdiv float -1.0, %sqrt, !fpmath !0
@@ -98,7 +98,7 @@
 ; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}
 ; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
   %val = load double, double addrspace(1)* %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val)
   %div = fdiv double -1.0, %sqrt
@@ -114,7 +114,7 @@
 ; SI-UNSAFE: v_sqrt_f32_e64 [[SQRT:v[0-9]+]], -v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %val = load float, float addrspace(1)* %in, align 4
   %val.fneg = fsub float -0.0, %val
   %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
@@ -130,7 +130,7 @@
 ; SI-UNSAFE: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -v{{\[[0-9]+:[0-9]+\]}}
 ; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
   %val = load double, double addrspace(1)* %in, align 4
   %val.fneg = fsub double -0.0, %val
   %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
@@ -140,3 +140,5 @@
 }
 
 !0 = !{float 2.500000e+00}
+
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; per-function f32 denormal flush; the llc invocations no longer pass -mattr=-fp32-denormals
Index: llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -1,7 +1,7 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s
 
 ; GCN-LABEL: {{^}}add_shr_i32:
 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
@@ -12,7 +12,7 @@
 ; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX10: v_add_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 
-define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %a = load i32, i32 addrspace(1)* %in, align 4
   %shr = lshr i32 %a, 16
   %add = add i32 %a, %shr
@@ -28,7 +28,7 @@
 ; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX10: v_sub_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %a = load i32, i32 addrspace(1)* %in, align 4
   %shr = lshr i32 %a, 16
   %sub = sub i32 %shr, %a
@@ -44,7 +44,7 @@
 
 ; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 
-define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) {
+define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) #0 {
   %a = load i32, i32 addrspace(1)* %in1, align 4
   %b = load i32, i32 addrspace(1)* %in2, align 4
   %shra = lshr i32 %a, 16
@@ -61,7 +61,7 @@
 ; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_u32_u24_sdwa
 
-define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) #0 {
 entry:
   %a = load i16, i16 addrspace(1)* %ina, align 4
   %b = load i16, i16 addrspace(1)* %inb, align 4
@@ -84,7 +84,7 @@
 
 ; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
   %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -111,7 +111,7 @@
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) #0 {
 entry:
   %a = load <4 x i16>, <4 x i16> addrspace(1)* %ina, align 4
   %b = load <4 x i16>, <4 x i16> addrspace(1)* %inb, align 4
@@ -146,7 +146,7 @@
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) #0 {
 entry:
   %a = load <8 x i16>, <8 x i16> addrspace(1)* %ina, align 4
   %b = load <8 x i16>, <8 x i16> addrspace(1)* %inb, align 4
@@ -161,7 +161,7 @@
 ; SDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_f16_sdwa
 
-define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) #0 {
 entry:
   %a = load half, half addrspace(1)* %ina, align 4
   %b = load half, half addrspace(1)* %inb, align 4
@@ -184,7 +184,7 @@
 
 ; GFX9_10: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
   %b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -209,7 +209,7 @@
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) #0 {
 entry:
   %a = load <4 x half>, <4 x half> addrspace(1)* %ina, align 4
   %b = load <4 x half>, <4 x half> addrspace(1)* %inb, align 4
@@ -240,7 +240,7 @@
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) #0 {
 entry:
   %a = load <8 x half>, <8 x half> addrspace(1)* %ina, align 4
   %b = load <8 x half>, <8 x half> addrspace(1)* %inb, align 4
@@ -256,7 +256,7 @@
 ; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_u32_u24_sdwa
 
-define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) #0 {
 entry:
   %a = load i8, i8 addrspace(1)* %ina, align 4
   %b = load i8, i8 addrspace(1)* %inb, align 4
@@ -285,7 +285,7 @@
 
 ; GFX10: v_lshlrev_b16_e64 v{{[0-9]+}}, 8, v
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x i8>, <2 x i8> addrspace(1)* %ina, align 4
   %b = load <2 x i8>, <2 x i8> addrspace(1)* %inb, align 4
@@ -315,7 +315,7 @@
 ; GFX10-DAG: v_mul_lo_u16_e64
 ; GFX10-DAG: v_mul_lo_u16_e64
 
-define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) #0 {
 entry:
   %a = load <4 x i8>, <4 x i8> addrspace(1)* %ina, align 4
   %b = load <4 x i8>, <4 x i8> addrspace(1)* %inb, align 4
@@ -355,7 +355,7 @@
 ; GFX10-DAG: v_mul_lo_u16_e64
 ; GFX10-DAG: v_mul_lo_u16_e64
 
-define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) #0 {
 entry:
   %a = load <8 x i8>, <8 x i8> addrspace(1)* %ina, align 4
   %b = load <8 x i8>, <8 x i8> addrspace(1)* %inb, align 4
@@ -376,7 +376,7 @@
 ; FIXME: Should be able to avoid or
 define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
     <2 x half> addrspace(1)* %r,
-    <2 x i16> addrspace(1)* %a) {
+    <2 x i16> addrspace(1)* %a) #0 {
 entry:
   %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
   %r.val = sitofp <2 x i16> %a.val to <2 x half>
@@ -399,7 +399,7 @@
 ; GFX9_10: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]]
 ; GFX9_10: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]]
 
-define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
   %b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -421,7 +421,7 @@
 
 ; GFX10: v_pk_mul_lo_u16 v{{[0-9]+}}, 0x141007b, v{{[0-9]+}}
 
-define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
 entry:
   %a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
   %mul = mul <2 x i16> %a, <i16 123, i16 321>
@@ -443,7 +443,7 @@
 ; GFX9_10: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}}
 
-define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
   %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -460,7 +460,7 @@
 
 ; GFX9_10: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
   %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
   %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -503,7 +503,7 @@
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 
-define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) {
+define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) #0 {
 entry:
   %idxprom = ashr exact i64 15, 32
   %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %sourceA, i64 %idxprom
@@ -564,3 +564,5 @@
   store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
   br label %bb1
 }
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } ; generic denormal mode flushed on results and inputs; the llc invocations no longer pass -mattr=-fp64-fp16-denormals
Index: llvm/test/CodeGen/AMDGPU/udivrem24.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/udivrem24.ll
+++ llvm/test/CodeGen/AMDGPU/udivrem24.ll
@@ -21,6 +21,63 @@
   ret void
 }
 
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { ; #0 flushes f32 denormals on both results and inputs
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; divisor is the byte right after the dividend
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den ; per the checks above, expected to lower through int-to-float conversion and a reciprocal
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { ; #1 flushes only denormal f32 inputs; results stay IEEE
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; divisor is the byte right after the dividend
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den ; same expected lowering as the flush_in_out variant (identical check lines)
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #2 { ; #2 flushes only denormal f32 results; inputs stay IEEE
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; divisor is the byte right after the dividend
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den ; same expected lowering as the flush_in_out variant (identical check lines)
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}udiv24_i16:
 ; SI: v_cvt_f32_u32
 ; SI: v_cvt_f32_u32
@@ -325,3 +382,7 @@
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; value is "result-mode,input-mode": both flushed
+attributes #1 = { "denormal-fp-math-f32"="ieee,preserve-sign" } ; results IEEE, denormal inputs flushed
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign,ieee" } ; results flushed, denormal inputs IEEE
Index: llvm/test/CodeGen/AMDGPU/v_mac.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}mac_vvv:
 ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
Index: llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
 
 ; GCN-LABEL: {{^}}mac_f16:
 ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
@@ -677,6 +677,6 @@
 
 declare void @llvm.amdgcn.s.barrier() #2
 
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" } ; signed zeros honored; denormal results and inputs flushed
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" } ; signed zeros ignorable; denormal results and inputs flushed
 attributes #2 = { nounwind convergent }
Index: llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
 
 define amdgpu_kernel void @madak_f16(
 ; SI-LABEL: madak_f16:
@@ -52,7 +52,7 @@
 ; VI-NEXT:    s_endpgm
     half addrspace(1)* %r,
     half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
   %a.val = load half, half addrspace(1)* %a
   %b.val = load half, half addrspace(1)* %b
@@ -136,7 +136,7 @@
     half addrspace(1)* %r1,
     half addrspace(1)* %a,
     half addrspace(1)* %b,
-    half addrspace(1)* %c) {
+    half addrspace(1)* %c) #0 {
 entry:
   %a.val = load volatile half, half addrspace(1)* %a
   %b.val = load volatile half, half addrspace(1)* %b
@@ -151,3 +151,5 @@
   store half %r1.val, half addrspace(1)* %r1
   ret void
 }
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } ; denormal results and denormal inputs are both flushed
Index: llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
===================================================================
--- llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
+++ llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
@@ -15,89 +15,74 @@
 
 ; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
 ; CHECK-NEXT: ret i32 0
-define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+define i32 @target_cpu_target_features_call_no_target_cpu() {
   %call = call i32 @func_no_target_cpu()
   ret i32 %call
 }
 
-; CHECK-LABEL: @fp32_denormals(
-define i32 @fp32_denormals() #3 {
-  ret i32 0
-}
-
-; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
-; CHECK-NEXT: call i32 @fp32_denormals()
-define i32 @no_fp32_denormals_call_f32_denormals() #4 {
-  %call = call i32 @fp32_denormals()
-  ret i32 %call
-}
-
 ; Make sure gfx9 can call unspecified functions because of movrel
 ; feature change.
 ; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
 ; CHECK-NEXT: ret i32 0
-define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+define i32 @gfx9_target_features_call_no_target_cpu() #2 {
   %call = call i32 @func_no_target_cpu()
   ret i32 %call
 }
 
-define i32 @func_no_halfrate64ops() #6 {
+define i32 @func_no_halfrate64ops() #3 {
   ret i32 0
 }
 
-define i32 @func_with_halfrate64ops() #7 {
+define i32 @func_with_halfrate64ops() #4 {
   ret i32 0
 }
 
 ; CHECK-LABEL: @call_func_without_halfrate64ops(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_halfrate64ops() #7 {
+define i32 @call_func_without_halfrate64ops() #4 {
   %call = call i32 @func_no_halfrate64ops()
   ret i32 %call
 }
 
 ; CHECK-LABEL: @call_func_with_halfrate64ops(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_with_halfrate64ops() #6 {
+define i32 @call_func_with_halfrate64ops() #3 {
   %call = call i32 @func_with_halfrate64ops()
   ret i32 %call
 }
 
-define i32 @func_no_loadstoreopt() #8 {
+define i32 @func_no_loadstoreopt() #5 {
   ret i32 0
 }
 
-define i32 @func_with_loadstoreopt() #9 {
+define i32 @func_with_loadstoreopt() #6 {
   ret i32 0
 }
 
 ; CHECK-LABEL: @call_func_without_loadstoreopt(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_loadstoreopt() #9 {
+define i32 @call_func_without_loadstoreopt() #6 {
   %call = call i32 @func_no_loadstoreopt()
   ret i32 %call
 }
 
-define i32 @enable_codeobjectv3() #10 {
+define i32 @enable_codeobjectv3() #7 {
   ret i32 999
 }
 
 ; CHECK-LABEL: @disable_codeobjectv3_call_codeobjectv3(
 ; CHECK-NEXT: ret i32 999
-define i32 @disable_codeobjectv3_call_codeobjectv3() #11 {
+define i32 @disable_codeobjectv3_call_codeobjectv3() #8 {
   %call = call i32 @enable_codeobjectv3()
   ret i32 %call
 }
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals" }
-attributes #5 = { nounwind "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
-attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
-attributes #8 = { nounwind "target-features"="-load-store-opt" }
-attributes #9 = { nounwind "target-features"="+load-store-opt" }
-attributes #10 = { nounwind "target-features"="+code-object-v3" }
-attributes #11 = { nounwind "target-features"="-code-object-v3" }
+attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind "target-features"="-half-rate-64-ops" }
+attributes #4 = { nounwind "target-features"="+half-rate-64-ops" }
+attributes #5 = { nounwind "target-features"="-load-store-opt" }
+attributes #6 = { nounwind "target-features"="+load-store-opt" }
+attributes #7 = { nounwind "target-features"="+code-object-v3" }
+attributes #8 = { nounwind "target-features"="-code-object-v3" }