Index: docs/AMDGPUUsage.rst
===================================================================
--- docs/AMDGPUUsage.rst
+++ docs/AMDGPUUsage.rst
@@ -435,6 +435,14 @@
      "amdgpu-waves-per-eu"="m,n"             Specify the minimum and maximum number of waves per
                                              execution unit. Generated by the ``amdgpu_waves_per_eu``
                                              CLANG attribute [CLANG-ATTR]_.
+
+     "amdgpu-ieee"                           true/false. Specify whether the function expects the IEEE
+                                             field of the mode register to be set on entry. Overrides
+                                             the default for the calling convention.
+     "amdgpu-dx10-clamp"                     true/false. Specify whether the function expects the
+                                             DX10_CLAMP field of the mode register to be set on entry.
+                                             Overrides the default for the calling convention.
+
      ======================================= ==========================================================
 
 Code Object
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -892,10 +892,11 @@
   // register.
   ProgInfo.FloatMode = getFPMode(MF);
 
-  ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
+  const SIModeRegisterDefaults Mode = MFI->getMode();
+  ProgInfo.IEEEMode = Mode.IEEE;
 
   // Make clamp modifier on NaN input returns 0.
-  ProgInfo.DX10Clamp = STM.enableDX10Clamp();
+  ProgInfo.DX10Clamp = Mode.DX10Clamp;
 
   unsigned LDSAlignShift;
   if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
Index: lib/Target/AMDGPU/AMDGPUFeatures.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUFeatures.td
+++ lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -54,12 +54,6 @@
         SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
                           Value#" GPU generation", Implies>;
 
-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
-  "DX10Clamp",
-  "true",
-  "clamp modifier clamps NaNs to 0.0"
->;
-
 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
   "EnablePromoteAlloca",
   "true",
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -286,7 +286,6 @@
 
   // Dynamially set bits that enable features.
   bool FP64FP16Denormals;
-  bool DX10Clamp;
   bool FlatForGlobal;
   bool AutoWaitcntBeforeBarrier;
   bool CodeObjectV3;
@@ -531,14 +530,6 @@
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool enableDX10Clamp() const {
-    return DX10Clamp;
-  }
-
-  bool enableIEEEBit(const MachineFunction &MF) const {
-    return AMDGPU::isCompute(MF.getFunction().getCallingConv());
-  }
-
   bool useFlatForGlobal() const {
     return FlatForGlobal;
   }
@@ -970,7 +961,6 @@
   bool FMA;
   bool CaymanISA;
   bool CFALUBug;
-  bool DX10Clamp;
   bool HasVertexCache;
   bool R600ALUInst;
   bool FP64;
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -45,7 +45,7 @@
 R600Subtarget &
 R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
                                                StringRef GPU, StringRef FS) {
-  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+  SmallString<256> FullFS("+promote-alloca,");
   FullFS += FS;
   ParseSubtargetFeatures(GPU, FullFS);
 
@@ -77,7 +77,7 @@
   // Similarly we want enable-prt-strict-null to be on by default and not to
   // unset everything else if it is disabled
 
-  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
+  SmallString<256> FullFS("+promote-alloca,+load-store-opt,");
 
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
     FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
@@ -164,7 +164,6 @@
     HalfRate64Ops(false),
 
     FP64FP16Denormals(false),
-    DX10Clamp(false),
     FlatForGlobal(false),
     AutoWaitcntBeforeBarrier(false),
     CodeObjectV3(false),
@@ -461,7 +460,6 @@
   FMA(false),
   CaymanISA(false),
   CFALUBug(false),
-  DX10Clamp(false),
   HasVertexCache(false),
   R600ALUInst(false),
   FP64(false),
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -611,7 +611,7 @@
 }
 
 bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
-                                        const Function *Callee) const {
+                                     const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
   const FeatureBitset &CallerBits =
     TM.getSubtargetImpl(*Caller)->getFeatureBits();
@@ -620,7 +620,14 @@
 
   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
-  return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
+  if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
+    return false;
+
+  // FIXME: dx10_clamp can just take the caller setting, but there seems to be
+  // no way to support merge for backend defined attributes.
+  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+  return CallerMode.isInlineCompatible(CalleeMode);
 }
 
 void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1120,7 +1120,8 @@
   // omod is ignored by hardware if IEEE bit is enabled. omod also does not
   // correctly handle signed zeros.
   //
-  bool IsIEEEMode = ST->enableIEEEBit(MF);
+  // FIXME: Also need to check strictfp
+  bool IsIEEEMode = MFI->getMode().IEEE;
   bool HasNSZ = MFI->hasNoSignedZerosFPMath();
 
   for (MachineBasicBlock *MBB : depth_first(&MF)) {
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4145,7 +4145,9 @@
 SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
                                                SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  bool IsIEEEMode = Info->getMode().IEEE;
 
   // FIXME: Assert during eslection that this is only selected for
   // ieee_mode. Currently a combine can produce the ieee version for non-ieee
@@ -8300,9 +8302,12 @@
   if (Cmp == APFloat::cmpGreaterThan)
     return SDValue();
 
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
   // TODO: Check IEEE bit enabled?
   EVT VT = Op0.getValueType();
-  if (Subtarget->enableDX10Clamp()) {
+  if (Info->getMode().DX10Clamp) {
     // If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
     // hardware fmed3 behavior converting to a min.
     // FIXME: Should this be allowing -0.0?
@@ -8436,9 +8441,12 @@
     return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
   }
 
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
   // FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
   // handling no dx10-clamp?
-  if (Subtarget->enableDX10Clamp()) {
+  if (Info->getMode().DX10Clamp) {
     // If NaNs is clamped to 0, we are free to reorder the inputs.
 
     if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
@@ -9128,11 +9136,13 @@
   if (!CSrc)
     return SDValue();
 
+  const MachineFunction &MF = DCI.DAG.getMachineFunction();
   const APFloat &F = CSrc->getValueAPF();
   APFloat Zero = APFloat::getZero(F.getSemantics());
   APFloat::cmpResult Cmp0 = F.compare(Zero);
   if (Cmp0 == APFloat::cmpLessThan ||
-      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+      (Cmp0 == APFloat::cmpUnordered &&
+       MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
     return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
   }
 
@@ -9967,7 +9977,10 @@
                                                     bool SNaN,
                                                     unsigned Depth) const {
   if (Op.getOpcode() == AMDGPUISD::CLAMP) {
-    if (Subtarget->enableDX10Clamp())
+    const MachineFunction &MF = DAG.getMachineFunction();
+    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+    if (Info->getMode().DX10Clamp)
       return true; // Clamped to 0.
     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
   }
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -148,6 +148,9 @@
 
   AMDGPUFunctionArgInfo ArgInfo;
 
+  // State of MODE register, assumed FP mode.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
   // Graphics info.
   unsigned PSInputAddr = 0;
   unsigned PSInputEnable = 0;
@@ -281,6 +284,10 @@
     return SpillVGPRs;
   }
 
+  AMDGPU::SIModeRegisterDefaults getMode() const {
+    return Mode; // By value: callers receive a copy.
+  }
+
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
 
Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -28,6 +28,7 @@
 
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
+    Mode(MF.getFunction()),
     PrivateSegmentBuffer(false),
     DispatchPtr(false),
     QueuePtr(false),
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -495,6 +495,46 @@
 /// \returns true if the intrinsic is divergent
 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
 
+
+// Track the defaults assumed for fields in the MODE register.
+struct SIModeRegisterDefaults {
+
+  /// Floating point opcodes that support exception flag gathering quiet and
+  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+  /// quieting.
+  bool IEEE : 1;
+
+  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+  /// clamp NaN to zero; otherwise, pass NaN through.
+  bool DX10Clamp : 1;
+
+  // TODO: FP mode fields
+
+  SIModeRegisterDefaults() :
+    IEEE(true),
+    DX10Clamp(true) {} // Default state: both fields enabled.
+
+  SIModeRegisterDefaults(const Function &F); // Defined out of line.
+
+  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+    SIModeRegisterDefaults Mode;
+    Mode.DX10Clamp = true; // Same for every calling convention.
+    Mode.IEEE = AMDGPU::isCompute(CC); // IEEE only where isCompute(CC).
+    return Mode;
+  }
+
+  bool operator ==(const SIModeRegisterDefaults Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp; // Both fields.
+  }
+
+  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+  // be able to override.
+  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+    return *this == CalleeMode; // Currently requires an exact match.
+  }
+};
+
 } // end namespace AMDGPU
 } // end namespace llvm
 
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1002,6 +1002,19 @@
   return true;
 }
 
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+  // Seed both fields from the calling convention, then let attributes override.
+  const SIModeRegisterDefaults CCDefault =
+      getDefaultForCallingConv(F.getCallingConv());
+  // An empty attribute value keeps the default; otherwise only "true" enables.
+  const StringRef IEEEStr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+  const StringRef ClampStr =
+      F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+
+  IEEE = IEEEStr.empty() ? CCDefault.IEEE : IEEEStr == "true";
+  DX10Clamp = ClampStr.empty() ? CCDefault.DX10Clamp : ClampStr == "true";
+}
+
 namespace {
 
 struct SourceOfDivergence {
Index: test/CodeGen/AMDGPU/amdgcn-ieee.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -0,0 +1,188 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}kernel_ieee_mode_default:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_ieee_mode_on:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_ieee_mode_off:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-NOT: [[VAL0]]
+; GCN-NOT: [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_ieee_mode_default:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define void @func_ieee_mode_default() #0 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_ieee_mode_on:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define void @func_ieee_mode_on() #1 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_ieee_mode_off:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-NOT: [[VAL0]]
+; GCN-NOT: [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
+; GCN-NOT: v_mul_f32
+define void @func_ieee_mode_off() #2 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}cs_ieee_mode_default:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_cs void @cs_ieee_mode_default() #0 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}cs_ieee_mode_on:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_cs void @cs_ieee_mode_on() #1 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}cs_ieee_mode_off:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-NOT: [[VAL0]]
+; GCN-NOT: [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_cs void @cs_ieee_mode_off() #2 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}ps_ieee_mode_default:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-NOT: [[VAL0]]
+; GCN-NOT: [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_ps void @ps_ieee_mode_default() #0 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}ps_ieee_mode_on:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
+; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_ps void @ps_ieee_mode_on() #1 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}ps_ieee_mode_off:
+; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
+; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
+; GCN-NOT: [[VAL0]]
+; GCN-NOT: [[VAL1]]
+; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
+; GCN-NOT: v_mul_f32
+define amdgpu_ps void @ps_ieee_mode_off() #2 {
+  %val0 = load volatile float, float addrspace(1)* undef
+  %val1 = load volatile float, float addrspace(1)* undef
+  %min = call float @llvm.minnum.f32(float %val0, float %val1)
+  store volatile float %min, float addrspace(1)* undef
+  ret void
+}
+
+declare float @llvm.minnum.f32(float, float) #3
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-ieee"="true" }
+attributes #2 = { nounwind "amdgpu-ieee"="false" }
+attributes #3 = { nounwind readnone speculatable }
Index: test/CodeGen/AMDGPU/clamp.ll
===================================================================
--- test/CodeGen/AMDGPU/clamp.ll
+++ test/CodeGen/AMDGPU/clamp.ll
@@ -769,6 +769,6 @@
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
Index: test/CodeGen/AMDGPU/hsa-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -70,10 +70,32 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_no_ieee_mode_vi:
+; GCN: float_mode = 192
+; GCN: enable_dx10_clamp = 1
+; GCN: enable_ieee_mode = 0
+define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi:
+; GCN: float_mode = 192
+; GCN: enable_dx10_clamp = 0
+; GCN: enable_ieee_mode = 0
+define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
 attributes #0 = { nounwind "target-cpu"="kaveri" "target-features"="-code-object-v3" }
 attributes #1 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" }
 attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" }
 attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" }
 attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" }
 attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" }
-attributes #6 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3,-dx10-clamp" }
+attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
+attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
+attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
Index: test/Transforms/Inline/AMDGPU/inline-amdgpu-dx10.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/inline-amdgpu-dx10.ll
@@ -0,0 +1,107 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+define i32 @func_default() #0 {
+  ret i32 0
+}
+
+define i32 @func_dx10_clamp_enabled() #1 {
+  ret i32 0
+}
+
+define i32 @func_dx10_clamp_disabled() #2 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @default_call_default(
+; CHECK-NEXT: ret i32 0
+define i32 @default_call_default() #0 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_enabled_call_default(
+; CHECK-NEXT: ret i32 0
+define i32 @dx10_clamp_enabled_call_default() #1 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_enabled_call_dx10_clamp_enabled(
+; CHECK-NEXT: ret i32 0
+define i32 @dx10_clamp_enabled_call_dx10_clamp_enabled() #1 {
+  %call = call i32 @func_dx10_clamp_enabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_enabled_call_dx10_clamp_disabled(
+; CHECK-NEXT: call i32 @func_dx10_clamp_disabled()
+define i32 @dx10_clamp_enabled_call_dx10_clamp_disabled() #1 {
+  %call = call i32 @func_dx10_clamp_disabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_disabled_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define i32 @dx10_clamp_disabled_call_default() #2 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_disabled_call_dx10_clamp_enabled(
+; CHECK-NEXT: call i32 @func_dx10_clamp_enabled()
+define i32 @dx10_clamp_disabled_call_dx10_clamp_enabled() #2 {
+  %call = call i32 @func_dx10_clamp_enabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @dx10_clamp_disabled_call_dx10_clamp_disabled(
+; CHECK-NEXT: ret i32 0
+define i32 @dx10_clamp_disabled_call_dx10_clamp_disabled() #2 {
+  %call = call i32 @func_dx10_clamp_disabled()
+  ret i32 %call
+}
+
+; Shader calling a compute function
+; CHECK-LABEL: @amdgpu_ps_default_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_default_call_default() #0 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; Shader with dx10_clamp enabled calling a compute function. Default
+; also implies ieee_mode, so this isn't inlinable.
+; CHECK-LABEL: @amdgpu_ps_dx10_clamp_enabled_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_enabled_call_default() #1 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @amdgpu_ps_dx10_clamp_disabled_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_disabled_call_default() #2 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @amdgpu_ps_dx10_clamp_enabled_ieee_call_default(
+; CHECK-NEXT: ret i32 0
+define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_enabled_ieee_call_default() #3 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @amdgpu_ps_dx10_clamp_disabled_ieee_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_disabled_ieee_call_default() #4 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-dx10-clamp"="true" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "amdgpu-ieee"="true" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="true" }
Index: test/Transforms/Inline/AMDGPU/inline-amdgpu-ieee.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AMDGPU/inline-amdgpu-ieee.ll
@@ -0,0 +1,90 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
+
+define i32 @func_default() #0 {
+  ret i32 0
+}
+
+define i32 @func_ieee_enabled() #1 {
+  ret i32 0
+}
+
+define i32 @func_ieee_disabled() #2 {
+  ret i32 0
+}
+
+; CHECK-LABEL: @default_call_default(
+; CHECK-NEXT: ret i32 0
+define i32 @default_call_default() #0 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_enabled_call_default(
+; CHECK-NEXT: ret i32 0
+define i32 @ieee_enabled_call_default() #1 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_enabled_call_ieee_enabled(
+; CHECK-NEXT: ret i32 0
+define i32 @ieee_enabled_call_ieee_enabled() #1 {
+  %call = call i32 @func_ieee_enabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_enabled_call_ieee_disabled(
+; CHECK-NEXT: call i32 @func_ieee_disabled()
+define i32 @ieee_enabled_call_ieee_disabled() #1 {
+  %call = call i32 @func_ieee_disabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_disabled_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define i32 @ieee_disabled_call_default() #2 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_disabled_call_ieee_enabled(
+; CHECK-NEXT: call i32 @func_ieee_enabled()
+define i32 @ieee_disabled_call_ieee_enabled() #2 {
+  %call = call i32 @func_ieee_enabled()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @ieee_disabled_call_ieee_disabled(
+; CHECK-NEXT: ret i32 0
+define i32 @ieee_disabled_call_ieee_disabled() #2 {
+  %call = call i32 @func_ieee_disabled()
+  ret i32 %call
+}
+
+; Shader calling a compute function
+; CHECK-LABEL: @amdgpu_ps_default_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_default_call_default() #0 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; Shader with ieee enabled calling a compute function
+; CHECK-LABEL: @amdgpu_ps_ieee_enabled_call_default(
+; CHECK-NEXT: ret i32 0
+define amdgpu_ps i32 @amdgpu_ps_ieee_enabled_call_default() #1 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+; CHECK-LABEL: @amdgpu_ps_ieee_disabled_call_default(
+; CHECK-NEXT: call i32 @func_default()
+define amdgpu_ps i32 @amdgpu_ps_ieee_disabled_call_default() #2 {
+  %call = call i32 @func_default()
+  ret i32 %call
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-ieee"="true" }
+attributes #2 = { nounwind "amdgpu-ieee"="false" }