# Patch section: AMDGPUTargetTransformInfo.cpp, one hunk (12 lines before,
# 14 after) in the code that returns TCC_Free for an fmul expected to fuse
# into its fadd/fsub user.
#
# What the hunk changes:
#   * The shortcut used to require SLT == MVT::f32 and !HasFP32Denormals in
#     the outer condition. The outer condition now accepts MVT::f32, MVT::f64
#     and MVT::f16, and no longer tests the denormal flag.
#   * The denormal test moves into the inner opcode check, where it is OR'ed
#     with a second way to qualify: both the fmul (CxtI) and its single user
#     (FAdd) report hasAllowContract().
#   * The braces around the single return statement are dropped.
#
# NOTE(review): the f64 and f16 cases are gated on HasFP32Denormals, a flag
# whose name refers to f32 only. Nothing visible in this hunk shows a separate
# f64/f16 denormal setting being consulted; confirm this is intended.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -510,12 +510,14 @@
     // Check possible fuse {fadd|fsub}(a,fmul(b,c)) and return zero cost for
     // fmul(b,c) supposing the fadd|fsub will get estimated cost for the whole
     // fused operation.
-    if (!HasFP32Denormals && SLT == MVT::f32 && CxtI && CxtI->hasOneUse())
+    if ((SLT == MVT::f32 || SLT == MVT::f64 || SLT == MVT::f16) && CxtI &&
+        CxtI->hasOneUse())
       if (const auto *FAdd = dyn_cast<BinaryOperator>(*CxtI->user_begin())) {
         const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode());
-        if (OPC == ISD::FADD || OPC == ISD::FSUB) {
+        if ((OPC == ISD::FADD || OPC == ISD::FSUB) &&
+            (!HasFP32Denormals ||
+             (CxtI->hasAllowContract() && FAdd->hasAllowContract())))
           return TargetTransformInfo::TCC_Free;
-        }
       }
     LLVM_FALLTHROUGH;
   case ISD::FADD:
# Patch section: fused_costs.ll, rewritten in a single hunk (48 lines removed,
# 155 lines added).
#
# The four RUN lines, the target triple, the attribute line and the original
# float / <2 x float> fadd and fsub functions are removed and re-added with
# the same visible text.
# NOTE(review): identical remove/re-add usually indicates a line-ending or
# whitespace normalisation; confirm that is what happened here.
#
# Functions added by this hunk:
#   fmul_fadd_contract_f32
#   fmul_fadd_f16, fmul_fadd_contract_f16, fmul_fadd_v2f16,
#   fmul_fsub_f16, fmul_fsub_v2f16
#   fmul_fadd_f64, fmul_fadd_contract_f64, fmul_fadd_v2f64,
#   fmul_fsub_f64, fmul_fsub_v2f64
#
# The *_contract_* functions check the fmul cost of 0 under the ALL prefix,
# i.e. under every RUN line; the others split the fmul cost between the FUSED
# and SLOW prefixes.
#
# NOTE(review): the RUN lines differ only in -denormal-fp-math-f32, yet the
# FUSED/SLOW split is also applied to the half and double functions. Confirm
# that keying the f16/f64 expectations off the f32 denormal mode is intended.
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
--- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
@@ -1,48 +1,155 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-
-target triple = "amdgcn--"
-
-; ALL-LABEL: 'fmul_fadd_f32':
-; FUSED: estimated cost of 0 for instruction:   %mul = fmul float
-; SLOW: estimated cost of 1 for instruction:   %mul = fmul float
-; ALL: estimated cost of 1 for instruction:   %add = fadd float
-define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
-  %mul = fmul float %r0, %r1
-  %add = fadd float %mul, %r2
-  ret float %add
-}
-
-; ALL-LABEL: 'fmul_fadd_v2f32':
-; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction:   %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction:   %add = fadd <2 x float>
-define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
-  %mul = fmul <2 x float> %r0, %r1
-  %add = fadd <2 x float> %mul, %r2
-  ret <2 x float> %add
-}
-
-; ALL-LABEL: 'fmul_fsub_f32':
-; FUSED: estimated cost of 0 for instruction:   %mul = fmul float
-; SLOW: estimated cost of 1 for instruction:   %mul = fmul float
-; ALL: estimated cost of 1 for instruction:   %sub = fsub float
-define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
-  %mul = fmul float %r0, %r1
-  %sub = fsub float %mul, %r2
-  ret float %sub
-}
-
-; ALL-LABEL: 'fmul_fsub_v2f32':
-; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction:   %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction:   %sub = fsub <2 x float>
-define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
-  %mul = fmul <2 x float> %r0, %r1
-  %sub = fsub <2 x float> %mul, %r2
-  ret <2 x float> %sub
-}
-
-attributes #0 = { nounwind }
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
+
+target triple = "amdgcn--"
+
+; ALL-LABEL: 'fmul_fadd_f32':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul float
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul float
+; ALL: estimated cost of 1 for instruction:   %add = fadd float
+define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul float %r0, %r1
+  %add = fadd float %mul, %r2
+  ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f32':
+; ALL: estimated cost of 0 for instruction:   %mul = fmul contract float
+; ALL: estimated cost of 1 for instruction:   %add = fadd contract float
+define float @fmul_fadd_contract_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul contract float %r0, %r1
+  %add = fadd contract float %mul, %r2
+  ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f32':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction:   %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction:   %add = fadd <2 x float>
+define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+  %mul = fmul <2 x float> %r0, %r1
+  %add = fadd <2 x float> %mul, %r2
+  ret <2 x float> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f32':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul float
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul float
+; ALL: estimated cost of 1 for instruction:   %sub = fsub float
+define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul float %r0, %r1
+  %sub = fsub float %mul, %r2
+  ret float %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f32':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction:   %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction:   %sub = fsub <2 x float>
+define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+  %mul = fmul <2 x float> %r0, %r1
+  %sub = fsub <2 x float> %mul, %r2
+  ret <2 x float> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f16':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul half
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul half
+; ALL: estimated cost of 1 for instruction:   %add = fadd half
+define half @fmul_fadd_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul half %r0, %r1
+  %add = fadd half %mul, %r2
+  ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f16':
+; ALL: estimated cost of 0 for instruction:   %mul = fmul contract half
+; ALL: estimated cost of 1 for instruction:   %add = fadd contract half
+define half @fmul_fadd_contract_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul contract half %r0, %r1
+  %add = fadd contract half %mul, %r2
+  ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f16':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction:   %add = fadd <2 x half>
+define <2 x half> @fmul_fadd_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+  %mul = fmul <2 x half> %r0, %r1
+  %add = fadd <2 x half> %mul, %r2
+  ret <2 x half> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f16':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul half
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul half
+; ALL: estimated cost of 1 for instruction:   %sub = fsub half
+define half @fmul_fsub_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul half %r0, %r1
+  %sub = fsub half %mul, %r2
+  ret half %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f16':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction:   %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction:   %sub = fsub <2 x half>
+define <2 x half> @fmul_fsub_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+  %mul = fmul <2 x half> %r0, %r1
+  %sub = fsub <2 x half> %mul, %r2
+  ret <2 x half> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f64':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul double
+; SLOW: estimated cost of 3 for instruction:   %mul = fmul double
+; ALL: estimated cost of 3 for instruction:   %add = fadd double
+define double @fmul_fadd_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul double %r0, %r1
+  %add = fadd double %mul, %r2
+  ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f64':
+; ALL: estimated cost of 0 for instruction:   %mul = fmul contract double
+; ALL: estimated cost of 3 for instruction:   %add = fadd contract double
+define double @fmul_fadd_contract_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul contract double %r0, %r1
+  %add = fadd contract double %mul, %r2
+  ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f64':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x double>
+; SLOW: estimated cost of 6 for instruction:   %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction:   %add = fadd <2 x double>
+define <2 x double> @fmul_fadd_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+  %mul = fmul <2 x double> %r0, %r1
+  %add = fadd <2 x double> %mul, %r2
+  ret <2 x double> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f64':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul double
+; SLOW: estimated cost of 3 for instruction:   %mul = fmul double
+; ALL: estimated cost of 3 for instruction:   %sub = fsub double
+define double @fmul_fsub_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul double %r0, %r1
+  %sub = fsub double %mul, %r2
+  ret double %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f64':
+; FUSED: estimated cost of 0 for instruction:   %mul = fmul <2 x double>
+; SLOW: estimated cost of 6 for instruction:   %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction:   %sub = fsub <2 x double>
+define <2 x double> @fmul_fsub_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+  %mul = fmul <2 x double> %r0, %r1
+  %sub = fsub <2 x double> %mul, %r2
+  ret <2 x double> %sub
+}
+
+attributes #0 = { nounwind }