diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -32,6 +32,46 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_f16:
+; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; VI-DENORM-CONTRACT: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX10-FLUSH:  v_mul_f16_e32
+; GFX10-FLUSH:  v_add_f16_e32
+; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+define amdgpu_kernel void @fmul_fadd_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
+                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
+  %r0 = load half, half addrspace(1)* %in1
+  %r1 = load half, half addrspace(1)* %in2
+  %r2 = load half, half addrspace(1)* %in3
+  %mul = fmul half %r0, %r1
+  %add = fadd half %mul, %r2
+  store half %add, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fmul_fadd_contract_f16:
+; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; VI-DENORM: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX10-FLUSH:  v_mul_f16_e32
+; GFX10-FLUSH:  v_add_f16_e32
+; GFX10-DENORM: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+define amdgpu_kernel void @fmul_fadd_contract_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
+                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
+  %r0 = load half, half addrspace(1)* %in1
+  %r1 = load half, half addrspace(1)* %in2
+  %r2 = load half, half addrspace(1)* %in3
+  %mul = fmul half %r0, %r1
+  %add = fadd contract half %mul, %r2
+  store half %add, half addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f16
 ; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]],
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -69,6 +69,24 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_contract_f32:
+; GCN-FLUSH-FMAC: v_fmac_f32_e32
+
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
+; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
+
+; GCN-DENORM-FASTFMA: v_fma_f32
+define amdgpu_kernel void @fmul_fadd_contract_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                           float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
+  %r0 = load volatile float, float addrspace(1)* %in1
+  %r1 = load volatile float, float addrspace(1)* %in2
+  %r2 = load volatile float, float addrspace(1)* %in3
+  %mul = fmul float %r0, %r1
+  %add = fadd contract float %mul, %r2
+  store float %add, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32
 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -1,4 +1,4 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde  -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde  -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
@@ -33,6 +33,20 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_contract_f64:
+; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                           double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
+  %r2 = load double, double addrspace(1)* %in3
+  %tmp = fmul double %r0, %r1
+  %r3 = fadd contract double %tmp, %r2
+  store double %r3, double addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fadd_a_a_b_f64:
 ; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]],
 ; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]],
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -27,6 +27,39 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}fmul_fadd_v2f16:
+; GFX9-DENORM-STRICT: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; GFX9-DENORM-STRICT: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
+                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
+  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
+  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
+  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
+  %r3 = fmul <2 x half> %r0, %r1
+  %r4 = fadd <2 x half> %r3, %r2
+  store <2 x half> %r4, <2 x half> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fmul_fadd_contract_v2f16:
+; GFX9-FLUSH: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
+                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
+  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
+  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
+  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
+  %r3 = fmul <2 x half> %r0, %r1
+  %r4 = fadd contract <2 x half> %r3, %r2
+  store <2 x half> %r4, <2 x half> addrspace(1)* %out
+  ret void
+}
+
+
 ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_v2f16:
 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
 ; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],