diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -881,10 +881,12 @@ let OtherPredicates = [HasMadMacF32Insts] in { def : FMADPat ; def : FMADPat ; +} // OtherPredicates = [HasMadMacF32Insts] // Don't allow source modifiers. If there are any source modifiers then it's // better to select mad instead of mac. -let SubtargetPredicate = isGFX6GFX7GFX10 in +let SubtargetPredicate = isGFX6GFX7GFX10, + OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in def : GCNPat < (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0), (VOP3NoMods f32:$src1)), @@ -892,7 +894,6 @@ (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // OtherPredicates = [HasMadMacF32Insts] let SubtargetPredicate = Has16BitInsts in { def : FMADPat ; @@ -909,9 +910,10 @@ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; -let OtherPredicates = [HasMadMacF32Insts] in { +let OtherPredicates = [HasMadMacF32Insts] in def : FMADModsPat; +let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in def : GCNPat < (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod), (VOP3Mods f32:$src1, i32:$src1_mod)), @@ -919,7 +921,6 @@ (V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1, $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // OtherPredicates = [HasMadMacF32Insts] let SubtargetPredicate = Has16BitInsts in def : FMADModsPat; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -237,7 +237,52 @@ ret float %result } -define float @v_mad_legacy_f32(float %a, float %b, float %c) { +; Don't form mad/mac instructions because they don't support denormals. +define float @v_add_mul_legacy_f32(float %a, float %b, float %c) { +; GFX6-LABEL: v_add_mul_legacy_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_mul_legacy_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_mul_legacy_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_add_mul_legacy_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_add_mul_legacy_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX103-NEXT: s_setpc_b64 s[30:31] + %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) + %add = fadd float %mul, %c + ret float %add +} + +define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 { ; GFX6-LABEL: v_mad_legacy_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -279,7 +324,7 @@ ret float %add } -define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) { +define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) #2 { ; GFX6-LABEL: v_mad_legacy_fneg_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -512,3 +557,4 @@ attributes #0 = { nounwind readnone speculatable willreturn } attributes #1 = { nounwind readnone speculatable } +attributes #2 = { "denormal-fp-math-f32"="preserve-sign" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -38,6 +38,17 @@ ret void } +; Don't form mad/mac instructions because they don't support denormals. +; GCN-LABEL: {{^}}test_add_mul_legacy_f32: +; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { + %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) + %add = fadd float %mul, %c + store float %add, float addrspace(1)* %out, align 4 + ret void +} + ; GCN-LABEL: {{^}}test_mad_legacy_f32: ; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} @@ -45,7 +56,7 @@ ; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { +define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) %add = fadd float %mul, %c store float %add, float addrspace(1)* %out, align 4 @@ -56,7 +67,7 @@ ; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}} ; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} ; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { +define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 { %a.fneg = fneg float %a %b.fneg = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) @@ -70,3 +81,4 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } +attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }