diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -866,7 +866,8 @@ // VOP2 Patterns //===----------------------------------------------------------------------===// -// TODO: Check only no src2 mods? +// NoMods pattern used for mac. If there are any source modifiers then it's +// better to select mad instead of mac. class FMADPat : GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)), (vt (VOP3NoMods vt:$src1)), @@ -875,18 +876,29 @@ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; - // Prefer mac form when there are no modifiers. let AddedComplexity = 9 in { +let OtherPredicates = [HasMadMacF32Insts] in { def : FMADPat ; def : FMADPat ; +// Don't allow source modifiers. If there are any source modifiers then it's +// better to select mad instead of mac. +let SubtargetPredicate = isGFX6GFX7GFX10 in +def : GCNPat < + (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0), + (VOP3NoMods f32:$src1)), + (VOP3NoMods f32:$src2))), + (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) +>; +} // OtherPredicates = [HasMadMacF32Insts] + let SubtargetPredicate = Has16BitInsts in { def : FMADPat ; def : FMADPat ; -} - -} +} // SubtargetPredicate = Has16BitInsts +} // AddedComplexity = 9 class FMADModsPat : GCNPat< @@ -897,11 +909,20 @@ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; -let SubtargetPredicate = HasMadMacF32Insts in +let OtherPredicates = [HasMadMacF32Insts] in { def : FMADModsPat; -def : FMADModsPat { - let SubtargetPredicate = Has16BitInsts; -} + +def : GCNPat < + (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod), + (VOP3Mods f32:$src1, i32:$src1_mod)), + (VOP3Mods f32:$src2, i32:$src2_mod))), + (V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1, + $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) +>; +} // OtherPredicates = [HasMadMacF32Insts] + +let SubtargetPredicate = Has16BitInsts in +def : FMADModsPat; class VOPSelectModsPat : GCNPat < (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods), diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -493,16 +493,18 @@ defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; -let mayRaiseFPException = 0 in { -let SubtargetPredicate = HasMadMacF32Insts in { +let mayRaiseFPException = 0, OtherPredicates = [HasMadMacF32Insts] in { let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; -} + +let SubtargetPredicate = isGFX6GFX7GFX10 in +defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_F32>; +} // End Constraints = "$vdst = $src2", DisableEncoding="$src2", + // isConvertibleToThreeAddress = 1 def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; -} // End SubtargetPredicate = HasMadMacF32Insts -} +} // End mayRaiseFPException = 0, OtherPredicates = [HasMadMacF32Insts] // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. @@ -556,10 +558,6 @@ } // End SubtargetPredicate = isGFX6GFX7 let isCommutable = 1 in { -let SubtargetPredicate = isGFX6GFX7GFX10 in { -let OtherPredicates = [HasMadMacF32Insts] in -defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; -} // End SubtargetPredicate = isGFX6GFX7GFX10 let SubtargetPredicate = isGFX6GFX7 in { defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN, srl>; defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN, sra>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -1,53 +1,196 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX101 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX103 %s define float @v_mul_legacy_f32(float %a, float %b) { -; GCN-LABEL: v_mul_legacy_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) ret float %result } define float @v_mul_legacy_undef0_f32(float %a) { -; GCN-LABEL: v_mul_legacy_undef0_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_undef0_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_undef0_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_undef0_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_undef0_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_undef0_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) ret float %result } define float @v_mul_legacy_undef1_f32(float %a) { -; GCN-LABEL: v_mul_legacy_undef1_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_undef1_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_undef1_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_undef1_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_undef1_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_undef1_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) ret float %result } define float @v_mul_legacy_undef_f32() { -; GCN-LABEL: v_mul_legacy_undef_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_undef_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_undef_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_undef_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_undef_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_undef_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef) ret float %result } define float @v_mul_legacy_fabs_f32(float %a, float %b) { -; GCN-LABEL: v_mul_legacy_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_fabs_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_fabs_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_fabs_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_fabs_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_fabs_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs) @@ -55,76 +198,311 @@ } define float @v_mul_legacy_fneg_f32(float %a, float %b) { -; GCN-LABEL: v_mul_legacy_fneg_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 -; GCN-NEXT: s_setpc_b64 s[30:31] - %a.fabs = fneg float %a - %b.fabs = fneg float %b - %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs) +; GFX6-LABEL: v_mul_legacy_fneg_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_fneg_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_fneg_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_fneg_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_fneg_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] + %a.fneg = fneg float %a + %b.fneg = fneg float %b + %result = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) ret float %result } -; TODO: Should match mac_legacy/mad_legacy define float @v_mad_legacy_f32(float %a, float %b, float %c) { -; GCN-LABEL: v_mad_legacy_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GCN-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mad_legacy_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mac_legacy_f32_e64 v2, v0, v1 +; GFX6-NEXT: v_mov_b32_e32 v0, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mad_legacy_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mad_legacy_f32 v0, v0, v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mad_legacy_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mad_legacy_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mad_legacy_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mac_legacy_f32_e64 v2, v0, v1 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: v_mov_b32_e32 v0, v2 +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mad_legacy_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX103-NEXT: s_setpc_b64 s[30:31] %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) %add = fadd float %mul, %c ret float %add } +define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) { +; GFX6-LABEL: v_mad_legacy_fneg_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mad_legacy_fneg_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mad_legacy_fneg_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mad_legacy_fneg_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mad_legacy_fneg_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX103-NEXT: s_setpc_b64 s[30:31] + %a.fneg = fneg float %a + %b.fneg = fneg float %b + %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) + %add = fadd float %mul, %c + ret float %add +} + define amdgpu_ps float @s_mul_legacy_f32(float inreg %a, float inreg %b) { -; GCN-LABEL: s_mul_legacy_f32: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 -; GCN-NEXT: ; return to shader part epilog +; GFX6-LABEL: s_mul_legacy_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v0, s1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_mul_legacy_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_mul_legacy_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX101-LABEL: s_mul_legacy_f32: +; GFX101: ; %bb.0: +; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s0, s1 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: ; return to shader part epilog +; +; GFX103-LABEL: s_mul_legacy_f32: +; GFX103: ; %bb.0: +; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s0, s1 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: ; return to shader part epilog %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) ret float %result } define float @v_mul_legacy_f32_1.0(float %a) { -; GCN-LABEL: v_mul_legacy_f32_1.0: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_f32_1.0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_f32_1.0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_f32_1.0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_f32_1.0: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_f32_1.0: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 1.0) ret float %result } define float @v_mul_legacy_f32_1.0_swap(float %b) { -; GCN-LABEL: v_mul_legacy_f32_1.0_swap: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_f32_1.0_swap: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_f32_1.0_swap: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_f32_1.0_swap: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_f32_1.0_swap: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_f32_1.0_swap: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float 1.0, float %b) ret float %result } define float @v_mul_legacy_f32_2.0(float %a) { -; GCN-LABEL: v_mul_legacy_f32_2.0: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_f32_2.0: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_f32_2.0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_f32_2.0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_f32_2.0: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_f32_2.0: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 2.0) ret float %result } define float @v_mul_legacy_f32_2.0_swap(float %b) { -; GCN-LABEL: v_mul_legacy_f32_2.0_swap: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX6-LABEL: v_mul_legacy_f32_2.0_swap: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_mul_legacy_f32_2.0_swap: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_mul_legacy_f32_2.0_swap: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX101-LABEL: v_mul_legacy_f32_2.0_swap: +; GFX101: ; %bb.0: +; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX101-NEXT: ; implicit-def: $vcc_hi +; GFX101-NEXT: s_setpc_b64 s[30:31] +; +; GFX103-LABEL: v_mul_legacy_f32_2.0_swap: +; GFX103: ; %bb.0: +; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 +; GFX103-NEXT: ; implicit-def: $vcc_hi +; GFX103-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float 2.0, float %b) ret float %result } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -1,9 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s - +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX6 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX8 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX101 %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s ; GCN-LABEL: {{^}}test_mul_legacy_f32: -; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) store float %result, float addrspace(1)* %out, align 4 @@ -11,7 +13,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32: -; GCN: v_mul_legacy_f32_e32 +; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) store float %result, float addrspace(1)* %out, align 4 @@ -19,7 +21,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32: -; GCN: v_mul_legacy_f32_e32 +; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) store float %result, float addrspace(1)* %out, align 4 @@ -27,7 +29,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32: -; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}| +; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}| define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) @@ -36,10 +38,13 @@ ret void } -; TODO: Should match mac_legacy/mad_legacy ; GCN-LABEL: {{^}}test_mad_legacy_f32: -; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GCN: v_add_f32_e32 +; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) %add = fadd float %mul, %c @@ -47,6 +52,19 @@ ret void } +; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32: +; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}} +; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} +; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { + %a.fneg = fneg float %a + %b.fneg = fneg float %b + %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) + %add = fadd float %mul, %c + store float %add, float addrspace(1)* %out, align 4 + ret void +} + declare float @llvm.fabs.f32(float) #1 declare float @llvm.amdgcn.fmul.legacy(float, float) #1