diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1321,6 +1321,11 @@
 def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
   AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;
 
+// Subtargets that have the 32-bit v_fmaak_f32 / v_fmamk_f32 instructions.
+def HasFmaakFmamkF32Insts :
+  Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
+  AssemblerPredicate<(any_of FeatureGFX10Insts)>;
+
 def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">,
   AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -838,6 +838,12 @@
     return HasPackedFP32Ops;
   }
 
+  /// \returns true if the subtarget has the v_fmaak_f32 and v_fmamk_f32
+  /// instructions, i.e. when its generation is GFX10 or newer.
+  bool hasFmaakFmamkF32Insts() const {
+    return getGeneration() >= GFX10;
+  }
+
   bool hasExtendedImageInsts() const {
     return HasExtendedImageInsts;
   }
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -749,17 +749,21 @@
   }
 } // End AddedComplexity = 30
 
+let SubtargetPredicate = HasFmaakFmamkF32Insts in {
+def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
+
+let isCommutable = 1 in
+def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
+} // End SubtargetPredicate = HasFmaakFmamkF32Insts
+
 let SubtargetPredicate = isGFX10Plus in {
 
-def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
-let FPDPRounding = 1 in
+let FPDPRounding = 1 in {
 def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
 
-let isCommutable = 1 in {
-def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
-let FPDPRounding = 1 in
+let isCommutable = 1 in
 def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
-} // End isCommutable = 1
+} // End FPDPRounding = 1
 
 let Constraints = "$vdst = $src2",
     DisableEncoding="$src2",