[AMDGPU] Gate v_fmac_f64 on a dedicated subtarget feature

Summary of the hunks below:
  * AMDGPU.td: define FeatureVFmacF64Inst ("vfmacf64-inst"), add it to two
    feature lists whose definitions begin outside the hunks shown, and define
    the matching HasVFmacF64Inst predicate.
  * GCNSubtarget.h: add the HasVFmacF64Inst flag (default false) and its
    hasVFmacF64Inst() accessor.
  * SIInstructions.td: guard the pattern that follows the "select fma instead
    of fmac" comment with HasVFmacF64Inst instead of isGFX90APlus.
  * VOP2Instructions.td: guard V_FMAC_F64 with HasVFmacF64Inst instead of
    isGFX90APlus; V_MUL_LEGACY_F32 stays under isGFX90APlus with IsSingle = 1,
    now in its own let block.

NOTE(review): leading whitespace on context lines was re-laid out by hand;
if a hunk is rejected, apply with whitespace tolerance or regenerate the patch.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -536,6 +536,12 @@
   "Has v_fmac_f32 and v_xnor_b32 instructions"
 >;
 
+def FeatureVFmacF64Inst : SubtargetFeature<"vfmacf64-inst",
+  "HasVFmacF64Inst",
+  "true",
+  "Has v_fmac_f64 instruction"
+>;
+
 def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
   "HasDot1Insts",
   "true",
@@ -1114,6 +1120,7 @@
    FeatureFmaMixInsts,
    FeatureLDSBankCount32,
    FeatureDLInsts,
+   FeatureVFmacF64Inst,
    FeatureDot1Insts,
    FeatureDot2Insts,
    FeatureDot3Insts,
@@ -1152,6 +1159,7 @@
    FeatureFmaMixInsts,
    FeatureLDSBankCount32,
    FeatureDLInsts,
+   FeatureVFmacF64Inst,
    FeatureDot1Insts,
    FeatureDot2Insts,
    FeatureDot3Insts,
@@ -1722,6 +1730,9 @@
 def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
   AssemblerPredicate<(all_of FeatureDLInsts)>;
 
+def HasVFmacF64Inst : Predicate<"Subtarget->hasVFmacF64Inst()">,
+  AssemblerPredicate<(all_of FeatureVFmacF64Inst)>;
+
 def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
   AssemblerPredicate<(all_of FeatureDot1Insts)>;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -136,6 +136,7 @@
   bool GFX10_AEncoding = false;
   bool GFX10_BEncoding = false;
   bool HasDLInsts = false;
+  bool HasVFmacF64Inst = false;
   bool HasDot1Insts = false;
   bool HasDot2Insts = false;
   bool HasDot3Insts = false;
@@ -698,6 +699,8 @@
     return HasDLInsts;
   }
 
+  bool hasVFmacF64Inst() const { return HasVFmacF64Inst; }
+
   bool hasDot1Insts() const {
     return HasDot1Insts;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2819,7 +2819,7 @@
 >;
 }
 
-let SubtargetPredicate = isGFX90APlus in
+let OtherPredicates = [HasVFmacF64Inst] in
 // Don't allow source modifiers. If there are any source modifiers then it's
 // better to select fma instead of fmac.
 def : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1013,7 +1013,7 @@
 
 } // End SubtargetPredicate = HasFmaLegacy32
 
-let SubtargetPredicate = isGFX90APlus,
+let SubtargetPredicate = HasVFmacF64Inst,
     Constraints = "$vdst = $src2",
     DisableEncoding="$src2",
     isConvertibleToThreeAddress = 1,
@@ -2280,12 +2280,13 @@
   }
 } // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
 
-let SubtargetPredicate = isGFX90APlus in {
+let SubtargetPredicate = HasVFmacF64Inst in {
   defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>;
-  let IsSingle = 1 in {
-    defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
-  }
-} // End SubtargetPredicate = isGFX90APlus
+} // End SubtargetPredicate = HasVFmacF64Inst
+
+let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in {
+  defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
+} // End SubtargetPredicate = isGFX90APlus, IsSingle = 1
 
 let SubtargetPredicate = HasFmaakFmamkF32Insts in {
   defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;