diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -6,159 +6,6 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// VOP3 Classes -//===----------------------------------------------------------------------===// - -class getVOP3ModPat { - dag src0 = !if(P.HasOMod, - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); - - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT src0)))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3PModPat { - dag src0_dag = (P.Src0VT (SrcPat P.Src0VT:$src0, i32:$src0_modifiers)); - dag src1_dag = (P.Src1VT (SrcPat P.Src1VT:$src1, i32:$src1_modifiers)); - dag src2_dag = (P.Src2VT (SrcPat P.Src2VT:$src2, i32:$src2_modifiers)); - dag clamp_dag = (i1 timm:$clamp); - - list ret3 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag)))]; - - list ret2 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, src1_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag, src1_dag)))]; - - list ret1 = [(set P.DstVT:$vdst, - !if(HasExplicitClamp, - (DivergentFragOrOp.ret src0_dag, clamp_dag), - (DivergentFragOrOp.ret src0_dag)))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3OpSelPat { - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3OpSelModPat { - list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers), - (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), - (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; - - list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)), - (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), - (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; - - list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))]; - - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3Pat { - list ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; - list ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1))]; - list ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0))]; - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3ClampPat { - list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; - list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; - list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; - list ret = !if(!eq(P.NumSrcArgs, 3), ret3, - !if(!eq(P.NumSrcArgs, 2), ret2, - ret1)); -} - -class getVOP3MAIPat { - list ret = !if(!eq(P.Src0VT, P.Src1VT), - // mfma - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, - timm:$cbsz, timm:$abid, timm:$blgp))], - // smfmac - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx, - timm:$cbsz, timm:$abid))]); -} - -// Consistently gives instructions a _e64 suffix. -multiclass VOP3Inst_Pseudo_Wrapper pattern = []> { - def _e64 : VOP3_Pseudo; -} - -class VOP3InstBase : - VOP3_Pseudo.ret, - getVOP3OpSelPat.ret), - !if(P.HasModifiers, - getVOP3ModPat.ret, - !if(P.HasIntClamp, - getVOP3ClampPat.ret, - !if (P.IsMAI, - getVOP3MAIPat.ret, - getVOP3Pat.ret)))), - 0, P.HasOpSel> { - - let IntClamp = P.HasIntClamp; - let AsmMatchConverter = - !if(P.HasOpSel, - "cvtVOP3OpSel", - !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), - "cvtVOP3", - "")); -} - -multiclass VOP3Inst { - def _e64 : VOP3InstBase; -} - // Special case for v_div_fmas_{f32|f64}, since it seems to be the // only VOP instruction that implicitly reads VCC. let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in { @@ -170,30 +17,6 @@ } } -class VOP3Features { - bit HasClamp = Clamp; - bit HasOpSel = OpSel; - bit IsPacked = Packed; - bit IsMAI = MAI; -} - -def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; -def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; -def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; -def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; -def VOP3_MAI : VOP3Features<0, 0, 0, 1>; - -class VOP3_Profile : VOPProfile { - - let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); - let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); - let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); - let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); - - let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers)); - let IsSingle = 1; -} - class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -857,6 +857,183 @@ DSTCLAMP.ENABLE) >; +//===----------------------------------------------------------------------===// +// VOP3 Classes +//===----------------------------------------------------------------------===// + +class getVOP3ModPat { + dag src0 = !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT src0)))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3PModPat { + dag src0_dag = (P.Src0VT (SrcPat P.Src0VT:$src0, i32:$src0_modifiers)); + dag src1_dag = (P.Src1VT (SrcPat P.Src1VT:$src1, i32:$src1_modifiers)); + dag src2_dag = (P.Src2VT (SrcPat P.Src2VT:$src2, i32:$src2_modifiers)); + dag clamp_dag = (i1 timm:$clamp); + + list ret3 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag, src1_dag, src2_dag)))]; + + list ret2 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, src1_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag, src1_dag)))]; + + list ret1 = [(set P.DstVT:$vdst, + !if(HasExplicitClamp, + (DivergentFragOrOp.ret src0_dag, clamp_dag), + (DivergentFragOrOp.ret src0_dag)))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3OpSelPat { + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3OpSelModPat { + list ret3 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers), + (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), + (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), + (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (DivergentFragOrOp.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3Pat { + list ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; + list ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0, P.Src1VT:$src1))]; + list ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp.ret P.Src0VT:$src0))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3ClampPat { + list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; + list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; + list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3MAIPat { + list ret = !if(!eq(P.Src0VT, P.Src1VT), + // mfma + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, + timm:$cbsz, timm:$abid, timm:$blgp))], + // smfmac + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx, + timm:$cbsz, timm:$abid))]); +} + +class VOP3Features { + bit HasClamp = Clamp; + bit HasOpSel = OpSel; + bit IsPacked = Packed; + bit IsMAI = MAI; +} + +def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; +def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; +def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; +def VOP3_MAI : VOP3Features<0, 0, 0, 1>; + +class VOP3_Profile : VOPProfile { + + let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); + let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); + let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); + let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + + let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers)); + let IsSingle = 1; +} + +// Consistently gives instructions a _e64 suffix. +multiclass VOP3Inst_Pseudo_Wrapper pattern = []> { + def _e64 : VOP3_Pseudo; +} + +class VOP3InstBase : + VOP3_Pseudo.ret, + getVOP3OpSelPat.ret), + !if(P.HasModifiers, + getVOP3ModPat.ret, + !if(P.HasIntClamp, + getVOP3ClampPat.ret, + !if (P.IsMAI, + getVOP3MAIPat.ret, + getVOP3Pat.ret)))), + 0, P.HasOpSel> { + + let IntClamp = P.HasIntClamp; + let AsmMatchConverter = + !if(P.HasOpSel, + "cvtVOP3OpSel", + !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), + "cvtVOP3", + "")); +} + +multiclass VOP3Inst { + def _e64 : VOP3InstBase; +} + include "VOPCInstructions.td" include "VOP1Instructions.td"