Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1083,6 +1083,11 @@
   RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
 }
 
+// Returns the register class to use for DPP source operands.
+class getDPPSrcForVT<ValueType VT> {
+  RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
+}
+
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
@@ -1150,6 +1155,34 @@
                         /* endif */ )));
 }
 
+class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
+                 bit HasModifiers> {
+  // Returns the (ins ...) dag for a DPP instruction: sources, then DPP controls.
+  dag ret = !if (!eq(NumSrcArgs, 1),
+            !if (!eq(HasModifiers, 1),
+              // VOP1_DPP with modifiers
+              (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+                   i32imm:$dpp_ctrl, i1imm:$bound_ctrl,
+                   i32imm:$bank_mask, i32imm:$row_mask)
+            /* else */,
+              // VOP1_DPP without modifiers
+              (ins Src0RC:$src0, i32imm:$dpp_ctrl, i1imm:$bound_ctrl,
+                   i32imm:$bank_mask, i32imm:$row_mask)
+            /* endif */)
+              /* NumSrcArgs == 2 */,
+            !if (!eq(HasModifiers, 1),
+              // VOP2_DPP with modifiers
+              (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+                   InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+                   i32imm:$dpp_ctrl, i1imm:$bound_ctrl,
+                   i32imm:$bank_mask, i32imm:$row_mask)
+            /* else */,
+              // VOP2_DPP without modifiers
+              (ins Src0RC:$src0, Src1RC:$src1, i32imm:$dpp_ctrl, i1imm:$bound_ctrl,
+                   i32imm:$bank_mask, i32imm:$row_mask)
+            /* endif */));
+}
+
 // Returns the assembly string for the inputs and outputs of a VOP[12C]
 // instruction. This does not add the _e32 suffix, so it can be reused
 // by getAsm64.
@@ -1178,6 +1211,17 @@
                "$dst, "#src0#src1#src2#"$clamp"#"$omod");
 }
 
+class getAsmDPP <int NumSrcArgs, bit HasModifiers> {
+  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string src1 = !if(!eq(NumSrcArgs, 1), "",
+                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                           " $src1_modifiers,"));
+  string args = !if(!eq(HasModifiers, 0),
+                     getAsm32<0, NumSrcArgs>.ret,
+                     ", "#src0#src1); // ", " separates $dst from src0, as in getAsm64
+  string ret = " $dst"#args#", $dpp_ctrl, "#"$bound_ctrl, "#"$bank_mask, "#"$row_mask";
+}
+
 class VOPProfile <list<ValueType> _ArgVT> {
 
   field list<ValueType> ArgVT = _ArgVT;
@@ -1187,11 +1231,14 @@
   field ValueType Src1VT = ArgVT[2];
   field ValueType Src2VT = ArgVT[3];
   field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
+  field RegisterClass DstRCDPP = !if(!eq(DstVT.Size, 64), VReg_64, VGPR_32);
   field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
   field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
   field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
   field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
   field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
+  field RegisterClass Src0DPP = getDPPSrcForVT<Src0VT>.ret;
+  field RegisterClass Src1DPP = getDPPSrcForVT<Src1VT>.ret;
 
   field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
   field bit HasDst32 = HasDst;
@@ -1204,13 +1251,16 @@
   // output. This is manually overridden for them.
   field dag Outs32 = Outs;
   field dag Outs64 = Outs;
+  field dag OutsDPP = (outs DstRCDPP:$dst);
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                              HasModifiers>.ret;
+  field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
 
   field string Asm32 = getAsm32<HasDst, NumSrcArgs>.ret;
   field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers>.ret;
+  field string AsmDPP = getAsmDPP<NumSrcArgs, HasModifiers>.ret;
 }
 
 // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@@ -1428,6 +1478,16 @@
 }
 
+class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
+  VOP1_DPPe <op.VI>,
+  VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, []> {
+  // FIXME: remove when we are using the correct names for the encoding fields.
+  field bit vdst = 0;
+  let AssemblerPredicates = [isVI];
+  let src0_modifiers = !if(p.HasModifiers, ?, 0);
+  let src1_modifiers = 0;
+}
+
 multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
                      string asm = opName#p.Asm32> {
 
@@ -1675,6 +1735,8 @@
 
   defm _e64 : VOP3_1_m ;
+
+  def _dpp : VOP1_DPP <op, opName, p>;
 }
 
 multiclass VOP1Inst
[extraction gap: the remainder of this hunk and the header of the following file section are missing from the source text]
+class VOP_DPP <dag outs, dag ins, string asm, list<dag> pattern> :
+    VOPAnyCommon <outs, ins, asm, pattern> {
+  let Size = 8;
+}
+
+class VOP_DPPe : Enc64 {
+  bits<2> src0_modifiers;
+  bits<8> src0;
+  bits<2> src1_modifiers;
+  bits<9> dpp_ctrl;
+  bits<1> bound_ctrl;
+  bits<4> bank_mask;
+  bits<4> row_mask;
+
+  let Inst{39-32} = src0;
+  let Inst{48-40} = dpp_ctrl;
+  let Inst{51} = bound_ctrl;
+  let Inst{52} = src0_modifiers{0}; // src0_neg
+  let Inst{53} = src0_modifiers{1}; // src0_abs
+  let Inst{54} = src1_modifiers{0}; // src1_neg
+  let Inst{55} = src1_modifiers{1}; // src1_abs
+  let Inst{59-56} = bank_mask;
+  let Inst{63-60} = row_mask;
+}
+
+class VOP1_DPPe <bits<8> op> : VOP_DPPe {
+  bits<8> dst;
+
+  let Inst{8-0} = 0xfa; // dpp
+  let Inst{16-9} = op;
+  let Inst{24-17} = dst;
+  let Inst{31-25} = 0x3f; //encoding
+}
+
+class VOP2_DPPe <bits<6> op> : VOP_DPPe { // DPP fields live in bits 63-32 of VOP_DPPe
+  bits<8> dst;
+  bits<8> src1;
+
+  let Inst{8-0} = 0xfa; //dpp
+  let Inst{16-9} = src1;
+  let Inst{24-17} = dst;
+  let Inst{30-25} = op;
+  let Inst{31} = 0x0; //encoding
+}
+
 class EXPe_vi : EXPe {
   let Inst{31-26} = 0x31; //encoding
 }