Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -174,6 +174,14 @@
   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp) const;
+  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                        SDValue &Clamp) const;
+
+  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                            SDValue &Clamp) const;
+
   void SelectADD_SUB_I64(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
@@ -1864,6 +1872,42 @@
   return SelectVOP3PMods(In, Src, SrcMods);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
+                                         SDValue &SrcMods) const {
+  Src = In;
+  // FIXME: Handle op_sel
+  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
+                                          SDValue &SrcMods,
+                                          SDValue &Clamp) const {
+  SDLoc SL(In);
+
+  // FIXME: Handle clamp
+  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+  return SelectVOP3OpSel(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
+                                             SDValue &SrcMods) const {
+  // FIXME: Handle op_sel
+  return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
+                                              SDValue &SrcMods,
+                                              SDValue &Clamp) const {
+  SDLoc SL(In);
+
+  // FIXME: Handle clamp
+  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+  return SelectVOP3OpSelMods(In, Src, SrcMods);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
       *static_cast(getTargetLowering());
Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1060,6 +1060,7 @@
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);
+  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -2688,7 +2689,7 @@
   // FIXME: How to verify the number of elements matches the number of src
   // operands?
-  for (int I = 0; I < 3; ++I) {
+  for (int I = 0; I < 4; ++I) {
     if (I != 0) {
       if (getLexer().is(AsmToken::RBrac))
         break;
@@ -4088,6 +4089,30 @@
   return MatchOperand_NoMatch;
 }
 
+void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
+  cvtVOP3P(Inst, Operands);
+
+  int Opc = Inst.getOpcode();
+
+  int SrcNum;
+  const int Ops[] = { AMDGPU::OpName::src0,
+                      AMDGPU::OpName::src1,
+                      AMDGPU::OpName::src2 };
+  for (SrcNum = 0;
+       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
+       ++SrcNum);
+  assert(SrcNum > 0);
+
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+  if ((OpSel & (1 << SrcNum)) != 0) {
+    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
+  }
+}
+
 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
       // 1. This operand is input modifiers
   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
@@ -4172,7 +4197,11 @@
   int Opc = Inst.getOpcode();
   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
-  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+  if (OpSelHiIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+  }
 
   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
   if (NegLoIdx != -1) {
@@ -4188,13 +4217,16 @@
                      AMDGPU::OpName::src2_modifiers };
 
   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
-  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
 
   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
-  unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  unsigned OpSelHi = 0;
   unsigned NegLo = 0;
   unsigned NegHi = 0;
 
+  if (OpSelHiIdx != -1) {
+    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  }
+
   if (NegLoIdx != -1) {
     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
     NegLo = Inst.getOperand(NegLoIdx).getImm();
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -127,6 +127,8 @@
                         const MCSubtargetInfo &STI, raw_ostream &O);
   void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+                           raw_ostream &O);
   void printOpSel(const MCInst *MI, unsigned OpNo,
                   const MCSubtargetInfo &STI, raw_ostream &O);
   void printOpSelHi(const MCInst *MI, unsigned OpNo,
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -803,7 +803,8 @@
   }
 }
 
-static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
+                               bool HasDstSel) {
   int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
 
   for (int I = 0; I < NumOps; ++I) {
@@ -811,11 +812,16 @@
       return false;
   }
 
+  if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
+    return false;
+
   return true;
 }
 
-static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
-                                raw_ostream &O) {
+void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
+                                            StringRef Name,
+                                            unsigned Mod,
+                                            raw_ostream &O) {
   unsigned Opc = MI->getOpcode();
   int NumOps = 0;
   int Ops[3];
@@ -830,7 +836,12 @@
     Ops[NumOps++] = MI->getOperand(Idx).getImm();
   }
 
-  if (allOpsDefaultValue(Ops, NumOps, Mod))
+  const bool HasDstSel =
+    NumOps > 0 &&
+    Mod == SISrcMods::OP_SEL_0 &&
+    MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
+
+  if (allOpsDefaultValue(Ops, NumOps, Mod, HasDstSel))
     return;
 
   O << Name;
@@ -841,6 +852,10 @@
     O << !!(Ops[I] & Mod);
   }
 
+  if (HasDstSel) {
+    O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
+  }
+
   O << ']';
 }
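The AsmParser and InstPrinter changes above both lean on one convention: each source's op_sel flag is bit 2 of that source's src_modifiers operand, and the destination's op_sel flag is folded into bit 3 of src0_modifiers (DST_OP_SEL, sharing its mask with OP_SEL_1). A minimal standalone C++ sketch of that folding, for reference only (applyOpSel and the sketch namespace are invented names, not patch or LLVM API):

#include <cassert>
#include <cstdint>

namespace sketch {
enum SrcMods : uint32_t {
  NEG        = 1u << 0,
  ABS        = 1u << 1,
  OP_SEL_0   = 1u << 2, // op_sel flag for this source operand
  DST_OP_SEL = 1u << 3  // destination op_sel flag, carried on src0 only
};

// Fold an op_sel:[s0,s1,...,dst] list into per-source modifier words.
inline void applyOpSel(const bool *OpSel, uint32_t *Mods, int NumSrcs) {
  for (int I = 0; I < NumSrcs; ++I)
    if (OpSel[I])
      Mods[I] |= OP_SEL_0;
  if (OpSel[NumSrcs]) // the final entry is the destination flag
    Mods[0] |= DST_OP_SEL;
}
} // namespace sketch

int main() {
  bool OpSel[4] = {true, false, false, true}; // op_sel:[1,0,0,1]
  uint32_t Mods[3] = {};
  sketch::applyOpSel(OpSel, Mods, 3);
  assert(Mods[0] == (sketch::OP_SEL_0 | sketch::DST_OP_SEL));
  assert(Mods[1] == 0 && Mods[2] == 0);
  return 0;
}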
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -67,7 +67,8 @@
   SCALAR_STORE = UINT64_C(1) << 39,
   FIXED_SIZE = UINT64_C(1) << 40,
   VOPAsmPrefer32Bit = UINT64_C(1) << 41,
-  HasFPClamp = UINT64_C(1) << 42
+  HasFPClamp = UINT64_C(1) << 42,
+  VOP3_OPSEL = UINT64_C(1) << 43
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -137,7 +138,8 @@
   SEXT = 1 << 0,   // Integer sign-extend modifier
   NEG_HI = ABS,    // Floating-point negate high packed component modifier.
   OP_SEL_0 = 1 << 2,
-  OP_SEL_1 = 1 << 3
+  OP_SEL_1 = 1 << 3,
+  DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
 };
 }
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -83,6 +83,10 @@
   // the clamp modifier has floating point semantics.
   field bit FPClamp = 0;
 
+  // This bit indicates that this is a VOP3 opcode which supports op_sel
+  // modifier (gfx9 only).
+  field bit VOP3_OPSEL = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -127,6 +131,7 @@
   let TSFlags{40} = FixedSize;
   let TSFlags{41} = VOPAsmPrefer32Bit;
   let TSFlags{42} = FPClamp;
+  let TSFlags{43} = VOP3_OPSEL;
 
   let SchedRW = [Write32Bit];
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -659,6 +659,15 @@
 def Int32InputMods : IntInputMods;
 def Int64InputMods : IntInputMods;
 
+class OpSelModsMatchClass : AsmOperandClass {
+  let Name = "OpSelMods";
+  let ParserMethod = "parseRegOrImm";
+  let PredicateMethod = "isRegOrImm";
+}
+
+def IntOpSelModsMatchClass : OpSelModsMatchClass;
+def IntOpSelMods : InputMods;
+
 def FPRegSDWAInputModsMatchClass : AsmOperandClass {
   let Name = "SDWARegWithFPInputMods";
   let ParserMethod = "parseRegWithFPInputMods";
@@ -750,6 +759,11 @@
 def VOP3PMods  : ComplexPattern;
 def VOP3PMods0 : ComplexPattern;
 
+def VOP3OpSel  : ComplexPattern;
+def VOP3OpSel0 : ComplexPattern;
+
+def VOP3OpSelMods  : ComplexPattern;
+def VOP3OpSelMods0 : ComplexPattern;
 
 //===----------------------------------------------------------------------===//
 // SI assembler operands
@@ -771,6 +785,7 @@
   int NEG_HI = ABS;
   int OP_SEL_0 = 4;
   int OP_SEL_1 = 8;
+  int DST_OP_SEL = 8;
 }
 
 def DSTCLAMP {
@@ -1020,6 +1035,10 @@
   );
 }
 
+class getOpSelMod {
+  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
+}
+
 // Return type of input modifiers operand specified input operand for DPP
 class getSrcModExt {
   bit isFP = !if(!eq(VT.Value, f16.Value), 1,
@@ -1133,6 +1152,37 @@
   );
 }
 
+class getInsVOP3OpSel {
+  dag ret = !if (!eq(NumSrcArgs, 2),
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           clampmod:$clamp,
+           op_sel:$op_sel),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           op_sel:$op_sel)),
+    // else NumSrcArgs == 3
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           clampmod:$clamp,
+           op_sel:$op_sel),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           op_sel:$op_sel))
+  );
+}
+
 class getInsDPP {
@@ -1279,6 +1329,34 @@
   string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
 }
 
+class getAsmVOP3OpSel {
+  string dst = " $vdst";
+
+  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1",
+                                             " $src1,"));
+  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                             " $src1_modifiers,"));
+  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+  string src0 = !if(Src0HasMods, fsrc0, isrc0);
+  string src1 = !if(Src1HasMods, fsrc1, isrc1);
+  string src2 = !if(Src2HasMods, fsrc2, isrc2);
+
+  string clamp = !if(HasClamp, "$clamp", "");
+
+  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
+}
+
 class getAsmDPP {
   string dst = !if(HasDst,
                    !if(!eq(DstVT.Size, 1),
@@ -1462,7 +1540,12 @@
   field dag InsVOP3P = getInsVOP3P.ret;
-
+  field dag InsVOP3OpSel = getInsVOP3OpSel.ret,
+                    getOpSelMod.ret,
+                    getOpSelMod.ret>.ret;
   field dag InsDPP = getInsDPP.ret;
   field dag InsSDWA = getInsSDWA.ret;
 
   field string Asm64 = getAsm64.ret;
   field string AsmVOP3P = getAsmVOP3P.ret;
+  field string AsmVOP3OpSel = getAsmVOP3OpSel.ret;
   field string AsmDPP = getAsmDPP.ret;
   field string AsmSDWA = getAsmSDWA.ret;
   field string AsmSDWA9 = getAsmSDWA9.ret;
@@ -1495,6 +1583,8 @@
 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
 
+def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
+
 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1288,12 +1288,32 @@
   (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
+class FP16Med3Pat : Pat<
+  (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+           (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+                           (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+  (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
+>;
+
+class Int16Med3Pat : Pat<
+  (max (min_oneuse vt:$src0, vt:$src1),
+       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
+  (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+
 def : FPMed3Pat;
 
 let Predicates = [isGFX9] in {
-def : FPMed3Pat;
-def : IntMed3Pat;
-def : IntMed3Pat;
+def : FP16Med3Pat;
+def : Int16Med3Pat;
+def : Int16Med3Pat;
 } // End Predicates = [isGFX9]
 
 //============================================================================//
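The FP16Med3Pat and Int16Med3Pat patterns just above match the standard median-of-three expansion max(min(a,b), min(max(a,b), c)), which is what lets a min/max tree collapse into a single v_med3_{f16,i16,u16}. A standalone C++ check of that identity (illustrative only, not part of the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// max(min(a,b), min(max(a,b), c)) == median of {a, b, c}
static int16_t med3(int16_t A, int16_t B, int16_t C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      for (int C = -2; C <= 2; ++C) {
        int16_t V[3] = {int16_t(A), int16_t(B), int16_t(C)};
        std::sort(V, V + 3);
        assert(med3(int16_t(A), int16_t(B), int16_t(C)) == V[1]); // middle element
      }
  return 0;
}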
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -49,6 +49,46 @@
                         ret1));
 }
 
+class getVOP3OpSelPat {
+  list ret3 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                    (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
+          (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list ret2 = [(set P.DstVT:$vdst,
+    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                          (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list ret1 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+             !if(!eq(P.NumSrcArgs, 2), ret2,
+             ret1));
+}
+
+class getVOP3OpSelModPat {
+  list ret3 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                    (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
+          (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list ret2 = [(set P.DstVT:$vdst,
+    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                          (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list ret1 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+             !if(!eq(P.NumSrcArgs, 2), ret2,
+             ret1));
+}
+
 class getVOP3Pat {
   list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
   list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -63,6 +103,16 @@
             !if(P.HasModifiers, getVOP3ModPat.ret, getVOP3Pat.ret),
             VOP3Only>;
 
+class VOP3OpSelInst :
+  VOP3_Pseudo.ret,
+                  getVOP3OpSelModPat.ret,
+                  getVOP3OpSelPat.ret),
+              1, 0, 1> {
+
+  let AsmMatchConverter = "cvtVOP3OpSel";
+}
+
 // Special case for v_div_fmas_{f32|f64}, since it seems to be the
 // only VOP instruction that implicitly reads VCC.
 let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
@@ -89,6 +139,11 @@
   let Asm64 = " " # P.Asm64;
 }
 
+class VOP3OpSel_Profile : VOP3_Profile {
+  let HasClamp = 1;
+  let HasOpSel = 1;
+}
+
 class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
@@ -298,7 +353,7 @@
 } // End Predicates = [Has16BitInsts]
 
 let SubtargetPredicate = isGFX9 in {
-def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>;
+def V_PACK_B32_F16 : VOP3OpSelInst <"v_pack_b32_f16", VOP3OpSel_Profile>;
 def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile>;
 def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile>;
 def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>;
@@ -308,17 +363,26 @@
 def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile>;
 
-def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>;
-def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>;
-def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>;
+def V_MED3_F16 : VOP3OpSelInst <"v_med3_f16", VOP3OpSel_Profile, AMDGPUfmed3>;
+def V_MED3_I16 : VOP3OpSelInst <"v_med3_i16", VOP3OpSel_Profile, AMDGPUsmed3>;
+def V_MED3_U16 : VOP3OpSelInst <"v_med3_u16", VOP3OpSel_Profile, AMDGPUumed3>;
 
-def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>;
-def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>;
-def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>;
+def V_MIN3_F16 : VOP3OpSelInst <"v_min3_f16", VOP3OpSel_Profile, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3OpSelInst <"v_min3_i16", VOP3OpSel_Profile, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3OpSelInst <"v_min3_u16", VOP3OpSel_Profile, AMDGPUumin3>;
 
-def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>;
-def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>;
-def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>;
+def V_MAX3_F16 : VOP3OpSelInst <"v_max3_f16", VOP3OpSel_Profile, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3OpSelInst <"v_max3_i16", VOP3OpSel_Profile, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3OpSelInst <"v_max3_u16", VOP3OpSel_Profile, AMDGPUumax3>;
+
+def V_ADD_I16 : VOP3OpSelInst <"v_add_i16", VOP3OpSel_Profile>;
+def V_SUB_I16 : VOP3OpSelInst <"v_sub_i16", VOP3OpSel_Profile>;
+
+def V_MAD_U32_U16 : VOP3OpSelInst <"v_mad_u32_u16", VOP3OpSel_Profile>;
+def V_MAD_I32_I16 : VOP3OpSelInst <"v_mad_i32_i16", VOP3OpSel_Profile>;
+
+def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Profile>;
+def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile>;
 
 } // End SubtargetPredicate = isGFX9
@@ -438,6 +502,11 @@
                VOP3be_vi (NAME).Pfl>;
 }
 
+multiclass VOP3OpSel_Real_gfx9 op> {
+  def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>,
+            VOP3OpSel_gfx9 (NAME).Pfl>;
+}
+
 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
 
 defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
@@ -522,18 +591,27 @@
 defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
 defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
 defm V_OR3_B32 : VOP3_Real_vi <0x202>;
-defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
+defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>;
 
 defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
 
-defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
-defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
-defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>;
+defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>;
+defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>;
+
+defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>;
+defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>;
+defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>;
+
+defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>;
+defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>;
+defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>;
+
+defm V_ADD_I16 : VOP3OpSel_Real_gfx9 <0x29e>;
+defm V_SUB_I16 : VOP3OpSel_Real_gfx9 <0x29f>;
 
-defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
-defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
-defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>;
+defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
 
-defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
-defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
-defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
+defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
+defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
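For the 16-bit opcodes switched to VOP3OpSelInst above, op_sel selects which 16-bit half of each 32-bit VGPR is read, and the added destination bit selects which half of the destination is written. A rough standalone model of that behaviour (illustrative C++ under those assumptions, not LLVM code; it simply leaves the unwritten destination half untouched, and the GFX9 ISA documentation remains the reference for the precise destination rule):

#include <cassert>
#include <cstdint>

// Read the low or high 16-bit half of a 32-bit VGPR value.
static uint16_t readHalf(uint32_t Vgpr, bool Hi) {
  return uint16_t(Hi ? (Vgpr >> 16) : (Vgpr & 0xffff));
}

// Write a 16-bit result into one half of a VGPR, keeping the other half.
static uint32_t writeHalf(uint32_t OldVgpr, uint16_t Val, bool Hi) {
  return Hi ? ((OldVgpr & 0x0000ffffu) | (uint32_t(Val) << 16))
            : ((OldVgpr & 0xffff0000u) | Val);
}

int main() {
  // v_add_i16 d, a, b op_sel:[1,0,1]: high half of a, low half of b,
  // result written to the high half of d.
  uint32_t A = 0x00030001, B = 0x00000002, D = 0xdeadbeef;
  uint16_t R = uint16_t(readHalf(A, true) + readHalf(B, false)); // 3 + 2
  D = writeHalf(D, R, true);
  assert(D == 0x0005beef);
  return 0;
}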
Index: lib/Target/AMDGPU/VOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPInstructions.td
+++ lib/Target/AMDGPU/VOPInstructions.td
@@ -65,8 +65,13 @@
 }
 
 class VOP3_Pseudo pattern = [],
-                  bit VOP3Only = 0, bit isVOP3P = 0> :
-  InstSI ,
+                  bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0> :
+  InstSI ,
   VOP ,
   SIMCInstr,
   MnemonicAlias {
@@ -74,9 +79,12 @@
   let isPseudo = 1;
   let isCodeGenOnly = 1;
   let UseNamedOperandTable = 1;
+  let VOP3_OPSEL = isVop3OpSel;
 
   string Mnemonic = opName;
-  string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
+  string AsmOperands = !if(isVop3OpSel,
+                           P.AsmVOP3OpSel,
+                           !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
 
   let Size = 8;
   let mayLoad = 0;
@@ -144,11 +152,11 @@
   VOP3_Real;
 
 class VOP3a : Enc64 {
-  bits<2> src0_modifiers;
+  bits<4> src0_modifiers;
   bits<9> src0;
-  bits<2> src1_modifiers;
+  bits<3> src1_modifiers;
   bits<9> src1;
-  bits<2> src2_modifiers;
+  bits<3> src2_modifiers;
   bits<9> src2;
   bits<1> clamp;
   bits<2> omod;
@@ -187,6 +195,13 @@
   let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
 }
 
+class VOP3OpSel_gfx9 op, VOPProfile P> : VOP3e_vi {
+  let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
+  let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
+  let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
+  let Inst{14} = !if(P.HasDst,  src0_modifiers{3}, 0);
+}
+
 class VOP3be : Enc64 {
   bits<8> vdst;
   bits<2> src0_modifiers;
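The VOP3OpSel_gfx9 class above routes the source op_sel flags into Inst{11..13} and the destination flag into Inst{14}, which is exactly what the encodings expected by the new vop3-gfx9.s checks below show in their second byte (0x08, 0x10, 0x20, 0x40, 0x78). A standalone sketch of that bit placement (illustrative C++, not LLVM code):

#include <cassert>
#include <cstdint>

// Place op_sel:[src0,src1,src2,dst] into bits 11-14 of VOP3 word 0.
static uint32_t encodeOpSelBits(bool Src0, bool Src1, bool Src2, bool Dst) {
  uint32_t Word0 = 0;
  Word0 |= uint32_t(Src0) << 11;
  Word0 |= uint32_t(Src1) << 12;
  Word0 |= uint32_t(Src2) << 13;
  Word0 |= uint32_t(Dst)  << 14;
  return Word0;
}

int main() {
  // v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] -> second encoding byte 0x08
  assert(((encodeOpSelBits(true, false, false, false) >> 8) & 0xff) == 0x08);
  // op_sel:[1,1,1,1] -> second encoding byte 0x78 (bits 11-14 all set)
  assert(((encodeOpSelBits(true, true, true, true) >> 8) & 0xff) == 0x78);
  return 0;
}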
Index: test/MC/AMDGPU/vop3-gfx9.s
===================================================================
--- test/MC/AMDGPU/vop3-gfx9.s
+++ test/MC/AMDGPU/vop3-gfx9.s
@@ -31,6 +31,15 @@
 // GFX9: v_pack_b32_f16 v1, v2, v3 ; encoding: [0x01,0x00,0xa0,0xd2,0x02,0x07,0x02,0x00]
 // NOVI: :1: error: instruction not supported on this GPU
 
+v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0]
+// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0xa0,0xd2,0x01,0x05,0x02,0x00]
+
+v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0]
+// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] ; encoding: [0x05,0x10,0xa0,0xd2,0x01,0x05,0x02,0x00]
+
+v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1]
+// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] ; encoding: [0x05,0x40,0xa0,0xd2,0x01,0x05,0x02,0x00]
+
 v_xad_u32 v1, v2, v3, v4
 // GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
@@ -51,10 +60,46 @@
 // GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
 
+v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,0]
+// GFX9: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0]
+// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0]
+// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0]
+// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1]
+// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
+// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x04]
+
 v_max3_i16 v1, v2, v3, v4
 // GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
 
+v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,0]
+// GFX9: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0]
+// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0]
+// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0]
+// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1]
+// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
+v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1]
+// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf8,0xd1,0x01,0x05,0x0e,0x04]
+
 v_max3_u16 v1, v2, v3, v4
 // GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
@@ -70,3 +115,78 @@
 v_med3_u16 v1, v2, v3, v4
 // GFX9: v_med3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfc,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
+
+v_mad_u32_u16 v5, v1, v2, v3
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0]
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0]
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0]
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1]
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1]
+// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf1,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v1, v2, v3
+// GFX9: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf2,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1]
+// GFX9: v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf2,0xd1,0x01,0x05,0x0e,0x04]
+
+v_cvt_pknorm_i16_f16 v5, v1, v2
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_i16_f16 v5, -v1, v2
+// GFX9: v_cvt_pknorm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x20]
+
+v_cvt_pknorm_i16_f16 v5, v1, -v2
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x40]
+
+v_cvt_pknorm_i16_f16 v5, -v1, -v2
+// GFX9: v_cvt_pknorm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x60]
+
+v_cvt_pknorm_i16_f16 v5, |v1|, v2
+// GFX9: v_cvt_pknorm_i16_f16 v5, |v1|, v2 ; encoding: [0x05,0x01,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_i16_f16 v5, v1, |v2|
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, |v2| ; encoding: [0x05,0x02,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[0,0,0]
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0]
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1]
+// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x99,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_u16_f16 v5, -v1, -v2
+// GFX9: v_cvt_pknorm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x9a,0xd2,0x01,0x05,0x02,0x60]
+
+v_cvt_pknorm_u16_f16 v5, |v1|, |v2|
+// GFX9: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x9a,0xd2,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1]
+// GFX9: v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9a,0xd2,0x01,0x05,0x02,0x00]
+
+v_add_i16 v5, v1, v2
+// GFX9: v_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9e,0xd2,0x01,0x05,0x02,0x00]
+
+v_add_i16 v5, v1, v2 op_sel:[1,1,1]
+// GFX9: v_add_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9e,0xd2,0x01,0x05,0x02,0x00]
+
+v_sub_i16 v5, v1, v2
+// GFX9: v_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9f,0xd2,0x01,0x05,0x02,0x00]
+
+v_sub_i16 v5, v1, v2 op_sel:[1,1,1]
+// GFX9: v_sub_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9f,0xd2,0x01,0x05,0x02,0x00]
+
+v_sub_i16 v5, v1, v2 clamp
+// GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00]
Index: test/MC/AMDGPU/vop3p-err.s
===================================================================
--- test/MC/AMDGPU/vop3p-err.s
+++ test/MC/AMDGPU/vop3p-err.s
@@ -36,8 +36,8 @@
 // GFX9: 35: error: failed parsing operand.
 v_pk_add_u16 v1, v2, v3 op_sel:[0,-1]
 
-// GFX9: 40: error: not a valid operand.
-v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0]
+// GFX9: 42: error: not a valid operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0,0]
 
 // XXGFX9: invalid operand for instruction
 v_pk_add_u16 v1, v2, v3 neg_lo:[0,0]
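The widened parsing loop in AMDGPUAsmParser.cpp (I < 4) and the updated vop3p-err.s expectation correspond to op_sel lists of at most four 0/1 entries, sources first and the destination last. A toy parser for just that list syntax (illustrative C++ only; parseOpSel is an invented helper, not the LLVM AsmParser):

#include <cassert>
#include <sstream>
#include <string>

// Body is the text between the brackets, e.g. "1,0,0,1".
// Bit N of Mask corresponds to entry N.
static bool parseOpSel(const std::string &Body, unsigned &Mask) {
  std::stringstream SS(Body);
  std::string Tok;
  Mask = 0;
  for (unsigned I = 0; std::getline(SS, Tok, ','); ++I) {
    if (I >= 4 || (Tok != "0" && Tok != "1"))
      return false; // too many entries, or not a 0/1 literal
    if (Tok == "1")
      Mask |= 1u << I;
  }
  return true;
}

int main() {
  unsigned Mask;
  assert(parseOpSel("1,0,0,1", Mask) && Mask == 0x9);
  assert(parseOpSel("0,1,0", Mask) && Mask == 0x2);
  assert(!parseOpSel("0,0,0,0,0", Mask)); // rejected, as vop3p-err.s expects
  return 0;
}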