Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -74,6 +74,8 @@ ImmTyDppRowMask, ImmTyDppBankMask, ImmTyDppBoundCtrl, + ImmTySdwaSel, + ImmTySdwaDstUnused, ImmTyDMask, ImmTyUNorm, ImmTyDA, @@ -252,6 +254,14 @@ return isImmTy(ImmTyDppBoundCtrl); } + bool isSDWASel() const { + return isImmTy(ImmTySdwaSel); + } + + bool isSDWADstUnused() const { + return isImmTy(ImmTySdwaDstUnused); + } + void setModifiers(unsigned Mods) { assert(isReg() || (isImm() && Imm.Modifiers == 0)); if (isReg()) @@ -520,6 +530,7 @@ OperandMatchResultTy parseOptionalOps( const ArrayRef &OptionalOps, OperandVector &Operands); + OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); @@ -565,6 +576,9 @@ void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands); void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands); void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods); + + OperandMatchResultTy parseSDWASel(OperandVector &Operands); + OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); }; struct OptionalOperand { @@ -1392,6 +1406,30 @@ return MatchOperand_NoMatch; } +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) { + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + StringRef Tok = Parser.getTok().getString(); + if (Tok != Prefix) { + return MatchOperand_NoMatch; + } + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) { + return MatchOperand_ParseFail; + } + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_ParseFail; + } + + Value = Parser.getTok().getString(); + return MatchOperand_Success; +} + 
//===----------------------------------------------------------------------===// // ds //===----------------------------------------------------------------------===// @@ -2202,6 +2240,80 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); } +//===----------------------------------------------------------------------===// +// sdwa +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + AMDGPUAsmParser::OperandMatchResultTy res; + + res = parseStringWithPrefix("dst_sel", Value); + if (res == MatchOperand_ParseFail) { + return MatchOperand_ParseFail; + } else if (res == MatchOperand_NoMatch) { + res = parseStringWithPrefix("src0_sel", Value); + if (res == MatchOperand_ParseFail) { + return MatchOperand_ParseFail; + } else if (res == MatchOperand_NoMatch) { + res = parseStringWithPrefix("src1_sel", Value); + if (res != MatchOperand_Success) { + return res; + } + } + } + + int64_t Int; + Int = StringSwitch(Value) + .Case("BYTE_0", 0) + .Case("BYTE_1", 1) + .Case("BYTE_2", 2) + .Case("BYTE_3", 3) + .Case("WORD_0", 4) + .Case("WORD_1", 5) + .Case("DWORD", 6) + .Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + AMDGPUOperand::ImmTySdwaSel)); + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + AMDGPUAsmParser::OperandMatchResultTy res; + + res = parseStringWithPrefix("dst_unused", Value); + if (res != MatchOperand_Success) { + return res; + } + + int64_t Int; + Int = StringSwitch(Value) + .Case("UNUSED_PAD", 0) + .Case("UNUSED_SEXT", 1) + .Case("UNUSED_PRESERVE", 2) + 
.Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + AMDGPUOperand::ImmTySdwaDstUnused)); + return MatchOperand_Success; +} + /// Force static initialization. extern "C" void LLVMInitializeAMDGPUAsmParser() { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -67,6 +67,11 @@ void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -282,6 +282,8 @@ O << "_e64 "; else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP) O << "_dpp "; + else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA) + O << "_sdwa "; else O << "_e32 "; @@ -479,6 +481,51 @@ } } +void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Imm = 
MI->getOperand(OpNo).getImm(); + switch (Imm) { + case 0: O << "BYTE_0"; break; + case 1: O << "BYTE_1"; break; + case 2: O << "BYTE_2"; break; + case 3: O << "BYTE_3"; break; + case 4: O << "WORD_0"; break; + case 5: O << "WORD_1"; break; + case 6: O << "DWORD"; break; + default: llvm_unreachable("Invalid SDWA data select operand"); + } +} + +void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << "dst_sel:"; + printSDWASel(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << "src0_sel:"; + printSDWASel(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << "src1_sel:"; + printSDWASel(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << "dst_unused:"; + unsigned Imm = MI->getOperand(OpNo).getImm(); + switch (Imm) { + case 0: O << "UNUSED_PAD"; break; + case 1: O << "UNUSED_SEXT"; break; + case 2: O << "UNUSED_PRESERVE"; break; + default: llvm_unreachable("Invalid SDWA dest_unused operand"); + } +} + void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -29,17 +29,18 @@ VOP2 = 1 << 11, VOP3 = 1 << 12, VOPC = 1 << 13, - DPP = 1 << 14, - - MUBUF = 1 << 15, - MTBUF = 1 << 16, - SMRD = 1 << 17, - DS = 1 << 18, - MIMG = 1 << 19, - FLAT = 1 << 20, - WQM = 1 << 21, - VGPRSpill = 1 << 22, - VOPAsmPrefer32Bit = 1 << 23 + SDWA = 1 << 14, + DPP = 1 << 15, + + MUBUF = 1 << 16, + MTBUF = 1 << 17, + SMRD = 1 << 18, + DS = 1 << 19, + MIMG = 1 << 20, + FLAT = 1 << 21, + WQM = 1 << 22, + VGPRSpill = 1 << 23, + VOPAsmPrefer32Bit = 1 << 24 }; } Index: 
lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -31,6 +31,7 @@ field bits<1> VOP2 = 0; field bits<1> VOP3 = 0; field bits<1> VOPC = 0; + field bits<1> SDWA = 0; field bits<1> DPP = 0; field bits<1> MUBUF = 0; @@ -64,17 +65,18 @@ let TSFlags{11} = VOP2; let TSFlags{12} = VOP3; let TSFlags{13} = VOPC; - let TSFlags{14} = DPP; - - let TSFlags{15} = MUBUF; - let TSFlags{16} = MTBUF; - let TSFlags{17} = SMRD; - let TSFlags{18} = DS; - let TSFlags{19} = MIMG; - let TSFlags{20} = FLAT; - let TSFlags{21} = WQM; - let TSFlags{22} = VGPRSpill; - let TSFlags{23} = VOPAsmPrefer32Bit; + let TSFlags{14} = SDWA; + let TSFlags{15} = DPP; + + let TSFlags{16} = MUBUF; + let TSFlags{17} = MTBUF; + let TSFlags{18} = SMRD; + let TSFlags{19} = DS; + let TSFlags{20} = MIMG; + let TSFlags{21} = FLAT; + let TSFlags{22} = WQM; + let TSFlags{23} = VGPRSpill; + let TSFlags{24} = VOPAsmPrefer32Bit; let SchedRW = [Write32Bit]; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -576,6 +576,22 @@ let IsOptional = 1; } +def SDWASelMatchClass : AsmOperandClass { + let Name = "SDWASel"; + let PredicateMethod = "isSDWASel"; + let ParserMethod = "parseSDWASel"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + +def SDWADstUnusedMatchClass : AsmOperandClass { + let Name = "SDWADstUnused"; + let PredicateMethod = "isSDWADstUnused"; + let ParserMethod = "parseSDWADstUnused"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + class OptionalImmAsmOperand : AsmOperandClass { let Name = "Imm"#OpName; let PredicateMethod = "isImm"; @@ -730,6 +746,26 @@ let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">; } +def dst_sel : Operand { + let PrintMethod = "printSDWADstSel"; + let ParserMatchClass = 
SDWASelMatchClass; +} + +def src0_sel : Operand { + let PrintMethod = "printSDWASrc0Sel"; + let ParserMatchClass = SDWASelMatchClass; +} + +def src1_sel : Operand { + let PrintMethod = "printSDWASrc1Sel"; + let ParserMatchClass = SDWASelMatchClass; +} + +def dst_unused : Operand { + let PrintMethod = "printSDWADstUnused"; + let ParserMatchClass = SDWADstUnusedMatchClass; +} + } // End OperandType = "OPERAND_IMMEDIATE" @@ -1304,16 +1340,11 @@ RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32); } -// Returns the register class to use for source 1 of VOP[12C] for the -// given VT. -class getVOPSrc1ForVT { +// Returns the vreg register class to use for a source operand of the given VT. +class getVregSrcForVT { RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32); } -// Returns the register class to use for DPP source operands. -class getDPPSrcForVT { - RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32); -} // Returns the register class to use for sources of VOP3 instructions for the // given VT. 
@@ -1419,7 +1450,40 @@ /* endif */))); } -class getOutsDPP { +class getInsSDWA { + + dag ret = !if (!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP) + (ins), + !if (!eq(NumSrcArgs, 1), + !if (!eq(HasModifiers, 1), + // VOP1_SDWA with modifiers + (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, + ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel) + /* else */, + // VOP1_SDWA without modifiers + (ins Src0RC:$src0, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel) + /* endif */) + /* NumSrcArgs == 2 */, + !if (!eq(HasModifiers, 1), + // VOP2_SDWA with modifiers + (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, + InputModsNoDefault:$src1_modifiers, Src1RC:$src1, + ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel) + /* else */, + // VOP2_SDWA without modifiers + (ins Src0RC:$src0, Src1RC:$src1, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel) + /* endif */))); +} + +// Outs for DPP and SDWA +class getOutsExt { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), (outs DstRCDPP:$sdst), // sdst for VOPC @@ -1472,20 +1536,41 @@ string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl"; } -class getHasDPP { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", + "$vdst"), + ""); // use $sdst for VOPC + string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); + string src1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1_modifiers", + " $src1_modifiers,")); + string args = !if(!eq(HasModifiers, 0), + getAsm32<0, NumSrcArgs, DstVT>.ret, + ", "#src0#src1#", $clamp"); + string sdwa = !if(!eq(NumSrcArgs, 0), + "", + !if(!eq(NumSrcArgs, 1), + " $dst_sel $dst_unused $src0_sel", + " $dst_sel $dst_unused $src0_sel $src1_sel" + ) + ); + string ret = dst#args#sdwa; +} + +// Function that checks if instruction supports DPP and SDWA +class getHasExt { bit ret = !if(!eq(NumSrcArgs, 3), 
- 0, // NumSrcArgs == 3 - No DPP for VOP3 - !if(!eq(DstVT.Size, 1), - 0, // No DPP for VOPC - !if(!eq(DstVT.Size, 64), - 0, // 64-bit dst - No DPP for 64-bit operands + 0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3 + !if(!eq(DstVT.Size, 64), + 0, // 64-bit dst - No DPP or SDWA for 64-bit operands + !if(!eq(Src0VT.Size, 64), + 0, // 64-bit src0 !if(!eq(Src0VT.Size, 64), - 0, // 64-bit src0 - !if(!eq(Src0VT.Size, 64), - 0, // 64-bit src2 - 1 - ) + 0, // 64-bit src2 + 1 ) ) ) @@ -1502,41 +1587,47 @@ field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT.ret; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; + field RegisterOperand DstRCSDWA = getVALUDstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; - field RegisterClass Src1RC32 = getVOPSrc1ForVT.ret; + field RegisterClass Src1RC32 = getVregSrcForVT.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; - field RegisterClass Src0DPP = getDPPSrcForVT.ret; - field RegisterClass Src1DPP = getDPPSrcForVT.ret; - + field RegisterClass Src0DPP = getVregSrcForVT.ret; + field RegisterClass Src1DPP = getVregSrcForVT.ret; + field RegisterClass Src0SDWA = getVregSrcForVT.ret; + field RegisterClass Src1SDWA = getVregSrcForVT.ret; + field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); field bit HasDst32 = HasDst; field int NumSrcArgs = getNumSrcArgs.ret; field bit HasModifiers = hasModifiers.ret; - field bit HasDPP = getHasDPP.ret; - + field bit HasExt = getHasExt.ret; + field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs)); // VOP3b instructions are a special case with a second explicit // output. This is manually overridden for them. 
field dag Outs32 = Outs; field dag Outs64 = Outs; - field dag OutsDPP = getOutsDPP.ret; + field dag OutsDPP = getOutsExt.ret; + field dag OutsSDWA = getOutsExt.ret; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64.ret; field dag InsDPP = getInsDPP.ret; + field dag InsSDWA = getInsSDWA.ret; field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; field string AsmDPP = getAsmDPP.ret; + field string AsmSDWA = getAsmSDWA.ret; } -class VOP_NO_DPP : VOPProfile { - let HasDPP = 0; +class VOP_NO_EXT : VOPProfile { + let HasExt = 0; } // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order @@ -1647,12 +1738,12 @@ def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm); field string Asm32 = "$vdst, $src0, $src1, $imm"; - field bit HasDPP = 0; + field bit HasExt = 0; } def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1); field string Asm32 = "$vdst, $src0, $imm, $src1"; - field bit HasDPP = 0; + field bit HasExt = 0; } def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); @@ -1663,9 +1754,15 @@ VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0, + InputModsNoDefault:$src1_modifiers, Src1RC32:$src1, + VGPR_32:$src2, // stub argument + ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, f32>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret; + let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; @@ -1775,13 +1872,37 @@ class VOP1_DPP : VOP1_DPPe , 
VOP_DPP { - let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]); + let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); let DecoderNamespace = "DPP"; let DisableDecoder = DisableVIDecoder; let src0_modifiers = !if(p.HasModifiers, ?, 0); let src1_modifiers = 0; } +class SDWADisableFields { + bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?); + bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?); + bits<3> src0_modifiers = !if(p.HasModifiers, ?, 0); + bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6, + !if(!eq(p.NumSrcArgs, 1), 6, + ?)); + bits<3> src1_modifiers = !if(!eq(p.NumSrcArgs, 0), 0, + !if(!eq(p.NumSrcArgs, 1), 0, + !if(p.HasModifiers, ?, 0))); + bits<3> dst_sel = !if(p.HasDst, ?, 6); + bits<2> dst_unused = !if(p.HasDst, ?, 0); + bits<1> clamp = !if(p.HasModifiers, ?, 0); +} + +class VOP1_SDWA : + VOP1_SDWAe , + VOP_SDWA , + SDWADisableFields

{ + let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); + let DecoderNamespace = "SDWA"; + let DisableDecoder = DisableVIDecoder; +} + multiclass VOP1SI_m pattern, string asm = opName#p.Asm32> { @@ -1839,13 +1960,22 @@ class VOP2_DPP : VOP2_DPPe , VOP_DPP { - let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]); + let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); let DecoderNamespace = "DPP"; let DisableDecoder = DisableVIDecoder; let src0_modifiers = !if(p.HasModifiers, ?, 0); let src1_modifiers = !if(p.HasModifiers, ?, 0); } +class VOP2_SDWA : + VOP2_SDWAe , + VOP_SDWA , + SDWADisableFields

{ + let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); + let DecoderNamespace = "SDWA"; + let DisableDecoder = DisableVIDecoder; +} + class VOP3DisableFields { bits<2> src0_modifiers = !if(HasModifiers, ?, 0); @@ -2077,6 +2207,8 @@ p.HasModifiers>; def _dpp : VOP1_DPP ; + + def _sdwa : VOP1_SDWA ; } multiclass VOP1Inst ; def _dpp : VOP2_DPP ; + + def _sdwa : VOP2_SDWA ; } multiclass VOP2Inst ; let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { -defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NO_DPP>; +defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NO_EXT>; } let Uses = [M0, EXEC] in { -defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_NO_DPP>; -defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_NO_DPP>; -defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_NO_DPP>; +defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_NO_EXT>; +defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_NO_EXT>; +defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_NO_EXT>; } // End Uses = [M0, EXEC] // These instruction only exist on SI and CI Index: lib/Target/AMDGPU/VIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/VIInstrFormats.td +++ lib/Target/AMDGPU/VIInstrFormats.td @@ -225,6 +225,61 @@ let Inst{31} = 0x0; //encoding } +class VOP_SDWA pattern, bit HasMods = 0> : + VOPAnyCommon { + let SDWA = 1; + let Size = 8; +} + +class VOP_SDWAe : Enc64 { + bits<8> src0; + bits<3> src0_sel; + bits<3> src0_modifiers; // {abs,neg,sext} + bits<3> src1_sel; + bits<3> src1_modifiers; + bits<3> dst_sel; + bits<2> dst_unused; + bits<1> clamp; + + let Inst{39-32} = src0; + let Inst{42-40} = dst_sel; + let Inst{44-43} = dst_unused; + let Inst{45} = clamp; + let Inst{50-48} = src0_sel; + let Inst{53-51} = src0_modifiers; + let Inst{58-56} = src1_sel; + let Inst{61-59} = src1_modifiers; +} + +class VOP1_SDWAe op> : VOP_SDWAe { + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = vdst; + let 
Inst{31-25} = 0x3f; // encoding +} + +class VOP2_SDWAe op> : VOP_SDWAe { + bits<8> vdst; + bits<8> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = src1; + let Inst{24-17} = vdst; + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding +} + +class VOPC_SDWAe op> : VOP_SDWAe { + bits<8> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = src1; + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding +} + class EXPe_vi : EXPe { let Inst{31-26} = 0x31; //encoding } Index: test/MC/AMDGPU/vop_sdwa.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/vop_sdwa.s @@ -0,0 +1,40 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI + +// ToDo: converters +// ToDo: VOPC +// ToDo: VOP2b (see vop_dpp.s) +// ToDo: V_MAC_F32 (see vop_dpp.s) +// ToDo: sext() +// ToDo: intrinsics + + +// NOSICI: error: +// VI: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x06] +v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD + +// NOSICI: error: +// VI: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x06] +v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 + +// NOSICI: error: +// VI: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x06] +v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 + +// 
NOSICI: error: +// VI: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02] +v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 + +// NOSICI: error: +// VI: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05] +v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 + +// NOSICI: error: +// VI: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06] +v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD + +// NOSICI: error: +// VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06] +v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD