Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1368,8 +1368,8 @@
       getForcedEncodingSize() != 64)
     return Match_PreferE32;
 
-  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa ||
-      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa) {
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
     // v_mac_f32/16 allow only dst_sel == DWORD;
     auto OpNum =
         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
@@ -3028,8 +3028,8 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx,
                         AMDGPUOperand::ImmTyClampSI, 0);
 
-  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa) {
-    // V_NOP_sdwa has no optional sdwa arguments
+  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
+    // V_NOP_sdwa_vi has no optional sdwa arguments
     switch (BasicInstType) {
     case SIInstrFlags::VOP1: {
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
@@ -3056,8 +3056,8 @@
 
   // special case v_mac_{f16, f32}:
   // it has src2 register operand that is tied to dst operand
-  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa ||
-      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa) {
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
     auto it = Inst.begin();
     std::advance(
       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
Index: lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP1Instructions.td
+++ lib/Target/AMDGPU/VOP1Instructions.td
@@ -21,6 +21,15 @@
   let Inst{31-25} = 0x3f; //encoding
 }
 
+class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
+  bits<8> vdst;
+
+  let Inst{8-0}   = 0xf9; // sdwa
+  let Inst{16-9}  = op;
+  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{31-25} = 0x3f; // encoding
+}
+
 class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
   InstSI <P.Outs32, P.Ins32, "", pattern>,
   VOP <opName>,
@@ -65,6 +74,11 @@
   let TSFlags = ps.TSFlags;
 }
 
+class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+  VOP_SDWA_Pseudo <OpName, P, pattern> {
+  let AsmMatchConverter = "cvtSdwaVOP1";
+}
+
 class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
   list<dag> ret = !if(P.HasModifiers,
       [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
@@ -76,6 +90,7 @@
                      SDPatternOperator node = null_frag> {
   def _e32 : VOP1_Pseudo <opName, P>;
   def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+  def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -420,21 +435,6 @@
 // VI
 //===----------------------------------------------------------------------===//
 
-class VOP1_SDWA <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
-  VOP_SDWA <ps.OpName, P> {
-  let Defs = ps.Defs;
-  let Uses = ps.Uses;
-  let SchedRW = ps.SchedRW;
-  let hasSideEffects = ps.hasSideEffects;
-  let AsmMatchConverter = "cvtSdwaVOP1";
-
-  bits<8> vdst;
-  let Inst{8-0} = 0xf9; // sdwa
-  let Inst{16-9} = op;
-  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
-  let Inst{31-25} = 0x3f; // encoding
-}
-
 class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
   VOP_DPP <ps.OpName, P> {
   let Defs = ps.Defs;
@@ -459,9 +459,12 @@
       VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
   }
 
-  // for now left sdwa/dpp only for asm/dasm
+  def _sdwa_vi :
+    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+  // for now left dpp only for asm/dasm
   // TODO: add corresponding pseudo
-  def _sdwa : VOP1_SDWA <op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
   def _dpp : VOP1_DPP <op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
 }
 
Index: lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP2Instructions.td
+++ lib/Target/AMDGPU/VOP2Instructions.td
@@ -37,6 +37,17 @@
   let Inst{63-32} = imm;
 }
 
+class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
+  bits<8> vdst;
+  bits<8> src1;
+
+  let Inst{8-0}   = 0xf9; // sdwa
+  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
+  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{30-25} = op;
+  let Inst{31}    = 0x0; // encoding
+}
+
 class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
   InstSI <P.Outs32, P.Ins32, "", pattern>,
   VOP <opName>,
@@ -81,6 +92,11 @@
   let TSFlags = ps.TSFlags;
 }
 
+class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+  VOP_SDWA_Pseudo <OpName, P, pattern> {
+  let AsmMatchConverter = "cvtSdwaVOP2";
+}
+
 class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
   list<dag> ret = !if(P.HasModifiers,
       [(set P.DstVT:$vdst,
@@ -99,8 +115,12 @@
 
   def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
              Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+  def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
+              Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
 }
 
+// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
 multiclass VOP2bInst <string opName,
@@ ... @@
-class VOP2_SDWA <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
-  VOP_SDWA <ps.OpName, P> {
-  let Defs = ps.Defs;
-  let Uses = ps.Uses;
-  let SchedRW = ps.SchedRW;
-  let hasSideEffects = ps.hasSideEffects;
-  let AsmMatchConverter = "cvtSdwaVOP2";
-
-  bits<8> vdst;
-  bits<8> src1;
-  let Inst{8-0} = 0xf9; // sdwa
-  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
-  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
-  let Inst{30-25} = op;
-  let Inst{31} = 0x0; // encoding
-}
-
 class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
   VOP_DPP <ps.OpName, P> {
   let Defs = ps.Defs;
@@ -618,12 +621,17 @@
     VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
 
 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
 
+
+multiclass VOP2_SDWA_Real <bits<6> op> {
+  def _sdwa_vi :
+    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+}
 multiclass VOP2_Real_e32e64_vi <bits<6> op> :
-  Base_VOP2_Real_e32e64_vi<op> {
-  // for now left sdwa/dpp only for asm/dasm
+  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
+  // for now left dpp only for asm/dasm
   // TODO: add corresponding pseudo
-  def _sdwa : VOP2_SDWA <op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32")>;
   def _dpp : VOP2_DPP <op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32")>;
 }
 
Index: lib/Target/AMDGPU/VOPCInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPCInstructions.td
+++ lib/Target/AMDGPU/VOPCInstructions.td
@@ -21,6 +21,19 @@
   let Inst{31-25} = 0x3e;
 }
 
+class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
+  bits<8> src1;
+
+  let Inst{8-0}   = 0xf9; // sdwa
+  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
+  let Inst{24-17} = op;
+  let Inst{31-25} = 0x3e; // encoding
+
+  // VOPC disallows dst_sel and dst_unused as they have no effect on destination
+  let Inst{42-40} = SDWA.DWORD;
+  let Inst{44-43} = SDWA.UNUSED_PRESERVE;
+}
+
 //===----------------------------------------------------------------------===//
 // VOPC classes
 //===----------------------------------------------------------------------===//
@@ -79,6 +92,11 @@
   let TSFlags = ps.TSFlags;
 }
 
+class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+  VOP_SDWA_Pseudo <OpName, P, pattern> {
+  let AsmMatchConverter = "cvtSdwaVOPC";
+}
+
 // This class is used only with VOPC instructions. Use $sdst for out operand
 class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
   InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
@@ -128,6 +146,7 @@
     let isCompare = 1;
    let isCommutable = 1;
   }
+
   def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
     Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = P.Schedule;
     let isCompare = 1;
     let isCommutable = 1;
   }
+
+  def _sdwa : VOPC_SDWA_Pseudo <opName, P>,
+              Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
+    let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+    let SchedRW = P.Schedule;
+    let isConvergent = DefExec;
+    let isCompare = 1;
+    let isCommutable = 1;
+  }
 }
 
 def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
@@ -468,10 +496,17 @@
     let SchedRW = p.Schedule;
     let isConvergent = DefExec;
   }
+
   def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = p.Schedule;
   }
+
+  def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
+    let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+    let SchedRW = p.Schedule;
+    let isConvergent = DefExec;
+  }
 }
 
 def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
@@ -819,25 +854,6 @@
 // VI
 //===----------------------------------------------------------------------===//
 
-class VOPC_SDWA <bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> :
-  VOP_SDWA <ps.OpName, P> {
-  let Defs = ps.Defs;
-  let hasSideEffects = ps.hasSideEffects;
-  let AsmMatchConverter = "cvtSdwaVOPC";
-  let isCompare = ps.isCompare;
-  let isCommutable = ps.isCommutable;
-
-  bits<8> src1;
-  let Inst{8-0} = 0xf9; // sdwa
-  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
-  let Inst{24-17} = op;
-  let Inst{31-25} = 0x3e; // encoding
-
-  // VOPC disallows dst_sel and dst_unused as they have no effect on destination
-  let Inst{42-40} = SDWA_DWORD;
-  let Inst{44-43} = SDWA_UNUSED_PRESERVE;
-}
-
 multiclass VOPC_Real_vi <bits<10> op> {
   let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
     def _e32_vi :
@@ -854,9 +870,9 @@
     }
   }
 
-  // for now left sdwa only for asm/dasm
-  // TODO: add corresponding pseudo
-  def _sdwa : VOPC_SDWA <op{7-0}, !cast<VOPC_Pseudo>(NAME#"_e32")>;
+  def _sdwa_vi :
+    VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+    VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
 
   def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
                        !cast<Instruction>(NAME#"_e32_vi")> {
Index: lib/Target/AMDGPU/VOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPInstructions.td
+++ lib/Target/AMDGPU/VOPInstructions.td
@@ -204,6 +204,22 @@
   let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
 }
 
+def SDWA {
+  // sdwa_sel
+  int BYTE_0 = 0;
+  int BYTE_1 = 1;
+  int BYTE_2 = 2;
+  int BYTE_3 = 3;
+  int WORD_0 = 4;
+  int WORD_1 = 5;
+  int DWORD = 6;
+
+  // dst_unused
+  int UNUSED_PAD = 0;
+  int UNUSED_SEXT = 1;
+  int UNUSED_PRESERVE = 2;
+}
+
 class VOP_SDWAe <VOPProfile P> : Enc64 {
   bits<8> src0;
   bits<3> src0_sel;
@@ -214,37 +230,71 @@
   bits<2> dst_unused;
   bits<1> clamp;
 
-  bits<3> SDWA_DWORD = 6;
-  bits<2> SDWA_UNUSED_PRESERVE = 2;
-
   let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
-  let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0});
-  let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
+  let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD);
+  let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE);
   let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
-  let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0});
+  let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD);
   let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
   let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
-  let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0});
+  let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD);
   let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
   let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
 }
 
-class VOP_SDWA <string OpName, VOPProfile P> :
-  InstSI <P.OutsSDWA, P.InsSDWA, OpName#P.AsmSDWA, []>,
-  VOP_SDWAe <P> {
+class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+  InstSI <P.OutsSDWA, P.InsSDWA, "", pattern>,
+  VOP <opName>,
+  SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>,
+  MnemonicAlias <opName#"_sdwa", opName> {
+
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+  let UseNamedOperandTable = 1;
+
+  string Mnemonic = opName;
+  string AsmOperands = P.AsmSDWA;
+
+  let Size = 8;
   let mayLoad = 0;
   let mayStore = 0;
-  let hasSideEffects = 0;
-  let UseNamedOperandTable = 1;
+  let hasSideEffects = 0;
+
   let VALU = 1;
   let SDWA = 1;
-  let Size = 8;
-
+  let Uses = [EXEC];
+
   let SubtargetPredicate = isVI;
   let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
   let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
                                      AMDGPUAsmVariants.Disable);
   let DecoderNamespace = "SDWA";
+
+  VOPProfile Pfl = P;
+}
+
+class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
+  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+
+  let isPseudo = 0;
+  let isCodeGenOnly = 0;
+
+  let Defs = ps.Defs;
+  let Uses = ps.Uses;
+  let SchedRW = ps.SchedRW;
+  let hasSideEffects = ps.hasSideEffects;
+
+  // copy relevant pseudo op flags
+  let SubtargetPredicate = ps.SubtargetPredicate;
+  let AssemblerPredicate = ps.AssemblerPredicate;
+  let AsmMatchConverter = ps.AsmMatchConverter;
+  let AsmVariantName = ps.AsmVariantName;
+  let UseNamedOperandTable = ps.UseNamedOperandTable;
+  let DecoderNamespace = ps.DecoderNamespace;
+  let Constraints = ps.Constraints;
+  let DisableEncoding = ps.DisableEncoding;
+  let TSFlags = ps.TSFlags;
 }
 
 class VOP_DPPe <VOPProfile P> : Enc64 {