Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -66,7 +66,9 @@ // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, - VI = 1 + VI = 1, + SDWA = 2, + SDWA9 = 3 }; // Wrapper for Tablegen'd function. enum Subtarget is not defined in any @@ -101,7 +103,12 @@ } int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST)); + SIEncodingFamily Gen = subtargetEncodingFamily(ST); + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) + Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 + : SIEncodingFamily::SDWA; + + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); // -1 means that Opcode is already a native instruction. if (MCOp == -1) Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -260,6 +260,8 @@ return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); } + bool isSDWARegKind() const; + bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; } @@ -1244,6 +1246,15 @@ return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } +bool AMDGPUOperand::isSDWARegKind() const { + if (AsmParser->isVI()) + return isVReg(); + else if (AsmParser->isGFX9()) + return isRegKind(); + else + return false; +} + uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const { assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); @@ -4490,12 +4501,11 @@ if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { - // V_NOP_sdwa_vi has no optional sdwa arguments + // 
V_NOP_sdwa_vi and V_NOP_sdwa_gfx9 have no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (isGFX9() && - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); @@ -4505,8 +4515,7 @@ case SIInstrFlags::VOP2: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (isGFX9() && - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); @@ -4516,9 +4525,7 @@ break; case SIInstrFlags::VOPC: - if (isVI()) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -65,6 +65,8 @@ uint64_t Inst, uint64_t Address) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; + MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand 
decodeOperand_VS_64(unsigned Val) const; @@ -105,10 +107,10 @@ MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; - MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const; - MCOperand decodeSDWA9Src16(unsigned Val) const; - MCOperand decodeSDWA9Src32(unsigned Val) const; - MCOperand decodeSDWA9VopcDst(unsigned Val) const; + MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWASrc16(unsigned Val) const; + MCOperand decodeSDWASrc32(unsigned Val) const; + MCOperand decodeSDWAVopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -49,6 +49,17 @@ MCDisassembler::SoftFail; } +static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, + uint16_t NameIdx) { + int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx); + if (OpIdx != -1) { + auto I = MI.begin(); + std::advance(I, OpIdx); + MI.insert(I, Op); + } + return OpIdx; +} + static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { auto DAsm = static_cast(Decoder); @@ -106,12 +117,12 @@ return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } -#define DECODE_SDWA9(DecName) \ -DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName) +#define DECODE_SDWA(DecName) \ +DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) -DECODE_SDWA9(Src32) -DECODE_SDWA9(Src16) -DECODE_SDWA9(VopcDst) +DECODE_SDWA(Src32) +DECODE_SDWA(Src16) +DECODE_SDWA(VopcDst) #include "AMDGPUGenDisassemblerTables.inc" @@ -149,6 +160,7 @@ raw_ostream &WS, raw_ostream &CS) const { CommentStream = &CS; + bool IsSDWA = false; // ToDo: AMDGPUDisassembler 
supports only VI ISA. if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]) @@ -170,10 +182,10 @@ if (Res) break; Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); - if (Res) break; + if (Res) { IsSDWA = true; break; } Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); - if (Res) break; + if (Res) { IsSDWA = true; break; } } // Reinitialize Bytes as DPP64 could have eaten too much @@ -200,17 +212,36 @@ MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si || MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) { // Insert dummy unused src2_modifiers. - int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::src2_modifiers); - auto I = MI.begin(); - std::advance(I, Src2ModIdx); - MI.insert(I, MCOperand::createImm(0)); + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src2_modifiers); } + if (Res && IsSDWA) + Res = convertSDWAInst(MI); + Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0; return Res; } +DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) + // VOPC - insert clamp + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); + if (SDst != -1) { + // VOPC - insert VCC register as sdst + insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC), + AMDGPU::OpName::sdst); + } else { + // VOP1/2 - insert omod if present in instruction + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); + } + } + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -592,36 +623,43 @@ return errOperand(Val, "unknown operand encoding " + Twine(Val)); } 
-MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width, - unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, + unsigned Val) const { using namespace AMDGPU::SDWA; - if (SDWA9EncValues::SRC_VGPR_MIN <= Val && - Val <= SDWA9EncValues::SRC_VGPR_MAX) { - return createRegOperand(getVgprClassId(Width), - Val - SDWA9EncValues::SRC_VGPR_MIN); - } - if (SDWA9EncValues::SRC_SGPR_MIN <= Val && - Val <= SDWA9EncValues::SRC_SGPR_MAX) { - return createSRegOperand(getSgprClassId(Width), - Val - SDWA9EncValues::SRC_SGPR_MIN); - } + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } - return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + return createRegOperand(getVgprClassId(Width), Val); + } + llvm_unreachable("unsupported target"); } -MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const { - return decodeSDWA9Src(OPW16, Val); +MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { + return decodeSDWASrc(OPW16, Val); } -MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const { - return decodeSDWA9Src(OPW32, Val); +MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { + return decodeSDWASrc(OPW32, Val); } -MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { using namespace AMDGPU::SDWA; + assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] && + "SDWAVopcDst should be present only on GFX9"); if (Val & 
SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; if (Val > AMDGPU::EncValues::SGPR_MAX) { Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,15 +52,15 @@ return 0; } - virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { return 0; } - virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { return 0; } Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -69,14 +69,14 @@ unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; - - unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; - - unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; + + unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end 
anonymous namespace @@ -328,11 +328,11 @@ } unsigned -SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; - + uint64_t RegEnc = 0; const MCOperand &MO = MI.getOperand(OpNo); @@ -347,9 +347,9 @@ } unsigned -SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; uint64_t RegEnc = 0; Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIDefines.h +++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h @@ -118,9 +118,9 @@ // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, - // Operand for GFX9 SDWA instructions - OPERAND_SDWA9_SRC, - OPERAND_SDWA9_VOPC_DST, + // Operand for SDWA instructions + OPERAND_SDWA_SRC, + OPERAND_SDWA_VOPC_DST, /// Operand with 32-bit immediate that uses the constant bus. 
OPERAND_KIMM32, Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h @@ -814,6 +814,9 @@ int getSDWAOp(uint16_t Opcode); LLVM_READONLY + int getBasicFromSDWAOp(uint16_t Opcode); + + LLVM_READONLY int getCommuteRev(uint16_t Opcode); LLVM_READONLY Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2108,7 +2108,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { - if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET) + if (!MO.isImm() || + OperandType < AMDGPU::OPERAND_SRC_FIRST || + OperandType > AMDGPU::OPERAND_SRC_LAST) return false; // MachineOperand provides no way to tell the true operand size, since it only @@ -2433,8 +2435,73 @@ } } + // Verify SDWA + if (isSDWA(MI)) { + + if (!ST.hasSDWA()) { + ErrInfo = "SDWA is not supported on this target"; + return false; + } + + int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if ( DstIdx == -1) + DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst); + + const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx: OpIndicies) { + if (OpIdx == -1) + continue; + const MachineOperand &MO = MI.getOperand(OpIdx); + + if (AMDGPU::isVI(ST)) { + // Only VGPRS on VI + if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { + ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; + return false; + } + } else { + // No immediates on GFX9 + if (!MO.isReg()) { + ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9"; + return false; + } + } + } + + if (AMDGPU::isVI(ST)) { + // No omod allowed on VI + const MachineOperand *OMod = getNamedOperand(MI, 
AMDGPU::OpName::omod); + if (OMod != nullptr && + (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in SDWA instructions on VI"; + return false; + } + } + + uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); + if (isVOPC(BasicOpcode)) { + if (AMDGPU::isVI(ST) && DstIdx != -1) { + // Only vcc allowed as dst on VI for VOPC + const MachineOperand &Dst = MI.getOperand(DstIdx); + if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { + ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; + return false; + } + } else if (AMDGPU::isGFX9(ST)) { + // No clamp allowed on GFX9 for VOPC + const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); + if (Clamp != nullptr && + (!Clamp->isImm() || Clamp->getImm() != 0)) { + ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9"; + return false; + } + } + } + } + // Verify VOP* - if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { + if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. 
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -20,6 +20,8 @@ int NONE = -1; int SI = 0; int VI = 1; + int SDWA = 2; + int SDWA9 = 3; } //===----------------------------------------------------------------------===// @@ -452,25 +454,25 @@ let ParserMatchClass = VReg32OrOffClass; } -class SDWA9Src : RegisterOperand { +class SDWASrc : RegisterOperand { let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_SDWA9_SRC"; - let EncoderMethod = "getSDWA9SrcEncoding"; + let OperandType = "OPERAND_SDWA_SRC"; + let EncoderMethod = "getSDWASrcEncoding"; } -def SDWA9Src32 : SDWA9Src { - let DecoderMethod = "decodeSDWA9Src32"; +def SDWASrc32 : SDWASrc { + let DecoderMethod = "decodeSDWASrc32"; } -def SDWA9Src16 : SDWA9Src { - let DecoderMethod = "decodeSDWA9Src16"; +def SDWASrc16 : SDWASrc { + let DecoderMethod = "decodeSDWASrc16"; } -def SDWA9VopcDst : VOPDstOperand { +def SDWAVopcDst : VOPDstOperand { let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_SDWA9_VOPC_DST"; - let EncoderMethod = "getSDWA9VopcDstEncoding"; - let DecoderMethod = "decodeSDWA9VopcDst"; + let OperandType = "OPERAND_SDWA_VOPC_DST"; + let EncoderMethod = "getSDWAVopcDstEncoding"; + let DecoderMethod = "decodeSDWAVopcDst"; } class NamedMatchClass : AsmOperandClass { @@ -634,13 +636,13 @@ def Int32InputMods : IntInputMods; def Int64InputMods : IntInputMods; -def FPRegInputModsMatchClass : AsmOperandClass { - let Name = "RegWithFPInputMods"; +def FPRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; - let PredicateMethod = "isRegKind"; + let PredicateMethod = "isSDWARegKind"; } -def FPRegInputMods : InputMods { +def FPRegSDWAInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } @@ -655,13 +657,13 @@ } -def 
IntRegInputModsMatchClass : AsmOperandClass { - let Name = "RegWithIntInputMods"; +def IntRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; - let PredicateMethod = "isRegKind"; + let PredicateMethod = "isSDWARegKind"; } -def IntRegInputMods : InputMods { +def IntRegSDWAInputMods : InputMods { let PrintMethod = "printOperandAndIntInputMods"; } @@ -851,10 +853,10 @@ } // Returns the register class to use for the destination of VOP[12C] -// instructions with GFX9 SDWA extension -class getSDWA9DstForVT { +// instructions with SDWA extension +class getSDWADstForVT { RegisterOperand ret = !if(!eq(VT.Size, 1), - SDWA9VopcDst, // VOPC + SDWAVopcDst, // VOPC VOPDstOperand); // VOP1/2 32-bit dst } @@ -898,8 +900,8 @@ !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } -class getSDWA9SrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32); +class getSDWASrcForVT { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32); } // Returns the register class to use for sources of VOP3 instructions for the @@ -995,7 +997,7 @@ ); } -// Return type of input modifiers operand specified input operand for SDWA/DPP +// Return type of input modifiers operand specified input operand for DPP class getSrcModExt { bit isFP = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, @@ -1004,13 +1006,13 @@ Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } -// Return type of input modifiers operand specified input operand for SDWA 9 -class getSrcModSDWA9 { +// Return type of input modifiers operand specified input operand for SDWA +class getSrcModSDWA { bit isFP = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, !if(!eq(VT.Value, f64.Value), 1, 0))); - Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods); + Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods); } // Returns the input arguments for VOP[12C] instructions for the given 
SrcVT. @@ -1141,36 +1143,12 @@ /* endif */))); } -class getInsSDWA { - dag ret = !if(!eq(NumSrcArgs, 0), - // VOP1 without input operands (V_NOP) - (ins), - !if(!eq(NumSrcArgs, 1), - // VOP1_SDWA - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel), - !if(!eq(NumSrcArgs, 2), - !if(!eq(DstVT.Size, 1), - // VOPC_SDWA with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel)), - (ins)/* endif */))); -} -// Ins for GFX9 SDWA -class getInsSDWA9 { +// Ins for SDWA +class getInsSDWA { dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) @@ -1178,31 +1156,31 @@ !if(!eq(NumSrcArgs, 1), // VOP1 !if(!eq(HasSDWAOMod, 0), - // VOP1_SDWA9 without omod + // VOP1_SDWA without omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel), - // VOP1_SDWA9 with omod + // VOP1_SDWA with omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel)), !if(!eq(NumSrcArgs, 2), !if(!eq(DstVT.Size, 1), - // VOPC_SDWA9 + // VOPC_SDWA (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, - src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA9 + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA !if(!eq(HasSDWAOMod, 0), - // VOP2_SDWA9 without omod + // VOP2_SDWA without omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP1_SDWA9 with omod + // 
VOP2_SDWA with omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, omod:$omod, @@ -1220,12 +1198,12 @@ (outs)); // V_NOP } -// Outs for GFX9 SDWA -class getOutsSDWA9 { +// Outs for SDWA +class getOutsSDWA { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), - (outs DstRCSDWA9:$sdst), - (outs DstRCSDWA9:$vdst)), + (outs DstRCSDWA:$sdst), + (outs DstRCSDWA:$vdst)), (outs)); // V_NOP } @@ -1387,8 +1365,7 @@ field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT.ret; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; - field RegisterOperand DstRCSDWA = getVALUDstForVT.ret; - field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT.ret; + field RegisterOperand DstRCSDWA = getSDWADstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterClass Src1RC32 = getVregSrcForVT.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; @@ -1396,19 +1373,15 @@ field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; field RegisterClass Src0DPP = getVregSrcForVT.ret; field RegisterClass Src1DPP = getVregSrcForVT.ret; - field RegisterClass Src0SDWA = getVregSrcForVT.ret; - field RegisterClass Src1SDWA = getVregSrcForVT.ret; - field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT.ret; - field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT.ret; + field RegisterOperand Src0SDWA = getSDWASrcForVT.ret; + field RegisterOperand Src1SDWA = getSDWASrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; field Operand Src1Mod = getSrcMod.ret; field Operand Src2Mod = getSrcMod.ret; field Operand Src0ModDPP = getSrcModExt.ret; field Operand Src1ModDPP = getSrcModExt.ret; - field Operand Src0ModSDWA = getSrcModExt.ret; - field Operand Src1ModSDWA = getSrcModExt.ret; - field Operand Src0ModSDWA9 = getSrcModSDWA9.ret; - field Operand Src1ModSDWA9 = getSrcModSDWA9.ret; + field Operand Src0ModSDWA = getSrcModSDWA.ret; + field Operand Src1ModSDWA = getSrcModSDWA.ret; field bit HasDst = !if(!eq(DstVT.Value, 
untyped.Value), 0, 1); @@ -1457,8 +1430,7 @@ field dag Outs32 = Outs; field dag Outs64 = Outs; field dag OutsDPP = getOutsExt.ret; - field dag OutsSDWA = getOutsExt.ret; - field dag OutsSDWA9 = getOutsSDWA9.ret; + field dag OutsSDWA = getOutsSDWA.ret; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64.ret; field dag InsSDWA = getInsSDWA.ret; - field dag InsSDWA9 = getInsSDWA9.ret; + field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; @@ -1628,13 +1598,13 @@ let ValueCols = [["SDWA"]]; } -// Maps ordinary instructions to their SDWA GFX9 counterparts -def getSDWA9Op : InstrMapping { +// Maps SDWA instructions to their ordinary counterparts +def getBasicFromSDWAOp : InstrMapping { let FilterClass = "VOP"; let RowFields = ["OpName"]; let ColFields = ["AsmVariantName"]; - let KeyCol = ["Default"]; - let ValueCols = [["SDWA9"]]; + let KeyCol = ["SDWA"]; + let ValueCols = [["Default"]]; } def getMaskedMIMGOp : InstrMapping { @@ -1669,7 +1639,9 @@ let ColFields = ["Subtarget"]; let KeyCol = [!cast(SIEncodingFamily.NONE)]; let ValueCols = [[!cast(SIEncodingFamily.SI)], - [!cast(SIEncodingFamily.VI)]]; + [!cast(SIEncodingFamily.VI)], + [!cast(SIEncodingFamily.SDWA)], + [!cast(SIEncodingFamily.SDWA9)]]; } // Get equivalent SOPK instruction. 
Index: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -224,7 +224,7 @@ static bool isSubregOf(const MachineOperand &SubReg, const MachineOperand &SuperReg, const TargetRegisterInfo *TRI) { - + if (!SuperReg.isReg() || !SubReg.isReg()) return false; @@ -557,7 +557,7 @@ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src0->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; @@ -590,7 +590,7 @@ break; MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src1->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; @@ -613,9 +613,17 @@ if (AMDGPU::getSDWAOp(Opc) != -1) return true; int Opc32 = AMDGPU::getVOPe32(Opc); - if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) - return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && - !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) { + if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) + return false; + + if (TII->isVOPC(Opc)) { + const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + return SDst && SDst->getReg() == AMDGPU::VCC; + } else { + return !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + } + } return false; } @@ -641,6 +649,11 @@ if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); + } else { + Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + assert(Dst && + AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.add(*Dst); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -678,8 +691,12 @@ } // Initialize clamp. 
- assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); - SDWAInst.addImm(0); + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1) + SDWAInst.addImm(0); + + // Initialize omod. + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) + SDWAInst.addImm(0); // Initialize dst_sel and dst_unused if present if (Dst) { @@ -766,7 +783,7 @@ MRI = &MF.getRegInfo(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); - + // Find all SDWA operands in MF. matchSDWAOperands(MF); Index: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td @@ -93,11 +93,6 @@ let AsmMatchConverter = "cvtSdwaVOP1"; } -class VOP1_SDWA9_Pseudo pattern=[]> : - VOP_SDWA9_Pseudo { - let AsmMatchConverter = "cvtSdwaVOP1"; -} - class getVOP1Pat64 : LetDummies { list ret = !if(P.HasModifiers, @@ -117,7 +112,6 @@ def _e32 : VOP1_Pseudo ; def _e64 : VOP3_Pseudo .ret>; def _sdwa : VOP1_SDWA_Pseudo ; - def _sdwa9 : VOP1_SDWA9_Pseudo ; } // Special profile for instructions which have clamp @@ -274,12 +268,10 @@ let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); - let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; @@ -545,8 +537,8 @@ VOP1_SDWAe (NAME#"_sdwa").Pfl>; def _sdwa_gfx9 : - VOP_SDWA9_Real (NAME#"_sdwa9")>, - 
VOP1_SDWA9Ae (NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; // For now left dpp only for asm/dasm // TODO: add corresponding pseudo Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -114,11 +114,6 @@ let AsmMatchConverter = "cvtSdwaVOP2"; } -class VOP2_SDWA9_Pseudo pattern=[]> : - VOP_SDWA9_Pseudo { - let AsmMatchConverter = "cvtSdwaVOP2"; -} - class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -139,7 +134,6 @@ Commutable_REV; def _sdwa : VOP2_SDWA_Pseudo ; - def _sdwa9 : VOP2_SDWA9_Pseudo ; } multiclass VOP2bInst { let AsmMatchConverter = "cvtSdwaVOP2b"; } - - def _sdwa9 : VOP2_SDWA9_Pseudo { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } } def _e64 : VOP3_Pseudo .ret>, @@ -221,17 +211,13 @@ VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, VGPR_32:$src2, // stub argument - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - VGPR_32:$src2, // stub argument - clampmod:$clamp, omod:$omod, - dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; @@ -289,15 +275,10 @@ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, - clampmod:$clamp, dst_sel:$dst_sel, 
dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - clampmod:$clamp, omod:$omod, - dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, @@ -728,8 +709,8 @@ multiclass VOP2_SDWA9_Real op> { def _sdwa_gfx9 : - VOP_SDWA9_Real (NAME#"_sdwa9")>, - VOP2_SDWA9Ae (NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>; } multiclass VOP2be_Real_e32e64_vi op> : Index: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td @@ -113,11 +113,6 @@ let AsmMatchConverter = "cvtSdwaVOPC"; } -class VOPC_SDWA9_Pseudo pattern=[]> : - VOP_SDWA9_Pseudo { - let AsmMatchConverter = "cvtSdwaVOPC"; -} - // This class is used only with VOPC instructions. 
Use $sdst for out operand class VOPCInstAlias : InstAlias , PredicateControl { @@ -189,13 +184,6 @@ let isConvergent = DefExec; let isCompare = 1; } - - def _sdwa9 : VOPC_SDWA9_Pseudo { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let SchedRW = P.Schedule; - let isConvergent = DefExec; - let isCompare = 1; - } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -540,14 +528,12 @@ VOPC_Profile { let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - src0_sel:$src0_sel, src1_sel:$src1_sel); + let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; - //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; @@ -580,12 +566,6 @@ let SchedRW = p.Schedule; let isConvergent = DefExec; } - - def _sdwa9 : VOPC_SDWA9_Pseudo { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let SchedRW = p.Schedule; - let isConvergent = DefExec; - } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -954,8 +934,8 @@ VOPC_SDWAe (NAME#"_sdwa").Pfl>; def _sdwa_gfx9 : - VOP_SDWA9_Real (NAME#"_sdwa9")>, - VOPC_SDWA9e (NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOPC_SDWA9e (NAME#"_sdwa").Pfl>; def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_vi")> { Index: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td @@ -300,6 +300,19 @@ let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); } +// GFX9 adds two features to SDWA: +// 1. 
Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b. When SD=0, VCC is used. +// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + // gfx9 SDWA basic encoding class VOP_SDWA9e : Enc64 { bits<9> src0; // {src0_sgpr{0}, src0{7-0}} @@ -353,6 +366,7 @@ string Mnemonic = opName; string AsmOperands = P.AsmSDWA; + string AsmOperands9 = P.AsmSDWA9; let Size = 8; let mayLoad = 0; @@ -372,53 +386,9 @@ VOPProfile Pfl = P; } -// GFX9 adds two features to SDWA: -// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. -// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather -// than VGPRs (at most 1 can be an SGPR); -// b. OMOD is the standard output modifier (result *2, *4, /2) -// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This -// replaces OMOD and the dest fields with SD and SDST (SGPR destination) -// field. -// a. When SD=1, the SDST is used as the destination for the compare result; -// b.when SD=0, VCC is used. 
-// -// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA - -class VOP_SDWA9_Pseudo pattern=[]> : - InstSI , - VOP , - SIMCInstr , - MnemonicAlias { - - let isPseudo = 1; - let isCodeGenOnly = 1; - let UseNamedOperandTable = 1; - - string Mnemonic = opName; - string AsmOperands = P.AsmSDWA9; - - let Size = 8; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - - let VALU = 1; - let SDWA = 1; - let Uses = [EXEC]; - - let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); - let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); - let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA9"; - - VOPProfile Pfl = P; -} - class VOP_SDWA_Real : InstSI , - SIMCInstr { + SIMCInstr { let isPseudo = 0; let isCodeGenOnly = 0; @@ -431,6 +401,10 @@ let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; + // string Mnemonic = ps.Mnemonic; + // string AsmOperands = ps.AsmOperands; + // string AsmOperands9 = ps.AsmOperands9; + // Copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AssemblerPredicate = ps.AssemblerPredicate; @@ -443,9 +417,9 @@ let TSFlags = ps.TSFlags; } -class VOP_SDWA9_Real : - InstSI , - SIMCInstr { +class VOP_SDWA9_Real : + InstSI , + SIMCInstr { let isPseudo = 0; let isCodeGenOnly = 0; @@ -458,13 +432,15 @@ let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; + let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + // Copy relevant pseudo op flags - let SubtargetPredicate = ps.SubtargetPredicate; - let AssemblerPredicate = ps.AssemblerPredicate; let AsmMatchConverter = ps.AsmMatchConverter; - let AsmVariantName = ps.AsmVariantName; let UseNamedOperandTable = 
ps.UseNamedOperandTable; - let DecoderNamespace = ps.DecoderNamespace; let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags;