diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1296,8 +1296,10 @@
   int SDWA9_ID = 3;
   string DPP = "DPP";
   int DPP_ID = 4;
+  string VOP3_DPP = "VOP3_DPP";
+  int VOP3_DPP_ID = 5;
   string Disable = "Disable";
-  int Disable_ID = 5;
+  int Disable_ID = 6;
 }
 
 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
@@ -1325,6 +1327,11 @@
   let Name = AMDGPUAsmVariants.DPP;
 }
 
+def VOP3_DPPAsmParserVariant : AsmParserVariant {
+  let Variant = AMDGPUAsmVariants.VOP3_DPP_ID;
+  let Name = AMDGPUAsmVariants.VOP3_DPP;
+}
+
 def AMDGPU : Target {
   // Pull in Instruction Info:
   let InstructionSet = AMDGPUInstrInfo;
@@ -1333,7 +1340,8 @@
                           VOP3AsmParserVariant,
                           SDWAAsmParserVariant,
                           SDWA9AsmParserVariant,
-                          DPPAsmParserVariant];
+                          DPPAsmParserVariant,
+                          VOP3_DPPAsmParserVariant];
   let AssemblyWriters = [AMDGPUAsmWriter];
   let AllowRegisterRenaming = 1;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -40,7 +40,7 @@
   // instructions to not match without killing the whole decode process. It is
   // mainly used for ARM, but Tablegen expects this field to exist or it fails
   // to build the decode table.
-  field bits<64> SoftFail = 0;
+  field bits<96> SoftFail = 0;
 
   let DecoderNamespace = Namespace;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -25,6 +25,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -123,12 +124,6 @@
     ImmTyD16,
     ImmTyClampSI,
     ImmTyOModSI,
-    ImmTyDPP8,
-    ImmTyDppCtrl,
-    ImmTyDppRowMask,
-    ImmTyDppBankMask,
-    ImmTyDppBoundCtrl,
-    ImmTyDppFi,
     ImmTySdwaDstSel,
     ImmTySdwaSrc0Sel,
     ImmTySdwaSrc1Sel,
@@ -154,6 +149,12 @@
     ImmTyOpSelHi,
     ImmTyNegLo,
     ImmTyNegHi,
+    ImmTyDPP8,
+    ImmTyDppCtrl,
+    ImmTyDppRowMask,
+    ImmTyDppBankMask,
+    ImmTyDppBoundCtrl,
+    ImmTyDppFi,
     ImmTySwizzle,
     ImmTyGprIdxMode,
     ImmTyHigh,
@@ -267,6 +268,14 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
 
+  bool isRegOrInlineImmWithInt16InputMods() const {
+    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
+  }
+
+  bool isRegOrInlineImmWithInt32InputMods() const {
+    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
+  }
+
   bool isRegOrImmWithInt64InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
@@ -283,6 +292,15 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isRegOrInlineImmWithFP16InputMods() const {
+    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
+  }
+
+  bool isRegOrInlineImmWithFP32InputMods() const {
+    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
+  }
+
+
   bool isVReg() const {
     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
            isRegClass(AMDGPU::VReg_64RegClassID) ||
@@ -1745,6 +1763,8 @@
   AMDGPUOperand::Ptr defaultFI() const;
   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
+  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
+  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); }
 
   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                     AMDGPUOperand::ImmTy Type);
@@ -3222,7 +3242,8 @@
 static ArrayRef<unsigned> getAllVariants() {
   static const unsigned Variants[] = {
     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
-    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
+    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
+    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
   };
 
   return makeArrayRef(Variants);
@@ -3230,6 +3251,10 @@
 
 // What asm variants we should check
 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
+  if (isForcedDPP() && isForcedVOP3()) {
+    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
+    return makeArrayRef(Variants);
+  }
   if (getForcedEncodingSize() == 32) {
     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
     return makeArrayRef(Variants);
@@ -3255,6 +3280,9 @@
 }
 
 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
+  if (isForcedDPP() && isForcedVOP3())
+    return "e64_dpp";
+
   if (getForcedEncodingSize() == 32)
     return "e32";
@@ -5663,7 +5691,11 @@
   setForcedDPP(false);
   setForcedSDWA(false);
 
-  if (Name.endswith("_e64")) {
+  if (Name.endswith("_e64_dpp")) {
+    setForcedDPP(true);
+    setForcedEncodingSize(64);
+    return Name.substr(0, Name.size() - 8);
+  } else if (Name.endswith("_e64")) {
     setForcedEncodingSize(64);
     return Name.substr(0, Name.size() - 4);
   } else if (Name.endswith("_e32")) {
@@ -7831,10 +7863,6 @@
   {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
   {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
   {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
-  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
-  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
-  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
-  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
   {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
   {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
   {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
@@ -7845,6 +7873,12 @@
   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
+  {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr},
+  {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
+  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
+  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
+  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
+  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
@@ -7915,6 +7949,10 @@
     res = parseDim(Operands);
   } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
     res = parseCPol(Operands);
+  } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
+    res = parseDPP8(Operands);
+  } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
+    res = parseDPPCtrl(Operands);
   } else {
     res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
@@ -8550,6 +8588,70 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
 }
 
+void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
+                                 bool IsDPP8) {
+  OptionalImmIndexMap OptionalIdx;
+  unsigned Opc = Inst.getOpcode();
+  bool HasModifiers =
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
+  unsigned I = 1;
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+  }
+
+  int Fi = 0;
+  for (unsigned E = Operands.size(); I != E; ++I) {
+    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+                                            MCOI::TIED_TO);
+    if (TiedTo != -1) {
+      assert((unsigned)TiedTo < Inst.getNumOperands());
+      // handle tied old or src2 for MAC instructions
+      Inst.addOperand(Inst.getOperand(TiedTo));
+    }
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+    // Add the register arguments
+    if (IsDPP8 && Op.isFI()) {
+      Fi = Op.getImm();
+    } else if (HasModifiers &&
+               isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+    } else if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
+    } else if (Op.isImm() &&
+               Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
+      assert(!HasModifiers && "Case should be unreachable with modifiers");
+      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
+      Op.addImmOperands(Inst, 1);
+    } else if (Op.isImm()) {
+      OptionalIdx[Op.getImmTy()] = I;
+    } else {
+      llvm_unreachable("unhandled operand type");
+    }
+  }
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+  }
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+  }
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
+  }
+
+  if (IsDPP8) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
+    using namespace llvm::AMDGPU::DPP;
+    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
+  } else {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+    }
+  }
+}
+
 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
   OptionalImmIndexMap OptionalIdx;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
 #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCInst.h"
@@ -28,6 +29,60 @@
 class MCSubtargetInfo;
 class Twine;
 
+// Exposes an interface expected by autogenerated code in
+// FixedLenDecoderEmitter
+class DecoderUInt128 {
+private:
+  uint64_t Lo = 0;
+  uint64_t Hi = 0;
+
+public:
+  DecoderUInt128() = default;
+  DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {}
+  operator bool() const { return Lo || Hi; }
+  void insertBits(uint64_t SubBits, unsigned BitPosition, unsigned NumBits) {
+    assert(NumBits && NumBits <= 64);
+    assert(SubBits >> 1 >> (NumBits - 1) == 0);
+    assert(BitPosition < 128);
+    if (BitPosition < 64) {
+      Lo |= SubBits << BitPosition;
+      Hi |= SubBits >> 1 >> (63 - BitPosition);
+    } else {
+      Hi |= SubBits << (BitPosition - 64);
+    }
+  }
+  uint64_t extractBitsAsZExtValue(unsigned NumBits,
+                                  unsigned BitPosition) const {
+    assert(NumBits && NumBits <= 64);
+    assert(BitPosition < 128);
+    uint64_t Val;
+    if (BitPosition < 64)
+      Val = Lo >> BitPosition | Hi << 1 << (63 - BitPosition);
+    else
+      Val = Hi >> (BitPosition - 64);
+    return Val & ((uint64_t(2) << (NumBits - 1)) - 1);
+  }
+  DecoderUInt128 operator&(const DecoderUInt128 &RHS) const {
+    return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi);
+  }
+  DecoderUInt128 operator&(const uint64_t &RHS) const {
+    return *this & DecoderUInt128(RHS);
+  }
+  DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); }
+  bool operator==(const DecoderUInt128 &RHS) {
+    return Lo == RHS.Lo && Hi == RHS.Hi;
+  }
+  bool operator!=(const DecoderUInt128 &RHS) {
+    return Lo != RHS.Lo || Hi != RHS.Hi;
+  }
+  bool operator!=(const int &RHS) {
+    return *this != DecoderUInt128(RHS);
+  }
+  friend raw_ostream &operator<<(raw_ostream &OS, const DecoderUInt128 &RHS) {
+    return OS << APInt(128, {RHS.Lo, RHS.Hi});
+  }
+};
+
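A note on the arithmetic above: the double shift (`>> 1 >> (63 - BitPosition)`) avoids an undefined shift by 64 when a field starts at bit 0 and lies entirely in Lo. The following standalone C++ sketch (not part of the patch; names are illustrative) mirrors the insert/extract math so it can be sanity-checked without building LLVM:

// sketch: replicate DecoderUInt128's bit math with two plain uint64_t words
#include <cassert>
#include <cstdint>

struct U128 {
  uint64_t Lo = 0, Hi = 0;
  void insertBits(uint64_t Sub, unsigned Pos, unsigned N) {
    if (Pos < 64) {
      Lo |= Sub << Pos;
      Hi |= Sub >> 1 >> (63 - Pos); // split shift: defined even for Pos == 0
    } else {
      Hi |= Sub << (Pos - 64);
    }
  }
  uint64_t extract(unsigned N, unsigned Pos) const {
    uint64_t V = Pos < 64 ? (Lo >> Pos | Hi << 1 << (63 - Pos))
                          : Hi >> (Pos - 64);
    return V & ((uint64_t(2) << (N - 1)) - 1); // mask, safe for N == 64
  }
};

int main() {
  U128 W;
  W.insertBits(0x1FF, 60, 9);      // 9-bit field straddling the Lo/Hi split
  assert(W.extract(9, 60) == 0x1FF);
  assert(W.extract(4, 64) == 0xF); // its top 4 bits landed in Hi
  return 0;
}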
 //===----------------------------------------------------------------------===//
 // AMDGPUDisassembler
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -391,6 +391,17 @@
   return Res;
 }
 
+static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+  assert(Bytes.size() >= 12);
+  uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
+      Bytes.data());
+  Bytes = Bytes.slice(8);
+  uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
+      Bytes.data());
+  Bytes = Bytes.slice(4);
+  return DecoderUInt128(Lo, Hi);
+}
+
 // The disassembler is greedy, so we need to check FI operand value to
 // not parse a dpp if the correct literal is not set. For dpp16 the
 // autogenerated decoder checks the dpp literal
@@ -421,6 +432,21 @@
   // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
   // encodings
+  if (isGFX11Plus() && Bytes.size() >= 12 ) {
+    DecoderUInt128 DecW = eat12Bytes(Bytes);
+    Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW,
+                        Address);
+    if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+      break;
+    MI = MCInst(); // clear
+    Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW,
+                        Address);
+    if (Res)
+      break;
+  }
+  // Reinitialize Bytes
+  Bytes = Bytes_.slice(0, MaxInstBytesNum);
+
   if (Bytes.size() >= 8) {
     const uint64_t QW = eatBytes<uint64_t>(Bytes);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -363,23 +363,22 @@
 }
 
 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
+                                    const MCSubtargetInfo &STI, raw_ostream &O) {
   auto Opcode = MI->getOpcode();
   auto Flags = MII.get(Opcode).TSFlags;
-  if (OpNo == 0) {
-    if (Flags & SIInstrFlags::VOP3) {
+  if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
+    O << "_e64_dpp";
+  else if (Flags & SIInstrFlags::VOP3) {
     if (!getVOP3IsSingle(Opcode))
       O << "_e64";
-    } else if (Flags & SIInstrFlags::DPP) {
+  } else if (Flags & SIInstrFlags::DPP)
     O << "_dpp";
-    } else if (Flags & SIInstrFlags::SDWA) {
+  else if (Flags & SIInstrFlags::SDWA)
     O << "_sdwa";
-    } else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
-               ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode))) {
+  else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
+           ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
     O << "_e32";
-  }
 
   O << " ";
 }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include <cstdint>
@@ -34,46 +35,34 @@
   AMDGPUMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
 
 public:
+  void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+                             APInt &Inst, APInt &Scratch,
+                             const MCSubtargetInfo &STI) const;
 
-  uint64_t getBinaryCodeForInstr(const MCInst &MI,
-                                 SmallVectorImpl<MCFixup> &Fixups,
-                                 const MCSubtargetInfo &STI) const;
+  virtual void getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                                 APInt &Op, SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const = 0;
 
-  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                                     SmallVectorImpl<MCFixup> &Fixups,
-                                     const MCSubtargetInfo &STI) const {
-    return 0;
-  }
+  virtual void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const = 0;
 
-  virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+  virtual void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
                                      SmallVectorImpl<MCFixup> &Fixups,
-                                     const MCSubtargetInfo &STI) const {
-    return 0;
-  }
+                                     const MCSubtargetInfo &STI) const = 0;
 
-  virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
-                                         SmallVectorImpl<MCFixup> &Fixups,
-                                         const MCSubtargetInfo &STI) const {
-    return 0;
-  }
+  virtual void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                                  SmallVectorImpl<MCFixup> &Fixups,
+                                  const MCSubtargetInfo &STI) const = 0;
 
-  virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+  virtual void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+                                      APInt &Op,
                                       SmallVectorImpl<MCFixup> &Fixups,
-                                      const MCSubtargetInfo &STI) const {
-    return 0;
-  }
-
-  virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
-                                          SmallVectorImpl<MCFixup> &Fixups,
-                                          const MCSubtargetInfo &STI) const {
-    return 0;
-  }
-
-  virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
-                                        SmallVectorImpl<MCFixup> &Fixups,
-                                        const MCSubtargetInfo &STI) const {
-    return 0;
-  }
+                                      const MCSubtargetInfo &STI) const = 0;
+
+  virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                                    SmallVectorImpl<MCFixup> &Fixups,
+                                    const MCSubtargetInfo &STI) const = 0;
 
 protected:
   FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -17,6 +17,8 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -48,34 +50,38 @@
                          const MCSubtargetInfo &STI) const override;
 
   /// \returns the encoding for an MCOperand.
-  uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                             SmallVectorImpl<MCFixup> &Fixups,
-                             const MCSubtargetInfo &STI) const override;
+  void getMachineOpValue(const MCInst &MI, const MCOperand &MO, APInt &Op,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
 
   /// Use a fixup to encode the simm16 field for SOPP branch
   ///  instructions.
-  unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+  void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+
+  void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;
 
-  unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
-                                 SmallVectorImpl<MCFixup> &Fixups,
-                                 const MCSubtargetInfo &STI) const override;
+  void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                          SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI) const override;
 
-  unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+  void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
                               SmallVectorImpl<MCFixup> &Fixups,
                               const MCSubtargetInfo &STI) const override;
 
-  unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
-                                  SmallVectorImpl<MCFixup> &Fixups,
-                                  const MCSubtargetInfo &STI) const override;
-
-  unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
-                                SmallVectorImpl<MCFixup> &Fixups,
-                                const MCSubtargetInfo &STI) const override;
+  void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const override;
 
 private:
   uint64_t getImplicitOpSelHiEncoding(int Opcode) const;
+  void getMachineOpValueCommon(const MCInst &MI, const MCOperand &MO,
+                               unsigned OpNo, APInt &Op,
+                               SmallVectorImpl<MCFixup> &Fixups,
+                               const MCSubtargetInfo &STI) const;
 };
 
 } // end anonymous namespace
@@ -310,8 +316,9 @@
                                   computeAvailableFeatures(STI.getFeatureBits()));
 
   int Opcode = MI.getOpcode();
-  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
-  const MCInstrDesc &Desc = MCII.get(Opcode);
+  APInt Encoding, Scratch;
+  getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI);
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
   unsigned bytes = Desc.getSize();
 
   // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
@@ -323,7 +330,7 @@
   }
 
   for (unsigned i = 0; i < bytes; i++) {
-    OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+    OS.write((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
   }
 
   // NSA encoding.
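Since getBinaryCodeForInstr now returns the encoding through an APInt out-parameter, the loop above peels bytes with extractBitsAsZExtValue instead of shifting a uint64_t, which is what lets the new 12-byte VOP3 DPP words flow through the same emission path. A minimal standalone sketch of that byte-peeling, assuming an LLVM build tree for llvm/ADT/APInt.h (the encoding value is made up):

// sketch: little-endian byte emission from an arbitrary-width APInt
#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <cstdio>

int main() {
  // A hypothetical 96-bit encoding: low 64 bits, then the high 32 bits.
  llvm::APInt Encoding(96, {0x1122334455667788ULL, 0x00000000AABBCCDDULL});
  for (unsigned i = 0; i < 12; ++i) // Desc.getSize() == 12 for VOP3 DPP
    printf("0x%02x,", (uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
  printf("\n"); // 0x88,0x77,...,0x11,0xdd,0xcc,0xbb,0xaa
  return 0;
}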
@@ -336,9 +343,11 @@
     unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
     unsigned NumPadding = (-NumExtraAddrs) & 3;
 
-    for (unsigned i = 0; i < NumExtraAddrs; ++i)
-      OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
-                                          Fixups, STI));
+    for (unsigned i = 0; i < NumExtraAddrs; ++i) {
+      getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), Encoding, Fixups,
+                        STI);
+      OS.write((uint8_t)Encoding.getLimitedValue());
+    }
     for (unsigned i = 0; i < NumPadding; ++i)
       OS.write(0);
   }
@@ -386,34 +395,36 @@
   }
 }
 
-unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
-                                            SmallVectorImpl<MCFixup> &Fixups,
-                                            const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+                                        APInt &Op,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
   const MCOperand &MO = MI.getOperand(OpNo);
 
   if (MO.isExpr()) {
     const MCExpr *Expr = MO.getExpr();
     MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
     Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
-    return 0;
+    Op = APInt::getNullValue(96);
+  } else {
+    getMachineOpValue(MI, MO, Op, Fixups, STI);
   }
-
-  return getMachineOpValue(MI, MO, Fixups, STI);
 }
 
-unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
-                                                SmallVectorImpl<MCFixup> &Fixups,
-                                                const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                            APInt &Op,
+                                            SmallVectorImpl<MCFixup> &Fixups,
+                                            const MCSubtargetInfo &STI) const {
   auto Offset = MI.getOperand(OpNo).getImm();
   // VI only supports 20-bit unsigned offsets.
   assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
-  return Offset;
+  Op = Offset;
 }
 
-unsigned
-SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
-                                    SmallVectorImpl<MCFixup> &Fixups,
-                                    const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+                                         APInt &Op,
+                                         SmallVectorImpl<MCFixup> &Fixups,
+                                         const MCSubtargetInfo &STI) const {
   using namespace AMDGPU::SDWA;
 
   uint64_t RegEnc = 0;
@@ -427,23 +438,24 @@
     if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
       RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
     }
-    return RegEnc;
+    Op = RegEnc;
+    return;
   } else {
     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
     if (Enc != ~0U && Enc != 255) {
-      return Enc | SDWA9EncValues::SRC_SGPR_MASK;
+      Op = Enc | SDWA9EncValues::SRC_SGPR_MASK;
+      return;
     }
   }
 
   llvm_unreachable("Unsupported operand kind");
-  return 0;
 }
 
-unsigned
-SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
-                                        SmallVectorImpl<MCFixup> &Fixups,
-                                        const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+                                             APInt &Op,
+                                             SmallVectorImpl<MCFixup> &Fixups,
+                                             const MCSubtargetInfo &STI) const {
   using namespace AMDGPU::SDWA;
 
   uint64_t RegEnc = 0;
@@ -456,13 +468,13 @@
     RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
     RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
   }
-  return RegEnc;
+  Op = RegEnc;
 }
 
-unsigned
-SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
-                                      SmallVectorImpl<MCFixup> &Fixups,
-                                      const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+                                           APInt &Op,
+                                           SmallVectorImpl<MCFixup> &Fixups,
+                                           const MCSubtargetInfo &STI) const {
   unsigned Reg = MI.getOperand(OpNo).getReg();
   uint64_t Enc = MRI.getEncodingValue(Reg);
 
@@ -481,7 +493,7 @@
       MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
     Enc |= 512;
 
-  return Enc;
+  Op = Enc;
 }
 
 static bool needsPCRel(const MCExpr *Expr) {
@@ -507,12 +519,21 @@
   llvm_unreachable("invalid kind");
 }
 
-uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
-                                            const MCOperand &MO,
-                                            SmallVectorImpl<MCFixup> &Fixups,
-                                            const MCSubtargetInfo &STI) const {
-  if (MO.isReg())
-    return MRI.getEncodingValue(MO.getReg());
+void SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                        const MCOperand &MO, APInt &Op,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  if (MO.isReg()){
+    Op = MRI.getEncodingValue(MO.getReg());
+    return;
+  }
+  unsigned OpNo = &MO - MI.begin();
+  getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
+void SIMCCodeEmitter::getMachineOpValueCommon(
+    const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
+    SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
 
   if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
     // FIXME: If this is expression is PCRel or not should not depend on what
@@ -535,28 +556,22 @@
     uint32_t Offset = Desc.getSize();
     assert(Offset == 4 || Offset == 8);
 
-    Fixups.push_back(
-        MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
-  }
-
-  // Figure out the operand number, needed for isSrcOperand check
-  unsigned OpNo = 0;
-  for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
-    if (&MO == &MI.getOperand(OpNo))
-      break;
+    Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
   }
 
   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
   if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
-    if (Enc != ~0U)
-      return Enc;
-
-  } else if (MO.isImm())
-    return MO.getImm();
+    if (Enc != ~0U) {
+      Op = Enc;
+      return;
+    }
+  } else if (MO.isImm()) {
+    Op = MO.getImm();
+    return;
+  }
 
   llvm_unreachable("Encoding of this operand type is not supported yet.");
-  return 0;
 }
 
 #define ENABLE_INSTR_PREDICATE_VERIFIER
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -264,9 +264,10 @@
   VOP3 = 1,
   SDWA = 2,
   SDWA9 = 3,
-  DPP = 4
+  DPP = 4,
+  VOP3_DPP = 5
 };
-}
+} // namespace AMDGPUAsmVariants
 
 namespace AMDGPU {
 namespace EncValues { // Encoding values of enum9/8/7 operands
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -270,6 +270,11 @@
   int Size = 8;
 }
 
+class Enc96 {
+  field bits<96> Inst;
+  int Size = 12;
+}
+
 def CPolBit {
   int GLC = 0;
   int SLC = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1272,14 +1272,6 @@
 def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
 def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
 
-def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
-
-def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
-def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
-def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
-def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
-def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
-
 def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
 def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
 def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
@@ -1290,6 +1282,14 @@
 def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
 def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
 
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
+def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
+
+def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
+def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
+def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
+
 def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
@@ -1335,10 +1335,18 @@
   let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
 }
 
+class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
+  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
+  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
+}
+
 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
 
+def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
+def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
+
 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
   let OperandNamespace = "AMDGPU";
   let OperandType = "OPERAND_INPUT_MODS";
@@ -1353,19 +1361,28 @@
 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
 
+def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
+def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
+
 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "RegOrImmWithInt"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithIntInputMods";
   let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
 }
+class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
+  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
+  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
+}
 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
+def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
 
 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
+def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
 
 class OpSelModsMatchClass : AsmOperandClass {
   let Name = "OpSelMods";
@@ -1692,6 +1709,19 @@
   );
 }
 
+// Src2 of VOP3 DPP instructions cannot be a literal
+class getVOP3DPPSrcForVT<ValueType VT> {
+  bit isFP = isFloatType<VT>.ret;
+  RegisterOperand ret =
+      !if (!eq(VT.Value, i1.Value), SSrc_i1,
+           !if (isFP,
+                !if (!eq(VT.Value, f16.Value), VCSrc_f16,
+                     !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
+                !if (!eq(VT.Value, i16.Value), VCSrc_b16,
+                     !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
+                          VCSrc_b32))));
+}
+
 // Float or packed int
 class isModifierType <ValueType SrcVT> {
   bit ret = !or(!eq(SrcVT.Value, f16.Value),
@@ -1732,6 +1762,17 @@
   Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
 }
 
+// Return type of input modifiers operand for specified input operand for DPP
+class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
+  bit isFP = isFloatType<VT>.ret;
+  bit isPacked = isPackedType<VT>.ret;
+  Operand ret =
+      !if (isFP,
+           !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
+                FP32VCSrcInputMods),
+           !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
+}
+
 // Return type of input modifiers operand specified input operand for SDWA
 class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
@@ -1864,8 +1905,8 @@
 }
 
 class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
-                     int NumSrcArgs, bit HasModifiers,
-                     Operand Src0Mod, Operand Src1Mod> {
+                     RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
 
   dag ret = !if (!eq(NumSrcArgs, 0),
                 // VOP1 without input operands (V_NOP)
                 (ins ),
             !if (!eq(NumSrcArgs, 1),
              !if (HasModifiers,
               // VOP1_DPP with modifiers
               (ins OldRC:$old, Src0Mod:$src0_modifiers,
                    Src0RC:$src0)
             /* else */,
              // VOP1_DPP without modifiers
              (ins OldRC:$old, Src0RC:$src0)
             /* endif */),
+            !if (!eq(NumSrcArgs, 2),
              !if (HasModifiers,
               // VOP2_DPP with modifiers
               (ins OldRC:$old,
                    Src0Mod:$src0_modifiers, Src0RC:$src0,
                    Src1Mod:$src1_modifiers, Src1RC:$src1)
             /* else */,
              // VOP2_DPP without modifiers
              (ins OldRC:$old,
                   Src0RC:$src0, Src1RC:$src1)
-            )));
+             )
+            /* NumSrcArgs == 3, VOP3 */,
+             !if (HasModifiers,
+              // VOP3_DPP with modifiers
+              (ins OldRC:$old,
+                   Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2)
+             /* else */,
+              // VOP3_DPP without modifiers
+              (ins OldRC:$old,
+                   Src0RC:$src0, Src1RC:$src1,
+                   Src2RC:$src2)
+             )
+            /* endif */)));
 }
 
 class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
-                 int NumSrcArgs, bit HasModifiers,
-                 Operand Src0Mod, Operand Src1Mod> {
-  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
-                               HasModifiers, Src0Mod, Src1Mod>.ret,
+                 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+                               HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
                  (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                       bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
 }
 
 class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
-                   int NumSrcArgs, bit HasModifiers,
-                   Operand Src0Mod, Operand Src1Mod> {
-  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, NumSrcArgs,
-                           HasModifiers, Src0Mod, Src1Mod>.ret,
+                   RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+                           HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
                  (ins FI:$fi));
 }
 
 class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
-                  int NumSrcArgs, bit HasModifiers,
-                  Operand Src0Mod, Operand Src1Mod> {
-  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
-                               HasModifiers, Src0Mod, Src1Mod>.ret,
+                  RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+                               HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
                  (ins dpp8:$dpp8, FI:$fi));
 }
 
+class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+  dag old = ( ins OldRC:$old );
+  dag base = VOP3Base;
+  dag ret = !con(
+    !if(!ne(NumSrcArgs, 0), old, (ins)),
+    base
+  );
+}
+
+class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+  dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
+}
+
+class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+  dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret,
+                 (ins FI:$fi));
+}
+
+class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+  dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+                 (ins dpp8:$dpp8, FI:$fi));
+}
 
 // Ins for SDWA
 class getInsSDWA
-  : getAsmDPP {
+  : getAsmDPP{
   let ret = dst#args#" $dpp8$fi";
 }
 
+class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
+                         bit HasOpSel, bit HasOMod, bit IsVOP3P,
+                         bit HasModifiers, bit Src0HasMods,
+                         bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
+  string dst = !if(HasDst,
+                   !if(!eq(DstVT.Size, 1),
+                       "$sdst",
+                       "$vdst"),
+                    ""); // use $sdst for VOPC
+  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1",
+                                             " $src1,"));
+  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                             " $src1_modifiers,"));
+  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+  string src0 = !if(Src0HasMods, fsrc0, isrc0);
+  string src1 = !if(Src1HasMods, fsrc1, isrc1);
+  string src2 = !if(Src2HasMods, fsrc2, isrc2);
+  string opsel = !if(HasOpSel, "$op_sel", "");
+  string 3PMods = !if(IsVOP3P,
+                      !if(HasOpSel, "$op_sel_hi", "")
+                        #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+                      "");
+  string clamp = !if(HasClamp, "$clamp", "");
+  string omod = !if(HasOMod, "$omod", "");
+
+  string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod;
+
+}
+
+class getAsmVOP3DPP<string base> {
+  string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+}
+
+class getAsmVOP3DPP16<string base> {
+  string ret = getAsmVOP3DPP<base>.ret # "$fi";
+}
+
+class getAsmVOP3DPP8<string base> {
+  string ret = base # " $dpp8$fi";
+}
+
 class getAsmSDWA {
   string dst = !if(HasDst,
@@ -2219,6 +2347,24 @@
   bit ret = !if(a, !if(b, 1, 0), 0);
 }
 
+class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
+                     ValueType Src1VT = i32, ValueType Src2VT = i32> {
+  bit ret = !if(!eq(DstVT.Size, 64),
+                0, // 64-bit dst No DPP for 64-bit operands
+                !if(!eq(Src0VT.Size, 64),
+                    0, // 64-bit src0
+                    !if(!eq(Src1VT.Size, 64),
+                        0, // 64-bit src1
+                        !if(!eq(Src2VT.Size, 64),
+                            0, // 64-bit src2
+                            1
+                        )
+                    )
+                )
+            );
+}
+
+
 def PatGenMode {
   int NoPattern = 0;
   int Pattern = 1;
@@ -2246,6 +2392,10 @@
   field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
   field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
   field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
+  field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
+  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
+  field RegisterOperand Src1VOP3DPP = VGPRSrc_32;
+  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
   field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
@@ -2253,6 +2403,8 @@
   field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
   field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
   field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
+  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
+  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
   field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
@@ -2300,7 +2452,9 @@
   field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
 
   field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
-  field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
+  field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
+                                HasExtVOP3DPP), 1, 0);
   field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
   field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
   field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
@@ -2324,6 +2478,8 @@
   field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
   field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
   field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+  field dag OutsVOP3DPP = OutsDPP;
+  field dag OutsVOP3DPP8 = OutsDPP8;
   field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
@@ -2339,18 +2495,24 @@
                  getOpSelMod<Src0VT>.ret,
                  getOpSelMod<Src1VT>.ret>.ret;
   field dag InsDPP = !if(HasExtDPP,
-                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
-                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
+                                   NumSrcArgs, HasModifiers,
+                                   Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
                          (ins));
-  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
-                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
-  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
-                                 0, Src0ModDPP, Src1ModDPP>.ret;
+  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
+                                   NumSrcArgs, HasModifiers,
+                                   Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
+                                 NumSrcArgs, 0,
+                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+  field dag InsVOP3Base = getInsVOP3Base.ret;
+  field dag InsVOP3DPP = getInsVOP3DPP.ret;
+  field dag InsVOP3DPP16 = getInsVOP3DPP16.ret;
+  field dag InsVOP3DPP8 = getInsVOP3DPP8.ret;
   field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                  HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                  DstVT>.ret;
-
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
   field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
   field string AsmVOP3P = getAsmVOP3P.ret;
@@ -2365,15 +2527,21 @@
   // DPP8 encoding has no fields for modifiers, and it is enforced by setting
   // the asm operand name via this HasModifiers flag
   field string AsmDPP8 = getAsmDPP8.ret;
+  field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
+   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods,
+   HasSrc2FloatMods, DstVT>.ret;
+  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
+  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
+  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
   field string AsmSDWA = getAsmSDWA.ret;
   field string AsmSDWA9 = getAsmSDWA9.ret;
-
   field string TieRegDPP = "$old";
 }
 
-class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
+ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
   let HasExt = 0;
   let HasExtDPP = 0;
+  let HasExtVOP3DPP = 0;
   let HasExt32BitDPP = 0;
   let HasExt64BitDPP = 0;
   let HasExtSDWA = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1068,6 +1068,15 @@
   let DecoderMethod = "DecodeVS_32RegisterClass";
 }
 
+//===----------------------------------------------------------------------===//
+//  VGPRSrc_*
+//===----------------------------------------------------------------------===//
+
+// An 8-bit RegisterOperand wrapper for a VGPR
+def VGPRSrc_32 : RegisterOperand<VGPR_32> {
+  let DecoderMethod = "DecodeVGPR_32RegisterClass";
+}
+
 //===----------------------------------------------------------------------===//
 //  ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -369,6 +369,16 @@
   let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
   let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
   let AsmDPP16 = AsmDPP#"$fi";
+  let InsDPP = (ins DstRCDPP:$old,
+                    Src0DPP:$src0,
+                    Src1DPP:$src1,
+                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+  let InsDPP8 = (ins DstRCDPP:$old,
+                     Src0DPP:$src0,
+                     Src1DPP:$src1,
+                     dpp8:$dpp8, FI:$fi);
   let Outs32 = (outs DstRC:$vdst);
   let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
 }
@@ -376,6 +386,7 @@
 // Write out to vcc or arbitrary SGPR and read in from vcc or
 // arbitrary SGPR.
 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
+  let HasSrc2Mods = 0;
   let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
   let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
   let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -402,6 +413,10 @@
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+  let InsDPP8 = (ins DstRCDPP:$old,
+                     Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+                     dpp8:$dpp8, FI:$fi);
 
   let HasExt = 1;
   let HasExtDPP = 1;
@@ -411,7 +426,7 @@
 }
 
 // Read in from vcc or arbitrary SGPR.
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
+class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT, /*EnableF32SrcMods=*/1> {
   let Asm32 = "$vdst, $src0, $src1";
   let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -439,6 +454,8 @@
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+  let InsDPP8 = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1,
+                     dpp8:$dpp8, FI:$fi);
 
   let HasExt = 1;
   let HasExtDPP = 1;
@@ -447,7 +464,9 @@
   let HasExtSDWA9 = 1;
 }
 
-def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
+def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
+
+def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
   let Outs32 = (outs SReg_32:$vdst);
   let Outs64 = Outs32;
   let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -11,6 +11,8 @@
 let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
 def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
   let Outs64 = (outs DstRC.RegClass:$vdst);
+  let HasExtVOP3DPP = 0;
+  let HasExtDPP = 0;
 }
 def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
   let Outs64 = (outs DstRC.RegClass:$vdst);
@@ -21,6 +23,8 @@
   let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
   let IsSingle = 1;
+  let HasExtVOP3DPP = 0;
+  let HasExtDPP = 0;
 }
 
 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile;
@@ -28,16 +32,20 @@
 
 def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
   let HasClamp = 1;
-  let IsSingle = 1;
+  let IsSingle = 1;
 
   let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
 }
 
 class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
+  let HasExtVOP3DPP = 0;
+  let HasExtDPP = 0;
 }
 
 def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
+  let HasExtVOP3DPP = 0;
+  let HasExtDPP = 0;
 }
 
 //===----------------------------------------------------------------------===//
@@ -569,6 +577,8 @@
                 IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
                 VGPR_32:$vdst_in, op_sel0:$op_sel);
   let HasClamp = 0;
+  let HasExtVOP3DPP = 0;
+  let HasExtDPP = 0;
 }
 
 class PermlanePat
   : VOP3_Profile {
+  // FIXME VOP3 DPP versions are unsupported
+  let HasExtVOP3DPP = 0;
   let HasClamp = 0;
   let HasOMod = 0;
   let InsVOP3OpSel = getInsVOP3OpSel
 
 defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
 defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
-defm V_DOT2_F16_F16 : VOP3_Realtriple_gfx11<0x266>;
-defm V_DOT2_BF16_BF16 : VOP3_Realtriple_gfx11<0x267>;
+// FIXME VOP3 DPP Dot instructions are unsupported
+defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>;
 defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
 defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
 defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -658,8 +658,56 @@
   let Inst{63-60} = row_mask;
 }
 
-class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
-  InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+class VOP3_DPPe_Fields_Base {
+  bits<9> dpp_ctrl;
+  bits<1> bound_ctrl;
+  bits<4> bank_mask;
+  bits<4> row_mask;
+  bit     fi;
+}
+class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base {
+  bits<8> src0;
+}
+
+// Common refers to common between DPP and DPP8
+class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
+  bits<4> src0_modifiers;
+  bits<3> src1_modifiers;
+  bits<3> src2_modifiers;
+  bits<1> clamp;
+  bits<2> omod;
+
+  let Inst{8}  = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+  let Inst{9}  = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+  let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+  // OPSEL must be set such that the low result only uses low inputs, and the
+  // high result only uses high inputs.
+  let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?);
+  let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, 0),?);
+  let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?);
+  let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?);
+  let Inst{15} = !if(P.HasClamp, clamp, 0);
+  let Inst{25-16} = op;
+  let Inst{31-26} = 0x35;
+
+  let Inst{60-59} = !if(P.HasOMod, omod, 0);
+  let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+  let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
+
+class VOP3_DPPe_Common<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P> {
+  bits<8> vdst;
+  bits<9> src1;
+  bits<9> src2;
+
+  let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+  let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+}
+
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
+                      dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
+  InstSI <P.OutsDPP, Ins, OpName#asmOps, pattern>,
   VOP <OpName>,
   SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE> {
@@ -682,7 +730,7 @@
   let isConvergent = 1;
 
   string Mnemonic = OpName;
-  string AsmOperands = P.AsmDPP;
+  string AsmOperands = asmOps;
 
   let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
   let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
@@ -696,6 +744,17 @@
   VOPProfile Pfl = P;
 }
 
+class VOP3_DPP_Pseudo <string OpName, VOPProfile P> :
+  VOP_DPP_Pseudo<OpName, P, [], P.InsVOP3DPP16, P.AsmVOP3DPP16> {
+  let PseudoInstr = OpName#"_e64"#"_dpp";
+  let OutOperandList = P.OutsVOP3DPP;
+  let Size = 12;
+  let VOP3 = 1;
+  let AsmMatchConverter = "cvtVOP3DPP";
+  let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+                           AMDGPUAsmVariants.Disable);
+}
+
 class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
   InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
   SIMCInstr <ps.PseudoInstr, EncodingFamily> {
@@ -730,11 +789,10 @@
   let TRANS = ps.TRANS;
 }
 
-class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
-               dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
-               string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
-  InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []>,
-  VOP_DPPe <P, IsDPP16> {
+class VOP_DPP_Base <string OpName, VOPProfile P,
+                    dag InsDPP,
+                    string AsmDPP> :
+  InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []> {
 
   let mayLoad = 0;
   let mayStore = 0;
@@ -755,6 +813,40 @@
   let DecoderNamespace = "DPP";
 }
 
+class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
+               dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
+               string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
+  VOP_DPP_Base<OpName, P, InsDPP, AsmDPP>, VOP_DPPe<P, IsDPP16>;
+
+class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
+                     dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+                     string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
+  VOP_DPP_Base<OpName, P, InsDPP, AsmDPP> {
+  let OutOperandList = P.OutsVOP3DPP;
+  let AsmMatchConverter = "cvtVOP3DPP";
+  let VOP3 = 1;
+  let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+                           AMDGPUAsmVariants.Disable);
+  let Size = 12;
+}
+
+class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
+                dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+                string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
+  VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPPe_Common<op, P>,
+  VOP3_DPPe_Fields {
+
+  let Inst{40-32} = 0xfa;
+  let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+  let Inst{80-72} = dpp_ctrl;
+  let Inst{82}    = !if(IsDPP16, fi, ?);
+  let Inst{83}    = bound_ctrl;
+
+  // Inst{87-84} ignored by hw
+  let Inst{91-88} = bank_mask;
+  let Inst{95-92} = row_mask;
+}
+
 class VOP_DPP8e <VOPProfile P> : Enc64 {
   bits<8> src0;
   bits<24> dpp8;
@@ -764,9 +856,14 @@
   let Inst{63-40} = dpp8{23-0};
 }
 
-class VOP_DPP8 <string OpName, VOPProfile P> :
-  InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []>,
-  VOP_DPP8e<P> {
+class VOP3_DPP8e_Fields {
+  bits<8> src0;
+  bits<24> dpp8;
+  bits<9> fi;
+}
+
+class VOP_DPP8_Base <string OpName, VOPProfile P> :
+  InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []> {
 
   let mayLoad = 0;
   let mayStore = 0;
@@ -780,12 +877,34 @@
   let AsmMatchConverter = "cvtDPP8";
   let SubtargetPredicate = HasDPP8;
   let AssemblerPredicate = HasDPP8;
-  let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
-                           AMDGPUAsmVariants.Disable);
+  let AsmVariantName = AMDGPUAsmVariants.DPP;
   let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
   let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
 }
 
+class VOP_DPP8 <string OpName, VOPProfile P> :
+  VOP_DPP8_Base<OpName, P>, VOP_DPP8e<P>;
+
+class VOP3_DPP8_Base <string OpName, VOPProfile P> :
+  VOP_DPP8_Base<OpName, P> {
+  let OutOperandList = P.OutsVOP3DPP8;
+  let AsmMatchConverter = "cvtVOP3DPP8";
+  let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+                           AMDGPUAsmVariants.Disable);
+  let VOP3 = 1;
+  let Size = 12;
+}
+
+
+class VOP3_DPP8 <bits<10> op, string OpName, VOPProfile P> :
+  VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P>,
+  VOP3_DPP8e_Fields {
+
+  let Inst{40-32} = fi;
+  let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+  let Inst{95-72} = dpp8{23-0};
+}
+
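To make the bit positions above concrete, the following standalone C++ sketch (not part of the patch) pulls the VOP3_DPPe_Common/VOP3_DPP fields back out of one 12-byte DPP16 word, the encoding checked by the first quad_perm test added to gfx11_asm_dpp.s further down, using the same Lo/Hi split as DecoderUInt128:

// sketch: decode v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0]
//         row_mask:0x0 bank_mask:0x0
#include <cassert>
#include <cstdint>

static uint64_t field(uint64_t lo, uint64_t hi, unsigned pos, unsigned n) {
  uint64_t v = pos < 64 ? (lo >> pos | hi << 1 << (63 - pos))
                        : hi >> (pos - 64);
  return v & ((uint64_t(2) << (n - 1)) - 1);
}

int main() {
  const uint8_t enc[12] = {0x00, 0x00, 0x10, 0xd6, 0xfa, 0x04,
                           0x0e, 0x04, 0x01, 0x1b, 0x00, 0x00};
  uint64_t lo = 0, hi = 0;
  for (int i = 0; i < 8; ++i)  lo |= (uint64_t)enc[i] << (8 * i);
  for (int i = 8; i < 12; ++i) hi |= (uint64_t)enc[i] << (8 * (i - 8));

  assert(field(lo, hi, 0, 8) == 0x00);    // vdst      = v0
  assert(field(lo, hi, 16, 10) == 0x210); // op        = v_bfe_u32
  assert(field(lo, hi, 26, 6) == 0x35);   // VOP3 encoding marker
  assert(field(lo, hi, 32, 9) == 0x0fa);  // src0 slot = DPP escape value
  assert(field(lo, hi, 41, 9) == 0x102);  // src1      = v2 (0x100 + 2)
  assert(field(lo, hi, 50, 9) == 0x103);  // src2      = v3
  assert(field(lo, hi, 64, 8) == 0x01);   // real src0 = v1
  assert(field(lo, hi, 72, 9) == 0x1b);   // dpp_ctrl  = quad_perm:[3,2,1,0]
  assert(field(lo, hi, 88, 4) == 0x0);    // bank_mask
  assert(field(lo, hi, 92, 4) == 0x0);    // row_mask
  return 0;
}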
"DPP8GFX11"; + } + } } // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" // VOP1 and VOP2 depend on these triple defs multiclass VOP3_Realtriple_gfx11 op, bit isSingle = 0, string opName = NAME> : - VOP3_Real_Base_gfx11; + VOP3_Real_Base_gfx11, + VOP3_Real_dpp_Base_gfx11, + VOP3_Real_dpp8_Base_gfx11; multiclass VOP3Only_Realtriple_gfx11 op> : VOP3_Realtriple_gfx11; multiclass VOP3_Realtriple_with_name_gfx11 op, string opName, string asmName, bit isSingle = 0> : - VOP3_Real_with_name_gfx11; + VOP3_Real_with_name_gfx11, + VOP3_Real_dpp_with_name_gfx11, + VOP3_Real_dpp8_with_name_gfx11; multiclass VOP3Only_Realtriple_with_name_gfx11 op, string opName, string asmName> : @@ -1134,7 +1342,9 @@ multiclass VOP3be_Realtriple_gfx11< bits<10> op, bit isSingle = 0, string opName = NAME, string asmName = !cast(opName#"_e64").Mnemonic> : - VOP3be_Real_gfx11; + VOP3be_Real_gfx11, + VOP3be_Real_dpp_gfx11, + VOP3be_Real_dpp8_gfx11; multiclass VOP3beOnly_Realtriple_gfx11 op> : VOP3be_Realtriple_gfx11; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_dpp.s b/llvm/test/MC/AMDGPU/gfx11_asm_dpp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_dpp.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +; DPP8 + +; VOP3 +v_bfe_u32_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] + +v_maxmin_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] + +v_maxmin_f32_e64_dpp v0, v1, v2, v3 div:2 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa] + +v_minmax_f32_e64_dpp v0, abs(v1), v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] + +v_fma_f32_e64_dpp v80, v81, v82, v81 dpp8:[0,1,6,3,4,5,6,7] +// GFX11: encoding: [0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] + +v_fma_f32_e64_dpp v80, v81, abs(v82), v81 dpp8:[0,1,6,3,4,5,6,7] +// GFX11: encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] + +v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, s4 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05] + +; DPP + + +; VOP3 +v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00] + +v_fma_f32_e64_dpp v93, abs(v94), v95, v94 quad_perm:[3,2,1,0] bank_mask:0xe +// GFX11: encoding: [0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe] + +v_sub_nc_i32_e64_dpp v93, v94, v95 row_ror:7 bank_mask:0x1 bound_ctrl:0 +// GFX11: encoding: [0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1] + +v_lshl_or_b32_e64_dpp v255, v5, v0, vcc_hi row_xmask:0x6 row_mask:0x0 fi:1 
+// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f] + +v_cubesc_f32_e64_dpp v5, v1, v2, 1 row_shr:4 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s2 row_shr:4 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_err.s b/llvm/test/MC/AMDGPU/gfx11_err.s --- a/llvm/test/MC/AMDGPU/gfx11_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_err.s @@ -35,3 +35,12 @@ global_atomic_cmpswap_x2 v[1:4], v3, v[5:8], off offset:2047 glc // GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cubesc_f32_e64_dpp v5, v1, v2, 12345678 row_shr:4 row_mask:0xf bank_mask:0xf +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add3_u32_e64_dpp v5, v1, v2, 49812340 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -31,7 +31,7 @@ // GFX9ERR: error: literal operands are not supported v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// GFX9ERR: error: not a valid operand. +// GFX9ERR: error: invalid operand for instruction global_load_lds_dword v[2:3], off // GFX9ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -115,19 +115,19 @@ // Check modifiers //===----------------------------------------------------------------------===// -// NOSICI: error: not a valid operand. +// NOSICI: error: operands are not valid for this GPU or mode // VI9: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1] v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: operands are not valid for this GPU or mode // VI9: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1] v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: operands are not valid for this GPU or mode // VI9: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1] v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: not a valid operand. +// NOSICI: error: operands are not valid for this GPU or mode // VI9: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1] v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 @@ -136,18 +136,18 @@ //===----------------------------------------------------------------------===// // NOSICI: error: not a valid operand. 
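The dpp8:[...] operands above pack eight 3-bit lane selectors, lane 0 in the low bits, into the 24-bit field that VOP3_DPP8 routes to Inst{95-72}. A standalone C++ sketch (not part of the patch) reproduces the packing against two of the encodings checked above:

// sketch: pack dpp8 lane selectors and print the last three encoding bytes
#include <cstdint>
#include <cstdio>

static uint32_t packDpp8(const int sel[8]) {
  uint32_t v = 0;
  for (int i = 0; i < 8; ++i)
    v |= (uint32_t)(sel[i] & 7) << (3 * i); // lane i -> bits [3i+2 : 3i]
  return v; // 24-bit value
}

int main() {
  const int fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  const int rev[8] = {7, 6, 5, 4, 3, 2, 1, 0};
  uint32_t f = packDpp8(fwd), r = packDpp8(rev);
  // Little-endian byte order, as in the encoding checks.
  printf("[%#04x,%#04x,%#04x]\n", f & 0xff, (f >> 8) & 0xff, f >> 16);
  // -> [0x88,0xc6,0xfa], matching dpp8:[0,1,2,3,4,5,6,7] above
  printf("[%#04x,%#04x,%#04x]\n", r & 0xff, (r >> 8) & 0xff, r >> 16);
  // -> [0x77,0x39,0x05], matching dpp8:[7,6,5,4,3,2,1,0] above
  return 0;
}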
diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s
--- a/llvm/test/MC/AMDGPU/vop_dpp.s
+++ b/llvm/test/MC/AMDGPU/vop_dpp.s
@@ -115,19 +115,19 @@
 // Check modifiers
 //===----------------------------------------------------------------------===//

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1]
 v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1]
 v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1]
 v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1]
 v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -136,18 +136,18 @@
 //===----------------------------------------------------------------------===//

 // NOSICI: error: not a valid operand.
-// GCN: v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x7e,0x00,0x01,0x09,0xa1]
+// GCN: v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_u32_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_fract_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -155,95 +155,95 @@
 // VI9: v_mov_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_mov_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x0a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x0c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x10,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f16_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x14,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f16_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x16,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_rpi_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x18,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_rpi_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_flr_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x1a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_flr_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_off_f32_i4_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x1c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_off_f32_i4 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_ubyte0_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x22,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte0 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_ubyte1_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x24,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte1 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_ubyte2_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x26,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte2 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cvt_f32_ubyte3_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x28,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte3 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_trunc_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x38,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_trunc_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_ceil_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x3a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ceil_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_rndne_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rndne_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_floor_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_floor_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_exp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x40,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_exp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_log_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x42,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_log_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_rcp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x44,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rcp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_rcp_iflag_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x46,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rcp_iflag_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_rsq_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x48,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rsq_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_sqrt_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x4e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_sqrt_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_cos_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x54,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cos_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -267,22 +267,22 @@
 // VI9: v_ffbh_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x5e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ffbh_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_frexp_exp_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x66,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_exp_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_mant_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // VI9: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1]
 // NOSI: error: instruction not supported on this GPU
-// NOCI: error: not a valid operand.
+// NOCI: error: operands are not valid for this GPU or mode
 v_log_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // VI9: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1]
 // NOSI: error: instruction not supported on this GPU
-// NOCI: error: not a valid operand.
+// NOCI: error: operands are not valid for this GPU or mode
 v_exp_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // NOSICI: error: instruction not supported on this GPU
@@ -382,23 +382,23 @@
 //===----------------------------------------------------------------------===//

 // ToDo: VOP2bInst instructions: v_add_u32, v_sub_u32 ... (vcc and ApplyMnemonic in AsmMatcherEmitter.cpp)

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_mac_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x01,0x01,0xff]
 v_mac_f32 v0, v0, v0 row_shl:1

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_mac_f32_dpp v0, v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x1f,0x01,0xff]
 v_mac_f32 v0, v0, v0 row_shr:0xf

-// NOSICI: error: not a valid operand.
-// VI9: v_mac_f32_dpp v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf]
+// NOSICI: error: operands are not valid for this GPU or mode
+// VI9: v_mac_f32_dpp v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf]
 v_mac_f32 v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1]
 v_add_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1]
 v_min_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -406,19 +406,19 @@
 // VI9: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1]
 v_and_b32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_mul_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x0c,0x02,0x01,0x09,0xa1]
 v_mul_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_sub_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x04,0x02,0x01,0x09,0xa1]
 v_sub_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_subrev_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x06,0x02,0x01,0x09,0xa1]
 v_subrev_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_mul_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x0a,0x02,0x01,0x09,0xa1]
 v_mul_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -426,7 +426,7 @@
 // VI9: v_mul_hi_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x0e,0x02,0x01,0x09,0xa1]
 v_mul_hi_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_mul_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x10,0x02,0x01,0x09,0xa1]
 v_mul_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -434,7 +434,7 @@
 // VI9: v_mul_hi_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x12,0x02,0x01,0x09,0xa1]
 v_mul_hi_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // VI9: v_max_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x16,0x02,0x01,0x09,0xa1]
 v_max_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -551,46 +551,46 @@
 v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // NOSICI: error: instruction not supported on this GPU
-// NOGFX9: error: not a valid operand.
+// NOGFX9: error: operands are not valid for this GPU or mode
 // VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
 v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // NOSICI: error: instruction not supported on this GPU
-// NOGFX9: error: not a valid operand.
+// NOGFX9: error: operands are not valid for this GPU or mode
 // VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
 v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

 // NOSICI: error: instruction not supported on this GPU
-// NOGFX9: error: not a valid operand.
+// NOGFX9: error: operands are not valid for this GPU or mode
 // VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
 v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOGFX9: error: instruction not supported on this GPU
 // VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
 v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOGFX9: error: instruction not supported on this GPU
 // VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
 v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOGFX9: error: instruction not supported on this GPU
 // VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
 v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOVI: error: instruction not supported on this GPU
 // GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
 v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOVI: error: instruction not supported on this GPU
 // GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
 v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand.
+// NOSICI: error: operands are not valid for this GPU or mode
 // NOVI: error: instruction not supported on this GPU
 // GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
 v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -619,7 +619,7 @@
 v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0

 //===----------------------------------------------------------------------===//
-// Check that immideates and scalar regs are not supported
+// Check that immediates and scalar regs are not supported
 //===----------------------------------------------------------------------===//

 // NOSICI: error: not a valid operand
@@ -632,7 +632,7 @@
 // NOGFX9: error: invalid operand for instruction
 v_and_b32 v0, 42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand
+// NOSICI: error: invalid operand for instruction
 // NOVI: error: invalid operand for instruction
 // NOGFX9: error: invalid operand for instruction
 v_add_f32 v0, v1, 345 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

@@ -647,7 +647,7 @@
 // NOGFX9: error: invalid operand for instruction
 v_and_b32 v0, s42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0

-// NOSICI: error: not a valid operand
+// NOSICI: error: invalid operand for instruction
 // NOVI: error: invalid operand for instruction
 // NOGFX9: error: invalid operand for instruction
 v_add_f32 v0, v1, s45 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
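Note on one detail exercised in vop_dpp.s above: the v_nop check now expects bound_ctrl:1 even though the input still writes bound_ctrl:0. The assembler has long accepted the legacy spelling bound_ctrl:0 while setting the BOUND_CTRL encoding bit anyway, so the canonical round-trip form is bound_ctrl:1. A rough standalone C++ sketch of that conversion follows; the function name is assumed for illustration, not LLVM's exact API.

#include <cassert>
#include <cstdint>

// Legacy DPP syntax quirk: `bound_ctrl:0` sets the BOUND_CTRL bit anyway,
// so it canonicalizes to 1 before encoding and printing.
static bool convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1; // old syntax: bound_ctrl:0 means "bound_ctrl on"
    return true;
  }
  return BoundCtrl == 1;
}

int main() {
  int64_t BC = 0;                // written as bound_ctrl:0 in the .s file
  assert(convertDppBoundCtrl(BC));
  assert(BC == 1);               // printed back as bound_ctrl:1
}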
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
@@ -10522,6 +10522,12 @@
 # GFX11: v_add3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x55,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05
+
+# GFX11: v_add3_u32_e64_dpp v5, v1, v2, s4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_add_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x27,0xd7,0x01,0x05,0x02,0x00]
 0xfe,0x00,0x27,0xd7,0x01,0x05,0x02,0x00

@@ -11266,6 +11272,12 @@
 # GFX11: v_bfe_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x10,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
+0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa
+
+# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00]
+0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00
+
 # GFX11: v_bfi_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04]
 0xff,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04

@@ -11932,6 +11944,12 @@
 # GFX11: v_cubesc_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x0d,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_cubesc_f32_e64_dpp v5, v1, v2, 1 row_shr:4 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff]
+0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff
+
+# GFX11: v_cubesc_f32_e64_dpp v5, v1, v2, s2 row_shr:4 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff]
+0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff
+
 # GFX11: v_cubetc_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04]
 0xff,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04

@@ -12871,6 +12889,15 @@
 # GFX11: v_fma_f32 v5, v1, null, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xf9,0x0c,0x04]
 0x05,0x00,0x13,0xd6,0x01,0xf9,0x0c,0x04

+# GFX11: v_fma_f32_e64_dpp v80, v81, v82, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa]
+0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa
+
+# GFX11: v_fma_f32_e64_dpp v80, v81, |v82|, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa]
+0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa
+
+# GFX11: v_fma_f32_e64_dpp v93, |v94|, v95, v94 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe]
+0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe
+
 # GFX11: v_fma_dx9_zero_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04]
 0xff,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04

@@ -13843,6 +13870,9 @@
 # GFX11: v_lshl_or_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x56,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_lshl_or_b32_e64_dpp v255, v5, v0, vcc_hi row_xmask:6 row_mask:0x0 bank_mask:0xf fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f]
+0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f
+
 # GFX11: v_lshlrev_b64 v[254:255], v1, v[2:3] ; encoding: [0xfe,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00]
 0xfe,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00

@@ -15032,6 +15062,9 @@
 # GFX11: v_max3_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x1c,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_max3_i16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04]
 0xff,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04

@@ -15563,6 +15596,12 @@
 # GFX11: v_max_f64 v[5:6], vcc, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x6a,0x04,0x02,0x00]
 0x05,0x00,0x2a,0xd7,0x6a,0x04,0x02,0x00

+# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
+0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa
+
+# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 div:2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]
+0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa
+
 # GFX11: v_med3_f16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04]
 0xff,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04

@@ -16970,6 +17009,12 @@
 # GFX11: v_min3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0x1b,0xd6,0x6a,0x04,0x0e,0x04

+# GFX11: v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa]
+0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa
+
+# GFX11: v_minmax_f32_e64_dpp v0, |v1|, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa]
+0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa
+
 # GFX11: v_min_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x29,0xd7,0x01,0x05,0x02,0x00]
 0xfe,0x00,0x29,0xd7,0x01,0x05,0x02,0x00

@@ -19256,6 +19301,9 @@
 # GFX11: v_sqrt_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x68,0x0a,0x7e]
 0x6a,0x68,0x0a,0x7e

+# GFX11: v_sub_nc_i32_e64_dpp v93, v94, v95 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1]
+0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1
+
 # GFX11: v_trig_preop_f64 v[254:255], v[1:2], v2 ; encoding: [0xfe,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00]
 0xfe,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00
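Note on how the disassembler lines above distinguish the new variants: in the 96-bit VOP3 DPP forms, the SRC0 field carries a magic value instead of a register, and the real src0 moves into the trailing DPP dword. The magic values match byte 4 of the encodings checked above (0xfa for the quad_perm/row_* forms, 0xe9 for dpp8, 0xea for dpp8 with fi:1). A hedged standalone C++ sketch; the enum and function names are illustrative, not LLVM's.

#include <cstdint>
#include <cstdio>

enum class Vop3DppKind { None, Dpp16, Dpp8, Dpp8Fi };

// Classify a VOP3 instruction word by the value found in its SRC0 field.
static Vop3DppKind classifySrc0(uint16_t Src0Enc) {
  switch (Src0Enc) {
  case 0xfa: return Vop3DppKind::Dpp16;  // quad_perm/row_* forms, byte 4 = 0xfa
  case 0xe9: return Vop3DppKind::Dpp8;   // dpp8:[...] forms, byte 4 = 0xe9
  case 0xea: return Vop3DppKind::Dpp8Fi; // dpp8:[...] fi:1, byte 4 = 0xea
  default:   return Vop3DppKind::None;   // ordinary VOP3 source operand
  }
}

int main() {
  // v_maxmin_f32_e64_dpp ... div:2 dpp8:[...] fi:1 encodes byte 4 as 0xea.
  std::printf("%d\n", static_cast<int>(classifySrc0(0xea))); // prints 3
}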