diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -87,6 +87,17 @@ !listconcat([pred], !filter(item, lst, !ne(item, pred))); } +// Add a Register to the list if does not already exist +class RegAppend lst, Register reg> { + list ret = + !listconcat([reg], !filter(item, lst, !ne(item, reg))); +} +// Get the union of two Register lists +class RegListUnion lstA, list lstB> { + list ret = + !foldl(lstA, lstB, temp, item, RegAppend.ret); +} + class PredicateControl { Predicate SubtargetPredicate = TruePredicate; Predicate AssemblerPredicate = TruePredicate; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1680,7 +1680,7 @@ bool parseExpr(int64_t &Imm, StringRef Expected = ""); bool parseExpr(OperandVector &Operands); StringRef getTokenStr() const; - AsmToken peekToken(); + AsmToken peekToken(bool ShouldSkipSpace = true); AsmToken getToken() const; SMLoc getLoc() const; void lex(); @@ -1738,6 +1738,7 @@ void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); + void cvtVOPD(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); @@ -1804,6 +1805,7 @@ AMDGPUOperand::Ptr defaultWaitVDST() const; AMDGPUOperand::Ptr defaultWaitEXP() const; + OperandMatchResultTy parseVOPD(OperandVector &Operands); }; struct OptionalOperand { @@ -2909,7 +2911,8 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { // TODO: add syntactic sugar for 1/(2*PI) - assert(!isRegister()); + if (isRegister()) + return 
MatchOperand_NoMatch; assert(!isModifier()); const auto& Tok = getToken(); @@ -5671,8 +5674,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, OperandMode Mode) { + OperandMatchResultTy ResTy = parseVOPD(Operands); + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + isToken(AsmToken::EndOfStatement)) + return ResTy; + // Try to parse with a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + ResTy = MatchOperandParserImpl(Operands, Mnemonic); // If we successfully parsed the operand or if there as an error parsing, // we are done. @@ -7108,9 +7116,10 @@ return Parser.getTok(); } -AsmToken -AMDGPUAsmParser::peekToken() { - return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); +AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { + return isToken(AsmToken::EndOfStatement) + ? getToken() + : getLexer().peekTok(ShouldSkipSpace); } void @@ -8316,6 +8325,118 @@ cvtVOP3P(Inst, Operands, OptIdx); } +//===----------------------------------------------------------------------===// +// VOPD +//===----------------------------------------------------------------------===// + +OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { + if (!hasVOPD(getSTI())) + return MatchOperand_NoMatch; + + if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { + SMLoc S = getLoc(); + lex(); + lex(); + Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); + const MCExpr *Expr; + if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + return MatchOperand_Success; + } + Error(S, "invalid VOPD :: usage"); + return MatchOperand_ParseFail; + } + return MatchOperand_NoMatch; +} + +// Create VOPD MCInst operands using parsed assembler operands. 
+// Parsed VOPD operands are ordered as follows:
+//   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
+//   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// If both OpX and OpY have an imm, the first imm has a different name:
+//   OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
+//   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// MCInst operands have the following order:
+//   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
+void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+  // Appends the parsed operand at position ParsedIdx to Inst. A register or
+  // an immediate contributes one MCInst operand; a token contributes none.
+  auto addOp = [&](uint16_t ParsedIdx) { // NOLINT:function pointer
+    auto &Parsed = static_cast<AMDGPUOperand &>(*Operands[ParsedIdx]);
+    if (Parsed.isReg())
+      Parsed.addRegOperands(Inst, 1);
+    else if (Parsed.isImm())
+      Parsed.addImmOperands(Inst, 1);
+    else if (!Parsed.isToken())
+      // Tokens like 'offen' are sometimes hard-coded into the asm string and
+      // have no MCInst operand; any other operand kind is unexpected here.
+      llvm_unreachable("Unhandled operand type in cvtVOPD");
+  };
+
+  // Resolve every named operand once; -1 means the opcode does not have it.
+  const unsigned Opcode = Inst.getOpcode();
+  const auto Vsrc1XMCIdx =
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
+  const auto Vsrc1YMCIdx =
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
+  const auto ImmMCIdx =
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+  const auto ImmDeferredMCIdx =
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::immDeferred);
+
+  // Indices into MCInst.Operands
+  const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
+  const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
+  const auto MinOpYImmMCIndex = 4;   // dstX, dstY, src0X, src0Y, imm, ...
+
+  const bool HasDeferredImm = ImmDeferredMCIdx != -1;
+  const bool HasVsrc1X = Vsrc1XMCIdx != -1;
+  const bool HasVsrc1Y = Vsrc1YMCIdx != -1;
+
+  // 'imm' is attributed to OpX only when OpX also has vsrc1X and 'imm' sits
+  // in one of the two MCInst slots an OpX fmamk/fmaak literal can occupy.
+  const bool ImmAtOpXSlot =
+      ImmMCIdx == FmamkOpXImmMCIndex || ImmMCIdx == FmaakOpXImmMCIndex;
+  const bool HasImmX = HasDeferredImm || (HasVsrc1X && ImmAtOpXSlot);
+
+  // The earliest slot for an OpY 'imm' moves up by one when OpX has vsrc1X.
+  const bool HasImmY =
+      HasDeferredImm || ImmMCIdx >= MinOpYImmMCIndex + HasVsrc1X;
+
+  // Indices of parsed operands relative to dst
+  const auto DstIdx = 0;
+  const auto Src0Idx = 1;
+  const auto Vsrc1OrImmIdx = 2;
+
+  const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
+  const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
+
+  // Offsets into parsed operands
+  const auto OpXFirstOperandOffset = 1;
+  const auto OpYFirstOperandOffset =
+      OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
+
+  // Adds src0 of one component followed by its remaining source operands, in
+  // parsed order: imm then vsrc1 for fmamk, vsrc1 then imm for fmaak, a lone
+  // vsrc1 otherwise, and nothing more for v_mov.
+  auto addSrcOps = [&](int FirstOperandOffset, bool HasImm, bool HasVsrc1) {
+    addOp(FirstOperandOffset + Src0Idx);
+    if (HasImm) {
+      addOp(FirstOperandOffset + Vsrc1OrImmIdx);
+      addOp(FirstOperandOffset + Vsrc1OrImmIdx + 1);
+    } else if (HasVsrc1) {
+      addOp(FirstOperandOffset + Vsrc1OrImmIdx);
+    }
+  };
+
+  // Order of addOp calls determines MC operand order
+  addOp(OpXFirstOperandOffset + DstIdx); // vdstX
+  addOp(OpYFirstOperandOffset + DstIdx); // vdstY
+  addSrcOps(OpXFirstOperandOffset, HasImmX, HasVsrc1X); // src0X, ...
+  addSrcOps(OpYFirstOperandOffset, HasImmY, HasVsrc1Y); // src0Y, ...
+}
+
//===----------------------------------------------------------------------===// // dpp //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -233,6 +233,7 @@ MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral = false) const; MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -288,6 +288,12 @@ Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true)); } +static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, + uint64_t Addr, const void *Decoder) { + const auto *DAsm = static_cast(Decoder); + return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val)); +} + static bool IsAGPROperand(const MCInst &Inst, int OpIdx, const MCRegisterInfo *MRI) { if (OpIdx < 0) @@ -448,6 +454,9 @@ convertVOPCDPPInst(MI); break; } + Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address); + if (Res) + break; } // Reinitialize Bytes Bytes = Bytes_.slice(0, MaxInstBytesNum); @@ -970,6 +979,8 @@ assert(HasLiteral && "Should have decoded a literal"); const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); unsigned DescNumOps = Desc.getNumOperands(); + insertNamedMCOperand(MI, MCOperand::createImm(Literal), + AMDGPU::OpName::immDeferred); assert(DescNumOps == MI.getNumOperands()); for (unsigned I = 0; I < DescNumOps; ++I) 
{ auto &Op = MI.getOperand(I); @@ -1212,6 +1223,9 @@ MCOperand AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { if (HasLiteral) { + assert( + AMDGPU::hasVOPD(STI) && + "Should only decode multiple kimm with VOPD, check VSrc operand types"); if (Literal != Val) return errOperand(Val, "More than one unique literal is illegal"); } @@ -1504,6 +1518,20 @@ llvm_unreachable("unknown dst register"); } +// Bit 0 of DstY isn't stored in the instruction, because it's always the +// opposite of bit 0 of DstX. +MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst, + unsigned Val) const { + int VDstXInd = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX); + assert(VDstXInd != -1); + assert(Inst.getOperand(VDstXInd).isReg()); + unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg()); + Val |= ~XDstReg & 1; + auto Width = llvm::AMDGPUDisassembler::OPW32; + return createRegOperand(getVgprClassId(Width), Val); +} + MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { using namespace AMDGPU; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2074,6 +2074,15 @@ !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } +class getAsmVOPDPart { + string dst = "$vdst" # XorY; + string src0 = ", $src0" # XorY; + string src1 = ", $vsrc1" # XorY; + string ret = dst # + !if(!ge(NumSrcArgs, 1), src0, "") # + !if(!ge(NumSrcArgs, 2), src1, ""); +} + // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. 
class getAsm64 .ret; + field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); + // It is a slight misnomer to use the deferred f32 operand type for non-float + // operands, but this operand type will only be used if the other dual + // component is FMAAK or FMAMK + field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); + field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); + field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); + field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; @@ -2536,6 +2553,8 @@ field string AsmVOP3DPP8 = getAsmVOP3DPP8.ret; field string AsmSDWA = getAsmSDWA.ret; field string AsmSDWA9 = getAsmSDWA9.ret; + field string AsmVOPDX = getAsmVOPDPart.ret; + field string AsmVOPDY = getAsmVOPDPart.ret; field string TieRegDPP = "$old"; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -823,6 +823,7 @@ bool isGFX940(const MCSubtargetInfo &STI); bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); +bool hasVOPD(const MCSubtargetInfo &STI); int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); /// Is Reg - scalar register diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1783,6 +1783,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts]; } +bool hasVOPD(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureVOPD]; +} + int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR) { if (has90AInsts && ArgNumAGPR) diff --git 
a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -110,13 +110,17 @@ } multiclass VOP1Inst { + SDPatternOperator node = null_frag, int VOPDOp = -1> { // We only want to set this on the basic, non-SDWA or DPP forms. defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"), !eq(opName, "v_mov_b64")); let isMoveImm = should_mov_imm in { - def _e32 : VOP1_Pseudo ; + if !eq(VOPDOp, -1) then + def _e32 : VOP1_Pseudo ; + else + // Only for V_MOV_B32 + def _e32 : VOP1_Pseudo , VOPD_Component; def _e64 : VOP3InstBase ; } @@ -182,8 +186,15 @@ defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>; } +def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { + let InsVOPDX = (ins Src0RC32:$src0X); + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X); + let InsVOPDY = (ins Src0RC32:$src0Y); + let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y); +} + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; +defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; let SubtargetPredicate = isGFX940Plus in defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -140,7 +140,16 @@ Commutable_REV; } // End renamedInGFX9 = GFX9Renamed } - +multiclass VOP2Inst_e32_VOPD VOPDOp, + string VOPDName, + SDPatternOperator node = null_frag, + string revOp = opName, + bit GFX9Renamed = 0> { + defm NAME : VOP2Inst_e32, + VOPD_Component; +} multiclass VOP2Inst_e64 VOPDOp, + string VOPDName, + SDPatternOperator node = null_frag, + string revOp = opName, + bit GFX9Renamed = 0> : + VOP2Inst_e32_VOPD, + VOP2Inst_e64, + VOP2Inst_sdwa { + let renamedInGFX9 = GFX9Renamed in { + foreach _ = 
BoolToList.ret in + def _dpp : VOP2_DPP_Pseudo ; + } +} + multiclass VOP2bInst VOPDOp, + string VOPDName, + SDPatternOperator node = null_frag, + string revOp = opName, + bit useSGPRInput = !eq(P.NumSrcArgs, 3) + > { + + let SchedRW = [Write32Bit] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { + def _e32 : VOP2_Pseudo , + Commutable_REV, + VOPD_Component; + + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2e"; + } + + foreach _ = BoolToList.ret in + def _dpp : VOP2_DPP_Pseudo ; + } + + def _e64 : VOP3InstBase , + Commutable_REV { + let isReMaterializable = 1; + } + + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList.ret in + def _e64_dpp : VOP3_DPP_Pseudo ; + } // End SubtargetPredicate = isGFX11Plus + } +} class VOP2eInstAlias : InstAlias : VOPProfile <[vt, vt, vt, vt]> { +class VOP_MADK_Base : VOPProfile <[vt, vt, vt, vt]> { + string AsmVOPDXDeferred = ?; +} + +class VOP_MADAK : VOP_MADK_Base { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = !if(!eq(vt.Size, 32), (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); + field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm); + // Note that both src0X and imm are deferred + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred); + field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm); + field string Asm32 = "$vdst, $src0, $src1, $imm"; + field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm"; + let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred"; + field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm"; field bit HasExt = 0; let IsSingle = 1; } @@ -296,10 +368,17 @@ def VOP_MADAK_F16 : VOP_MADAK ; def VOP_MADAK_F32 : VOP_MADAK ; -class VOP_MADMK : VOPProfile <[vt, vt, vt, vt]> { +class VOP_MADMK : VOP_MADK_Base { 
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); + field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); + field string Asm32 = "$vdst, $src0, $imm, $src1"; + field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X"; + let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X"; + field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y"; field bit HasExt = 0; let IsSingle = 1; } @@ -537,31 +616,31 @@ let SubtargetPredicate = isGFX11Plus in defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>; -defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; +defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; let isCommutable = 1 in { let isReMaterializable = 1 in { -defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>; -defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>; -defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; -defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; -defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>; +defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>; +defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>; +defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">; +defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", 
AMDGPUfmul_legacy>; +defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; -defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; -defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; +defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>; +defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>; defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN, smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">; -defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">; -defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN, and>; +defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">; +defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN, 0x12, "v_and_b32", and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; } // End isReMaterializable = 1 @@ -593,7 +672,7 @@ let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { -defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>; +defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, 
"v_add_nc_u32", null_frag, "v_add_u32", 1>; defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; } @@ -783,7 +862,7 @@ DisableEncoding = "$src2", isConvertibleToThreeAddress = 1, isCommutable = 1 in -defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; +defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; } // End SubtargetPredicate = HasDLInsts @@ -811,7 +890,7 @@ isCommutable = 1, IsDOT = 1 in { let SubtargetPredicate = HasDot5Insts in - defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">; let SubtargetPredicate = HasDot6Insts in defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; @@ -849,10 +928,10 @@ } // End AddedComplexity = 30 let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in { -def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; +def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; let isCommutable = 1 in -def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">; +def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; } let SubtargetPredicate = isGFX10Plus in { diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td @@ -0,0 +1,160 @@ +//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Encodings +//===----------------------------------------------------------------------===// + +class VOPDe opX, bits<5> opY> : Enc64 { + bits<9> src0X; + bits<8> vsrc1X; + bits<8> vdstX; + bits<9> src0Y; + bits<8> vsrc1Y; + bits<8> vdstY; + + let Inst{8-0} = src0X; + let Inst{16-9} = vsrc1X; + let Inst{21-17} = opY; + let Inst{25-22} = opX; + let Inst{31-26} = 0x32; // encoding + let Inst{40-32} = src0Y; + let Inst{48-41} = vsrc1Y; + let Inst{55-49} = vdstY{7 - 1}; + let Inst{63-56} = vdstX; +} + +class VOPD_MADKe opX, bits<5> opY> : Enc96 { + bits<9> src0X; + bits<8> vsrc1X; + bits<8> vdstX; + bits<9> src0Y; + bits<8> vsrc1Y; + bits<8> vdstY; + bits<32> imm; + + let Inst{8-0} = src0X; + let Inst{16-9} = vsrc1X; + let Inst{21-17} = opY; + let Inst{25-22} = opX; + let Inst{31-26} = 0x32; // encoding + let Inst{40-32} = src0Y; + let Inst{48-41} = vsrc1Y; + let Inst{55-49} = vdstY{7 - 1}; + let Inst{63-56} = vdstX; + let Inst{95-64} = imm; +} + +//===----------------------------------------------------------------------===// +// VOPD classes +//===----------------------------------------------------------------------===// + +class VOPD_Base + : VOPAnyCommon, + VOP, + SIMCInstr { + // Fields for table indexing + Instruction Opcode = !cast(NAME); + bits<5> OpX = XasVC.VOPDOp; + bits<5> OpY = YasVC.VOPDOp; + + let VALU = 1; + + let DecoderNamespace = "GFX11"; + let AssemblerPredicate = isGFX11Plus; + let WaveSizePredicate = isWave32; + let isCodeGenOnly = 0; + let SubtargetPredicate = isGFX11Plus; + let AsmMatchConverter = "cvtVOPD"; + let Size = 8; + let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg); + let mayRaiseFPException = ReadsModeReg; + + let Uses = RegListUnion.ret; + let Defs = RegListUnion.ret; + let SchedRW = 
!listconcat(VDX.SchedRW, VDY.SchedRW); +} + +class VOPD + : VOPD_Base, + VOPDe { + let Inst{16-9} = !if(!eq(VDX.Mnemonic,"v_mov_b32"),0x0,vsrc1X); + let Inst{48-41} = !if(!eq(VDY.Mnemonic,"v_mov_b32"),0x0,vsrc1Y); +} + +class VOPD_MADK + : VOPD_Base, + VOPD_MADKe { + let Inst{16-9} = !if(!eq(VDX.Mnemonic,"v_mov_b32"),0x0,vsrc1X); + let Inst{48-41} = !if(!eq(VDY.Mnemonic,"v_mov_b32"),0x0,vsrc1Y); + let Size = 12; +} + +// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is not. +// Since we generate the DUAL form by converting from the normal form we will +// never generate it. +defvar VOPDYPseudos = [ + "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32", + "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32", + "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32", + "V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32" +]; + +def VOPDDstYOperand : RegisterOperand { + let DecoderMethod = "decodeOperandVOPDDstY"; +} + +// First 13 insts from vopdy are also vopdx, DOT2ACC_F32_BF16 is omitted +defvar VOPDXPseudos = VOPDYPseudos[0...12]; + +foreach x = VOPDXPseudos in { + foreach y = VOPDYPseudos in { + defvar xInst = !cast(x); + defvar yInst = !cast(y); + defvar XasVC = !cast(x); + defvar YasVC = !cast(y); + defvar isMADK = !or(!eq(x,"V_FMAAK_F32"),!eq(x,"V_FMAMK_F32"),!eq(y,"V_FMAAK_F32"),!eq(y,"V_FMAMK_F32")); + // If X or Y is MADK (have a mandatory immediate), all src operands which + // may contain an optional literal must use the VSrc_*_Deferred operand + // type. Optional literal operands in MADK VOPD components always use this + // operand form. 
If Both X and Y are MADK, the mandatory literal of X + // additionally must use an alternate operand format which defers to the + // 'real' Y literal + defvar isOpXMADK = !or(!eq(x,"V_FMAAK_F32"),!eq(x,"V_FMAMK_F32")); + defvar isOpYMADK = !or(!eq(y,"V_FMAAK_F32"),!eq(y,"V_FMAMK_F32")); + defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2); + defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY); + if !or(isOpXMADK, isOpYMADK) then { + if !and(isOpXMADK, isOpYMADK) then { + defvar X_MADK_Pfl = !cast(xInst.Pfl); + defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY); + defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + def OpName : VOPD_MADK; + } else { + defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + if isOpXMADK then { + assert !not(isOpYMADK), "Expected only OpX as MADK"; + defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred); + def OpName : VOPD_MADK; + } else { + assert !not(isOpXMADK), "Expected only OpY as MADK"; + defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY); + def OpName : VOPD_MADK; + } + } + } else { + defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY); + defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY; + def OpName : VOPD; + } + } +} + diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -30,6 +30,13 @@ string OpName = opName; } +class VOPD_Component OpIn, string vOPDName> { + Instruction BaseVOP = !cast(NAME); + string VOPDName = "v_dual_" # !substr(vOPDName,2); + bits<5> VOPDOp = OpIn; + bit CanBeVOPDX = !le(VOPDOp, 13); +} + class VOPAnyCommon pattern> : InstSI { @@ -1417,6 +1424,7 @@ include "VOP2Instructions.td" include "VOP3Instructions.td" include 
"VOP3PInstructions.td" +include "VOPDInstructions.td" class VOPInfoTable : GenericTable { diff --git a/llvm/test/MC/AMDGPU/gfx11_err.s b/llvm/test/MC/AMDGPU/gfx11_err.s --- a/llvm/test/MC/AMDGPU/gfx11_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_err.s @@ -57,6 +57,10 @@ v_cvt_f16_u16_e64_dpp v5, s1 row_shl:1 row_mask:0xf bank_mask:0xf // GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +; disallow space between colons +v_dual_mul_f32 v0, v0, v2 : : v_dual_mul_f32 v1, v1, v3 +// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression + // On GFX11, v_dot8_i32_i4 is a valid SP3 alias for v_dot8_i32_iu4. // However, we intentionally leave it unimplemented because on other // processors v_dot8_i32_i4 denotes an instruction of a different diff --git a/llvm/test/MC/AMDGPU/vopd.s b/llvm/test/MC/AMDGPU/vopd.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/vopd.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +// GFX11: encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 +// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 +// GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 +// GFX11: encoding: 
[0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_min_f32 v0, v1 , v2 :: v_dual_max_f32 v3, v4, v5 +// GFX11: encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 +// GFX11: encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 +// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 +// GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98 +// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1 +// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741 +// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, v160 +// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xa0,0x01,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2.741 +// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2 +// GFX11: encoding: 
[0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_subrev_f32 v0, v1 , v2 :: v_dual_add_nc_u32 v3, v4, v5 +// GFX11: encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 +// GFX11: encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_fmamk_f32 v122, 255, 255, v161 :: v_dual_fmamk_f32 v123, 255, 255, v162 +// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 +// GFX11: encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; +// GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error + +;Illegal, but assembler does not check register or literal constraints for VOPD +;v_dual_fmamk_f32 v122, v74, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -14571,6 +14571,66 @@ # GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c] 0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c +# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 
0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf] +0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf + +# W32: v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 ; encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14] +0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_and_b32 v247, v160, v98 ; encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_fmamk_f32 v3, v6, 0x402f6c8b, v1 ; encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 :: v_dual_add_f32 v5, 0xaf123456, v2 ; encoding: [0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf] +0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf + +# W32: v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 ; encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f] +0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f + +# W32: v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde] +0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde + +# W32: v_dual_fmamk_f32 v122, 0xff, 0xff, v161 :: v_dual_fmamk_f32 v123, 0xff, 0xff, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00] +0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00 + +# W32: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 ; encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0] +0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 
0x402f6c8b ; encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 2 ; encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_min_f32 v0, v1, v2 :: v_dual_max_f32 v3, v4, v5 ; encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00] +0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00 + +# W32: v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b ; encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40] +0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 ; encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# W32: v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 ; encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00] +0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00 + +# W32: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00] +0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00 + +# W32: v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_subrev_f32 v0, v1, v2 :: v_dual_add_nc_u32 
v3, v4, v5 ; encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00] +0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00 + # GFX11: v_exp_f32_e32 v255, v1 ; encoding: [0x01,0x4b,0xfe,0x7f] 0x01,0x4b,0xfe,0x7f