diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1542,7 +1542,7 @@ bool validateOpSel(const MCInst &Inst); bool validateDPP(const MCInst &Inst, const OperandVector &Operands); bool validateVccOperand(unsigned Reg) const; - bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); + bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); bool validateAGPRLdSt(const MCInst &Inst) const; bool validateVGPRAlign(const MCInst &Inst) const; @@ -1715,6 +1715,7 @@ switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -1723,6 +1724,7 @@ case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: return &APFloat::IEEEsingle(); case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -1732,6 +1734,7 @@ return &APFloat::IEEEdouble(); case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: @@ -1742,6 +1745,7 @@ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_KIMM16: return &APFloat::IEEEhalf(); default: llvm_unreachable("unsupported fp type"); @@ -2017,12 +2021,14 @@ case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case 
AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: case AMDGPU::OPERAND_REG_INLINE_AC_FP32: case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: @@ -2036,7 +2042,9 @@ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: - case AMDGPU::OPERAND_REG_IMM_V2INT32: { + case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -2062,6 +2070,7 @@ switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -2101,6 +2110,7 @@ case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: @@ -2128,6 +2138,14 @@ Inst.addOperand(MCOperand::createImm(Val)); return; } + case AMDGPU::OPERAND_KIMM32: + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); + setImmKindNone(); + return; + case AMDGPU::OPERAND_KIMM16: + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); + setImmKindNone(); + return; default: llvm_unreachable("invalid operand size"); } @@ -3250,7 +3268,8 @@ SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { - ++NumLiterals; + LiteralSize = 4; } SmallDenseSet<unsigned> SGPRsUsed; @@ 
-3290,7 +3309,7 @@ // An instruction may use only one literal. // This has been validated on the previous step. - // See validateVOP3Literal. + // See validateVOPLiteral. // This literal may be used as more than one operand. // If all these operands are of the same size, // this literal counts as one scalar value. @@ -3981,26 +4000,29 @@ (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); } -// VOP3 literal is only allowed in GFX10+ and only one can be used -bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, - const OperandVector &Operands) { +// One unique literal can be used. VOP3 literal is only allowed in GFX10+ +bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, + const OperandVector &Operands) { unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opcode); - if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) + const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); + if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && + ImmIdx == -1) return true; const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); - const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; for (int OpIdx : OpIndices) { - if (OpIdx == -1) break; + if (OpIdx == -1) + continue; const MCOperand &MO = Inst.getOperand(OpIdx); if (!MO.isImm() && !MO.isExpr()) @@ -4030,7 +4052,7 @@ if (!NumLiterals) return true; - if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { + if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { Error(getLitLoc(Operands), "literal operands are not supported"); return false; } @@ -4202,7 +4224,7 @@ "only one literal operand is allowed"); return false; } - 
if (!validateVOP3Literal(Inst, Operands)) { + if (!validateVOPLiteral(Inst, Operands)) { return false; } if (!validateConstantBusLimitations(Inst, Operands)) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -87,6 +87,7 @@ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; @@ -150,9 +151,11 @@ static MCOperand decodeIntImmed(unsigned Imm); static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm); + MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; MCOperand decodeLiteralConstant() const; - MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, + bool MandatoryLiteral = false) const; MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" using namespace llvm; @@ -264,6 +265,34 @@ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); } +static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm, + uint64_t Addr, const 
void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); +} + +static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); +} + +static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true)); +} + +static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true)); +} + static bool IsAGPROperand(const MCInst &Inst, int OpIdx, const MCRegisterInfo *MRI) { if (OpIdx < 0) @@ -626,6 +655,11 @@ } } + int ImmLitIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); + if (Res && ImmLitIdx != -1) + Res = convertFMAanyK(MI, ImmLitIdx); + // if the opcode was not recognized we'll assume a Size of 4 bytes // (unless there are fewer bytes left) Size = Res ? 
(MaxInstBytesNum - Bytes.size()) @@ -810,6 +844,24 @@ return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI, + int ImmLitIdx) const { + assert(HasLiteral && "Should have decoded a literal"); + const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); + unsigned DescNumOps = Desc.getNumOperands(); + assert(DescNumOps == MI.getNumOperands()); + for (unsigned I = 0; I < DescNumOps; ++I) { + auto &Op = MI.getOperand(I); + auto OpType = Desc.OpInfo[I].OperandType; + bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED || + OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED); + if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST && + IsDeferredOp) + Op.setImm(Literal); + } + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -1019,6 +1071,18 @@ return decodeDstOp(OPW512, Val); } +// Decode Literals for insts which always have a literal in the encoding +MCOperand +AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { + if (HasLiteral) { + if (Literal != Val) + return errOperand(Val, "More than one unique literal is illegal"); + } + HasLiteral = true; + Literal = Val; + return MCOperand::createImm(Literal); +} + MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants @@ -1232,7 +1296,8 @@ return (TTmpMin <= Val && Val <= TTmpMax)? 
Val - TTmpMin : -1; } -MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, + bool MandatoryLiteral) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1261,8 +1326,13 @@ if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) return decodeFPImmed(Width, Val); - if (Val == LITERAL_CONST) - return decodeLiteralConstant(); + if (Val == LITERAL_CONST) { + if (MandatoryLiteral) + // Keep a sentinel value for deferred setting + return MCOperand::createImm(LITERAL_CONST); + else + return decodeLiteralConstant(); + } switch (Width) { case OPW32: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -605,6 +605,7 @@ switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -631,6 +632,7 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: printImmediate16(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_IMM_V2INT16: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -233,6 +233,7 @@ switch (OpInfo.OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case 
AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -255,6 +256,7 @@ case AMDGPU::OPERAND_REG_INLINE_AC_INT16: return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: // FIXME Is this correct? What do inline immediates do on SI for f16 src @@ -277,6 +279,9 @@ uint32_t Encoding = getLit16Encoding(Lo16, STI); return Encoding; } + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: + return MO.getImm(); default: llvm_unreachable("invalid operand size"); } @@ -341,7 +346,13 @@ (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])) return; - // Check for additional literals in SRC0/1/2 (Op 1/2/3) + // Do not print literals from SISrc Operands for insts with mandatory literals + int ImmLitIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); + if (ImmLitIdx != -1) + return; + + // Check for additional literals for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) { // Check if this operand should be encoded as [SV]Src diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -139,64 +139,67 @@ } namespace AMDGPU { - enum OperandType : unsigned { - /// Operands with register or 32-bit immediate - OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, - OPERAND_REG_IMM_INT64, - OPERAND_REG_IMM_INT16, - OPERAND_REG_IMM_FP32, - OPERAND_REG_IMM_FP64, - OPERAND_REG_IMM_FP16, - OPERAND_REG_IMM_V2FP16, - OPERAND_REG_IMM_V2INT16, - OPERAND_REG_IMM_V2INT32, - OPERAND_REG_IMM_V2FP32, - - /// Operands with register or inline constant - OPERAND_REG_INLINE_C_INT16, - OPERAND_REG_INLINE_C_INT32, - OPERAND_REG_INLINE_C_INT64, - OPERAND_REG_INLINE_C_FP16, - OPERAND_REG_INLINE_C_FP32, - OPERAND_REG_INLINE_C_FP64, - OPERAND_REG_INLINE_C_V2INT16, - OPERAND_REG_INLINE_C_V2FP16, - 
OPERAND_REG_INLINE_C_V2INT32, - OPERAND_REG_INLINE_C_V2FP32, - - /// Operands with an AccVGPR register or inline constant - OPERAND_REG_INLINE_AC_INT16, - OPERAND_REG_INLINE_AC_INT32, - OPERAND_REG_INLINE_AC_FP16, - OPERAND_REG_INLINE_AC_FP32, - OPERAND_REG_INLINE_AC_FP64, - OPERAND_REG_INLINE_AC_V2INT16, - OPERAND_REG_INLINE_AC_V2FP16, - OPERAND_REG_INLINE_AC_V2INT32, - OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, - OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, - - OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, - OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, - OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, - OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, - - // Operand for source modifiers for VOP instructions - OPERAND_INPUT_MODS, - - // Operand for SDWA instructions - OPERAND_SDWA_VOPC_DST, - - /// Operand with 32-bit immediate that uses the constant bus. - OPERAND_KIMM32, - OPERAND_KIMM16 - }; +enum OperandType : unsigned { + /// Operands with register or 32-bit immediate + OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, + OPERAND_REG_IMM_INT64, + OPERAND_REG_IMM_INT16, + OPERAND_REG_IMM_FP32, + OPERAND_REG_IMM_FP64, + OPERAND_REG_IMM_FP16, + OPERAND_REG_IMM_FP16_DEFERRED, + OPERAND_REG_IMM_FP32_DEFERRED, + OPERAND_REG_IMM_V2FP16, + OPERAND_REG_IMM_V2INT16, + OPERAND_REG_IMM_V2INT32, + OPERAND_REG_IMM_V2FP32, + + /// Operands with register or inline constant + OPERAND_REG_INLINE_C_INT16, + OPERAND_REG_INLINE_C_INT32, + OPERAND_REG_INLINE_C_INT64, + OPERAND_REG_INLINE_C_FP16, + OPERAND_REG_INLINE_C_FP32, + OPERAND_REG_INLINE_C_FP64, + OPERAND_REG_INLINE_C_V2INT16, + OPERAND_REG_INLINE_C_V2FP16, + OPERAND_REG_INLINE_C_V2INT32, + OPERAND_REG_INLINE_C_V2FP32, + + /// Operand with 32-bit immediate that uses the constant bus. 
+ OPERAND_KIMM32, + OPERAND_KIMM16, + + /// Operands with an AccVGPR register or inline constant + OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_INT32, + OPERAND_REG_INLINE_AC_FP16, + OPERAND_REG_INLINE_AC_FP32, + OPERAND_REG_INLINE_AC_FP64, + OPERAND_REG_INLINE_AC_V2INT16, + OPERAND_REG_INLINE_AC_V2FP16, + OPERAND_REG_INLINE_AC_V2INT32, + OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, + OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, + + OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, + OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, + OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, + + // Operand for source modifiers for VOP instructions + OPERAND_INPUT_MODS, + + // Operand for SDWA instructions + OPERAND_SDWA_VOPC_DST + +}; } // Input operand modifiers bit-masks diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3405,6 +3405,7 @@ switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_IMM_V2FP32: @@ -3443,6 +3444,7 @@ // This suffers the same problem as the scalar 16-bit cases. 
return AMDGPU::isInlinableIntLiteralV216(Imm); case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { @@ -3836,6 +3838,7 @@ break; case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1173,6 +1173,7 @@ let OperandType = "OPERAND_KIMM"#vt.Size; let PrintMethod = "printU"#vt.Size#"ImmOperand"; let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass"); + let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm"; } // 32-bit VALU immediate operand that uses the constant bus. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1019,6 +1019,30 @@ let DecoderMethod = "DecodeVS_128RegisterClass"; } +//===----------------------------------------------------------------------===// +// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use +// with FMAMK/FMAAK +//===----------------------------------------------------------------------===// + +multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType, string rc_suffix = "_32"> { + let OperandNamespace = "AMDGPU" in { + def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> { + let OperandType = opType#"_FP16_DEFERRED"; + let ParserMatchClass = RegImmMatcher<MatchName#"F16">; + let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred"; + } + + def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> { + let OperandType = opType#"_FP32_DEFERRED"; + let ParserMatchClass = RegImmMatcher<MatchName#"F32">; + let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred"; + } + } +} + +defm VSrc 
: SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">; + //===----------------------------------------------------------------------===// // VRegSrc_* Operands with a VGPR //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -789,6 +789,7 @@ switch (OpInfo.OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -797,6 +798,8 @@ case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 return 4; case AMDGPU::OPERAND_REG_IMM_INT64: @@ -808,6 +811,7 @@ case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1574,8 +1574,10 @@ unsigned OpType = Desc.OpInfo[OpNo].OperandType; switch (OpType) { case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP32: diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -270,12 +270,11 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = !if(!eq(vt.Size, 32), - (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), - (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); + (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), + (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); + field string Asm32 = "$vdst, $src0, $src1, $imm"; field bit HasExt = 0; let IsSingle = 1; - - field string Asm32 = "$vdst, $src0, $src1, $imm"; } def VOP_MADAK_F16 : VOP_MADAK <f16>; @@ -283,11 +282,10 @@ class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field string Asm32 = "$vdst, $src0, $imm, $src1"; field bit HasExt = 0; let IsSingle = 1; - - field string Asm32 = "$vdst, $src0, $imm, $src1"; } def VOP_MADMK_F16 : VOP_MADMK <f16>; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -274,6 +274,26 @@ // GFX6-7: error: dpp variant of this instruction is not supported // GFX8-9: error: not a valid operand +//===----------------------------------------------------------------------===// +// VOP2 +//===----------------------------------------------------------------------===// + +v_fmaak_f32 v0, 0xff32ff, v0, 0x11213141 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + +v_fmamk_f32 v0, 0xff32ff, 0x11213141, v0 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed 
+ +v_fmaak_f32 v0, 0xff32, v0, 0x1122 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + +v_fmamk_f32 v0, 0xff32, 0x1122, v0 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + //===----------------------------------------------------------------------===// // VOP2 E64. //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -10229,9 +10229,15 @@ v_fmamk_f32 v5, v1, 0x11213141, v255 // GFX10: encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11] +v_fmamk_f32 v5, 0x11213141, 0x11213141, v255 +// GFX10: encoding: [0xff,0xfe,0x0b,0x58,0x41,0x31,0x21,0x11] + v_fmaak_f32 v5, v1, v2, 0x11213141 // GFX10: encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11] +v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 +// GFX10: encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] + v_fmaak_f32 v255, v1, v2, 0x11213141 // GFX10: encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11] @@ -11969,6 +11975,9 @@ v_fmamk_f16 v255, v1, 0x1121, v3 // GFX10: encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00] +v_fmamk_f16 v255, 0x1121, 0x1121, v3 +// GFX10: encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00] + v_fmamk_f16 v5, v255, 0x1121, v3 // GFX10: encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00] @@ -12014,6 +12023,9 @@ v_fmaak_f16 v5, v1, v255, 0x1121 // GFX10: encoding: [0x01,0xff,0x0b,0x70,0x21,0x11,0x00,0x00] +v_fmaak_f16 v5, 0x1121, v255, 0x1121 +// GFX10: encoding: [0xff,0xfe,0x0b,0x70,0x21,0x11,0x00,0x00] + v_fmaak_f16 v5, v1, v2, 0xa1b1 // GFX10: encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s @@ 
-2337,6 +2337,9 @@ v_madmk_f16 v5, v1, 0x1121, v255 // CHECK: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00] +v_madmk_f16 v5, 0x1121, 0x1121, v255 +// CHECK: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00] + v_madak_f16 v5, v1, v2, 0x1121 // CHECK: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00] @@ -2367,6 +2370,9 @@ v_madak_f16 v5, v1, v2, 0xa1b1 // CHECK: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00] +v_madak_f16 v5, 0x1121, v2, 0x1121 +// CHECK: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00] + v_add_u16 v5, v1, v2 // CHECK: [0x01,0x05,0x0a,0x4c] diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -843,6 +843,20 @@ // NOGCN: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 +// NOGCN: error: only one literal operand is allowed +v_madak_f32 v0, 0xff32ff, v0, 0x11213141 + +// NOGCN: error: only one literal operand is allowed +v_madmk_f32 v0, 0xff32ff, 0x11213141, v0 + +// NOSICI: error: instruction not supported on this GPU +// NOGFX89: error: only one literal operand is allowed +v_madak_f16 v0, 0xff32, v0, 0x1122 + +// NOSICI: error: instruction not supported on this GPU +// NOGFX89: error: only one literal operand is allowed +v_madmk_f16 v0, 0xff32, 0x1122, v0 + // NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -270,6 +270,14 @@ // VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] v_madak_f32 v1, v2, v3, 64.0 +// SICI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x42,0x41,0x31,0x21,0x11] +// VI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11] +v_madak_f32 v0, 
0x11213141, v0, 0x11213141 + +// SICI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x40,0x41,0x31,0x21,0x11] +// VI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11] +v_madmk_f32 v0, 0x11213141, 0x11213141, v0 + // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] // VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] v_bcnt_u32_b32_e64 v1, v2, v3 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -73781,6 +73781,9 @@ # GFX10: v_fmaak_f16 v5, -1, v2, 0x1121 ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 +# GFX10: v_fmaak_f16 v5, 0x1121, v2, 0x1121 ; encoding: [0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + # GFX10: v_fmaak_f32 v5, -1, v2, 0x11213141 ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] 0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 @@ -73796,6 +73799,9 @@ # GFX10: v_fmaak_f32 v5, 0, v2, 0x11213141 ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] 0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 +# GFX10: v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 ; encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + # GFX10: v_fmaak_f16 v5, 0.5, v2, 0x1121 ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 @@ -74150,6 +74156,9 @@ # GFX10: v_fmamk_f16 v255, v1, 0x1121, v3 ; encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00] 0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00 +# GFX10: v_fmamk_f16 v255, 0x1121, 0x1121, v3 ; encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00] +0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00 + # GFX10: v_fmamk_f32 v255, v1, 0x11213141, v3 ; encoding: 
[0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11] 0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11 @@ -74159,6 +74168,9 @@ # GFX10: v_fmamk_f32 v5, -1, 0x11213141, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] 0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 +# GFX10: v_fmamk_f32 v5, 0x11213141, 0x11213141, v3 ; encoding: [0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + # GFX10: v_fmamk_f16 v5, -4.0, 0x1121, v3 ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] 0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -32160,6 +32160,9 @@ # CHECK: v_madmk_f32 v5, v1, 0x11213141, v255 ; encoding: [0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11] 0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11 +# CHECK: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11] +0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11 + # CHECK: v_madak_f32 v5, v1, v2, 0x11213141 ; encoding: [0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11] 0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11 @@ -32187,6 +32190,9 @@ # CHECK: v_madak_f32 v5, v1, v2, 0xa1b1c1d1 ; encoding: [0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1] 0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1 +# CHECK: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11] +0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11 + # CHECK: v_add_co_u32_e32 v5, vcc, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 @@ -33783,6 +33789,9 @@ # CHECK: v_madmk_f16 v5, v1, 0x1121, v255 ; encoding: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00] 0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00 +# CHECK: v_madmk_f16 v5, 0x1121, 0x1121, v255 ; encoding: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00] +0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00 + # CHECK: v_madak_f16 v5, v1, v2, 0x1121 ; encoding: 
[0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00] 0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00 @@ -33810,6 +33819,9 @@ # CHECK: v_madak_f16 v5, v1, v2, 0xa1b1 ; encoding: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00] 0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00 +# CHECK: v_madak_f16 v5, 0x1121, v2, 0x1121 ; encoding: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00] +0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00 + # CHECK: v_add_u16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c