diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1664,6 +1664,8 @@ bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSOPLiteral(const MCInst &Inst) const; bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); + bool validateVOPDRegBankConstraints(const MCInst &Inst, + const OperandVector &Operands); bool validateIntClampSupported(const MCInst &Inst); bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); @@ -3575,6 +3577,44 @@ return false; } +bool AMDGPUAsmParser::validateVOPDRegBankConstraints( + const MCInst &Inst, const OperandVector &Operands) { + + const unsigned Opcode = Inst.getOpcode(); + if (!isVOPD(Opcode)) + return true; + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + + auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { + const MCOperand &Opr = Inst.getOperand(OperandIdx); + return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) + ? 
Opr.getReg() + : MCRegister::NoRegister; + }; + + auto InstInfo = getVOPDInstInfo(Opcode, &MII); + auto InvalidOperandInfo = InstInfo.getInvalidOperandIndex(getVRegIdx); + if (!InvalidOperandInfo) + return true; + + auto OprIdx = *InvalidOperandInfo; + auto ParsedIdx = std::max(InstInfo[VOPD::X].getParsedOperandIndex(OprIdx), + InstInfo[VOPD::Y].getParsedOperandIndex(OprIdx)); + assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); + + auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); + if (OprIdx == VOPD::Component::DST) { + Error(Loc, "one dst register must be even and the other odd"); + } else { + auto SrcIdx = OprIdx - VOPD::Component::DST_NUM; + Error(Loc, Twine("src") + Twine(SrcIdx) + + " operands must use different VGPR banks"); + } + + return false; +} + bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -4626,6 +4666,9 @@ if (!validateConstantBusLimitations(Inst, Operands)) { return false; } + if (!validateVOPDRegBankConstraints(Inst, Operands)) { + return false; + } if (!validateIntClampSupported(Inst)) { Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), "integer clamping is not supported on this GPU"); @@ -8479,14 +8522,6 @@ } // Create VOPD MCInst operands using parsed assembler operands. 
-// Parsed VOPD operands are ordered as follows: -// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' -// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] -// If both OpX and OpY have an imm, the first imm has a different name: -// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' -// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] -// MCInst operands have the following order: -// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { auto addOp = [&](uint16_t i) { // NOLINT:function pointer AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -8498,71 +8533,23 @@ Op.addImmOperands(Inst, 1); return; } - // Handle tokens like 'offen' which are sometimes hard-coded into the - // asm string. There are no MCInst operands for these. - if (Op.isToken()) { - return; - } llvm_unreachable("Unhandled operand type in cvtVOPD"); }; - // Indices into MCInst.Operands - const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... - const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... - const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 
+ auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); - unsigned Opc = Inst.getOpcode(); - bool HasVsrc1X = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; - bool HasImmX = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || - (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == - FmamkOpXImmMCIndex || - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == - FmaakOpXImmMCIndex)); - - bool HasVsrc1Y = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; - bool HasImmY = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= - MinOpYImmMCIndex + HasVsrc1X; - - // Indices of parsed operands relative to dst - const auto DstIdx = 0; - const auto Src0Idx = 1; - const auto Vsrc1OrImmIdx = 2; - - const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; - const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) - - // Offsets into parsed operands - const auto OpXFirstOperandOffset = 1; - const auto OpYFirstOperandOffset = - OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; - - // Order of addOp calls determines MC operand order - addOp(OpXFirstOperandOffset + DstIdx); // vdstX - addOp(OpYFirstOperandOffset + DstIdx); // vdstY - - addOp(OpXFirstOperandOffset + Src0Idx); // src0X - if (HasImmX) { - // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak - addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); - addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); - } else { - if (HasVsrc1X) // all except v_mov - addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X + // MCInst operands are ordered as follows: + // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] + + for (auto CompIdx : VOPD::COMPONENTS) { + addOp(InstInfo[CompIdx].getParsedDstIndex()); } - addOp(OpYFirstOperandOffset + Src0Idx); // src0Y - if (HasImmY) { - // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak - 
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); - addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); - } else { - if (HasVsrc1Y) // all except v_mov - addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y + for (auto CompIdx : VOPD::COMPONENTS) { + auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum(); + for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) { + addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx)); + } } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -3040,6 +3040,11 @@ let PrimaryKeyName = "getVOPDComponentHelper"; } +def getVOPDBaseFromComponent : SearchIndex { + let Table = VOPDComponentTable; + let Key = ["VOPDOp"]; +} + def VOPDPairs : GenericTable { let FilterClass = "VOPD_Base"; let CppTypeName = "VOPDInfo"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -10,8 +10,12 @@ #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #include "SIDefines.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/Alignment.h" +#include <array> +#include <functional> +#include <utility> struct amd_kernel_code_t; @@ -22,6 +26,7 @@ class Function; class GCNSubtarget; class GlobalValue; +class MCInstrInfo; class MCRegisterClass; class MCRegisterInfo; class MCSubtargetInfo; @@ -499,6 +504,180 @@ LLVM_READONLY bool isVOPD(unsigned Opc); +namespace VOPD { + +enum Component : unsigned { + DST = 0, + SRC0, + SRC1, + SRC2, + + DST_NUM = 1, + MAX_SRC_NUM = 3, + MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM +}; + +// Number of VGPR banks per VOPD component operand.
+constexpr unsigned BANKS_NUM[] = {2, 4, 4, 2}; + +enum ComponentIndex : unsigned { X = 0, Y = 1 }; +constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; +constexpr unsigned COMPONENTS_NUM = 2; + +enum ComponentKind : unsigned { + SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD. + COMPONENT_X, // A VOPD instruction, X component. + COMPONENT_Y, // A VOPD instruction, Y component. + MAX = COMPONENT_Y +}; + +// Location of operands in a MachineInstr/MCInst +// and position of operands in parsed operands array. +class ComponentLayout { +private: + // Regular MachineInstr/MCInst operands are ordered as follows: + // dst, src0 [, other src operands] + // VOPD MachineInstr/MCInst operands are ordered as follows: + // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] + // Each ComponentKind has operand indices defined below. + static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; + static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpXSrcNum */}; + + // Parsed operands of regular instructions are ordered as follows: + // Mnemo dst src0 [vsrc1 ...] + // Parsed VOPD operands are ordered as follows: + // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' + // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] + // Each ComponentKind has operand indices defined below. 
+ static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */}; + static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2, + 5 /* + OpXSrcNum */}; + +private: + ComponentKind Kind; + unsigned OpXSrcNum; + +public: + ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE, + unsigned OpXSrcNum_ = 0) + : Kind(Kind_), OpXSrcNum(OpXSrcNum_) { + assert(Kind <= ComponentKind::MAX); + assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0)); + } + +public: + unsigned getDstIndex() const { return MC_DST_IDX[Kind]; } + unsigned getSrcIndex(unsigned SrcIdx) const { + assert(SrcIdx < Component::MAX_SRC_NUM); + return FIRST_MC_SRC_IDX[Kind] + OpXSrcNum + SrcIdx; + } + + unsigned getParsedDstIndex() const { + return PARSED_DST_IDX[Kind] + OpXSrcNum; + } + unsigned getParsedSrcIndex(unsigned SrcIdx) const { + assert(SrcIdx < Component::MAX_SRC_NUM); + return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx; + } +}; + +// Properties of VOPD components. +class ComponentProps { +private: + unsigned SrcOperandsNum; + Optional<unsigned> MandatoryLiteralIdx; + bool HasSrc2Acc; + +public: + ComponentProps(const MCInstrDesc &OpDesc); + + unsigned getSrcOperandsNum() const { return SrcOperandsNum; } + bool hasMandatoryLiteral() const { return MandatoryLiteralIdx.has_value(); } + unsigned getMandatoryLiteralIndex() const { + assert(hasMandatoryLiteral()); + return *MandatoryLiteralIdx; + } + bool hasRegularSrcOperand(unsigned SrcIdx) const { + assert(SrcIdx < Component::MAX_SRC_NUM); + return SrcOperandsNum > SrcIdx && !hasMandatoryLiteralAt(SrcIdx); + } + bool hasSrc2Acc() const { return HasSrc2Acc; } + +private: + bool hasMandatoryLiteralAt(unsigned SrcIdx) const { + assert(SrcIdx < Component::MAX_SRC_NUM); + return hasMandatoryLiteral() && + *MandatoryLiteralIdx == Component::DST_NUM + SrcIdx; + } +}; + +// Layout and properties of VOPD components.
+class ComponentInfo : public ComponentLayout, public ComponentProps { +public: + ComponentInfo(const MCInstrDesc &OpDesc, + ComponentKind Kind = ComponentKind::SINGLE, + unsigned OpXSrcNum = 0) + : ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {} + + // Map MC operand index to parsed operand index. + // Return 0 if the specified operand does not exist. + unsigned getParsedOperandIndex(unsigned OprIdx) const; +}; + +// Properties of VOPD instructions. +class InstInfo { +private: + const ComponentInfo CompInfo[COMPONENTS_NUM]; + +public: + using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>; + + InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) + : CompInfo{OpX, OpY} {} + + InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY) + : CompInfo{OprInfoX, OprInfoY} {} + + const ComponentInfo &operator[](size_t ComponentIdx) const { + assert(ComponentIdx < COMPONENTS_NUM); + return CompInfo[ComponentIdx]; + } + + // Check VOPD operands constraints. + // GetRegIdx(Component, OperandIdx) must return a VGPR register index + // for the specified component and operand. The callback must return 0 + // if the operand is not a register or not a VGPR. + bool hasInvalidOperand( + std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { + return getInvalidOperandIndex(GetRegIdx).has_value(); + } + + // Check VOPD operands constraints. + // Return the index of an invalid component operand, if any. + Optional<unsigned> getInvalidOperandIndex( + std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; + +private: + RegIndices + getRegIndices(unsigned ComponentIdx, + std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; +}; + +} // namespace VOPD + +LLVM_READONLY +std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode); + +LLVM_READONLY +// Get properties of 2 single VOP1/VOP2 instructions +// used as components to create a VOPD instruction. +VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY); + +LLVM_READONLY +// Get properties of VOPD X and Y components.
+VOPD::InstInfo +getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); + LLVM_READONLY bool isTrue16Inst(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -460,6 +460,108 @@ return Info ? Info->Opcode : -1; } +std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { + const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode); + assert(Info); + auto OpX = getVOPDBaseFromComponent(Info->OpX); + auto OpY = getVOPDBaseFromComponent(Info->OpY); + assert(OpX && OpY); + return {OpX->BaseVOP, OpY->BaseVOP}; +} + +namespace VOPD { + +ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { + assert(OpDesc.getNumDefs() == Component::DST_NUM); + + assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); + assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); + auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); + assert(TiedIdx == -1 || TiedIdx == Component::DST); + HasSrc2Acc = TiedIdx != -1; + + SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc; + assert(SrcOperandsNum <= Component::MAX_SRC_NUM); + + auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc; + for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) { + if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { + MandatoryLiteralIdx = OprIdx; + break; + } + } +} + +unsigned ComponentInfo::getParsedOperandIndex(unsigned OprIdx) const { + assert(OprIdx < Component::MAX_OPR_NUM); + + if (OprIdx == Component::DST) + return getParsedDstIndex(); + + auto SrcIdx = OprIdx - Component::DST_NUM; + if (SrcIdx < getSrcOperandsNum()) + return getParsedSrcIndex(SrcIdx); + + // The specified operand does not exist.
+ return 0; +} + +Optional<unsigned> InstInfo::getInvalidOperandIndex( + std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { + + auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); + auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); + + for (unsigned OprIdx = 0; OprIdx < Component::MAX_OPR_NUM; ++OprIdx) { + unsigned BanksNum = BANKS_NUM[OprIdx]; + if (OpXRegs[OprIdx] && OpYRegs[OprIdx] && + (OpXRegs[OprIdx] % BanksNum == OpYRegs[OprIdx] % BanksNum)) + return OprIdx; + } + + return {}; +} + +InstInfo::RegIndices InstInfo::getRegIndices( + unsigned ComponentIdx, + std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { + assert(ComponentIdx < COMPONENTS_NUM); + + auto Comp = CompInfo[ComponentIdx]; + + unsigned DstReg = GetRegIdx(ComponentIdx, Comp.getDstIndex()); + unsigned Src0Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(0)); + + unsigned Src1Reg = 0; + if (Comp.hasRegularSrcOperand(1)) + Src1Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(1)); + + unsigned Src2Reg = 0; + if (Comp.hasRegularSrcOperand(2)) + Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2)); + else if (Comp.hasSrc2Acc()) + Src2Reg = DstReg; + + return {DstReg, Src0Reg, Src1Reg, Src2Reg}; +} + +} // namespace VOPD + +VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { + return VOPD::InstInfo(OpX, OpY); +} + +VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, + const MCInstrInfo *InstrInfo) { + auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); + const auto &OpXDesc = InstrInfo->get(OpX); + const auto &OpYDesc = InstrInfo->get(OpY); + VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); + VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y, + OpXInfo.getSrcOperandsNum()); + return VOPD::InstInfo(OpXInfo, OpYInfo); +} + namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s +++
b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s @@ -74,7 +74,7 @@ // GFX11-NEXT:{{^}} ^ //===----------------------------------------------------------------------===// -// A VOPD instruction cannot use more than 2 scalar operands +// A VOPD instruction cannot use more than 2 scalar operands. //===----------------------------------------------------------------------===// // 2 different SGPRs + LITERAL @@ -141,3 +141,129 @@ // GFX11: error: invalid operand (violates constant bus restrictions) // GFX11-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3 // GFX11-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// One dst register must be even and the other odd. +//===----------------------------------------------------------------------===// + +v_dual_add_f32 v0, v4, v2 :: v_dual_add_f32 v2, v1, v3 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_add_f32 v0, v4, v2 :: v_dual_add_f32 v2, v1, v3 +// GFX11-NEXT:{{^}} ^ + +v_dual_mov_b32 v1, v4 :: v_dual_add_f32 v5, v1, v3 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_mov_b32 v1, v4 :: v_dual_add_f32 v5, v1, v3 +// GFX11-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v2, v4, v5 :: v_dual_add_f32 v8, v5, v6 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v2, v4, v5 :: v_dual_add_f32 v8, v5, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmac_f32 v3, v4, v5 :: v_dual_add_f32 v9, v5, v6 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_fmac_f32 v3, v4, v5 :: v_dual_add_f32 v9, v5, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmaak_f32 v4, v4, v5, 0xaf123456 :: v_dual_add_f32 v0, v5, v6 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v4, v4, v5, 0xaf123456 :: v_dual_add_f32 v0, v5, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v5, v4, 0xaf123456, 
v6 :: v_dual_add_f32 v1, v5, v6 +// GFX11: error: one dst register must be even and the other odd +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v5, v4, 0xaf123456, v6 :: v_dual_add_f32 v1, v5, v6 +// GFX11-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Src0 operands must use different VGPR banks. +//===----------------------------------------------------------------------===// + +v_dual_add_f32 v1, v1, v5 :: v_dual_mov_b32 v2, v1 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_add_f32 v1, v1, v5 :: v_dual_mov_b32 v2, v1 +// GFX11-NEXT:{{^}} ^ + +v_dual_mov_b32 v1, v2 :: v_dual_add_f32 v2, v6, v6 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_mov_b32 v1, v2 :: v_dual_add_f32 v2, v6, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v1, v3, v5 :: v_dual_add_f32 v2, v11, v6 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v1, v3, v5 :: v_dual_add_f32 v2, v11, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmac_f32 v1, v4, v5 :: v_dual_add_f32 v2, v44, v6 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmac_f32 v1, v4, v5 :: v_dual_add_f32 v2, v44, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmaak_f32 v1, v5, v5, 0xaf123456 :: v_dual_add_f32 v2, v25, v6 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v1, v5, v5, 0xaf123456 :: v_dual_add_f32 v2, v25, v6 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v1, v6, 0xaf123456, v6 :: v_dual_add_f32 v2, v2, v6 +// GFX11: error: src0 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v1, v6, 0xaf123456, v6 :: v_dual_add_f32 v2, v2, v6 +// GFX11-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Src1 operands must use different VGPR banks. 
+//===----------------------------------------------------------------------===// + +v_dual_add_f32 v1, v4, v0 :: v_dual_add_f32 v2, v5, v4 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_add_f32 v1, v4, v0 :: v_dual_add_f32 v2, v5, v4 +// GFX11-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v1, v4, v1 :: v_dual_add_f32 v2, v5, v9 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v1, v4, v1 :: v_dual_add_f32 v2, v5, v9 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmac_f32 v1, v4, v2 :: v_dual_add_f32 v2, v5, v14 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmac_f32 v1, v4, v2 :: v_dual_add_f32 v2, v5, v14 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmaak_f32 v1, v4, v3, 0xaf123456 :: v_dual_add_f32 v2, v5, v23 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v1, v4, v3, 0xaf123456 :: v_dual_add_f32 v2, v5, v23 +// GFX11-NEXT:{{^}} ^ + +v_dual_add_f32 v2, v4, v4 :: v_dual_cndmask_b32 v1, v5, v0 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_add_f32 v2, v4, v4 :: v_dual_cndmask_b32 v1, v5, v0 +// GFX11-NEXT:{{^}} ^ + +v_dual_add_f32 v2, v4, v5 :: v_dual_fmac_f32 v1, v5, v1 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_add_f32 v2, v4, v5 :: v_dual_fmac_f32 v1, v5, v1 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmaak_f32 v1, v4, v3, 0xaf123456 :: v_dual_fmaak_f32 v2, v5, v23, 0xaf123456 +// GFX11: error: src1 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v1, v4, v3, 0xaf123456 :: v_dual_fmaak_f32 v2, v5, v23, 0xaf123456 +// GFX11-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Src2 operands must use different VGPR banks. 
+//===----------------------------------------------------------------------===// + +v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v5 +// GFX11: error: src2 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v5 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v3 +// GFX11: error: src2 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v3 +// GFX11-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 +// GFX11: error: src2 operands must use different VGPR banks +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 +// GFX11-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s @@ -102,3 +102,79 @@ v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3 // GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff] + +//===----------------------------------------------------------------------===// +// One dst register must be even and the other odd. +//===----------------------------------------------------------------------===// + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00] + +v_dual_mul_f32 v1, v10, v20 :: v_dual_mul_f32 v0, v11, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x01] + +//===----------------------------------------------------------------------===// +// srcX0 and srcY0 must use different VGPR banks. 
+//===----------------------------------------------------------------------===// + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v12, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0c,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v13, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0d,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v15, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0f,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v16, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x10,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v17, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x11,0x2b,0x00,0x00] + +//===----------------------------------------------------------------------===// +// srcX1 and srcY1 must use different VGPR banks. +//===----------------------------------------------------------------------===// + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v22 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2d,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v23 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2f,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v25 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x33,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v26 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x35,0x00,0x00] + +v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v27 +// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x37,0x00,0x00] + +//===----------------------------------------------------------------------===// +// srcX2 and srcY2 must use different VGPR banks. 
+//===----------------------------------------------------------------------===// + +v_dual_fmamk_f32 v6, v1, 0xaf123456, v0 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v1 +// GFX11: encoding: [0x01,0x01,0x84,0xc8,0x02,0x03,0x04,0x06,0x56,0x34,0x12,0xaf] + +v_dual_fmamk_f32 v6, v1, 0xaf123456, v1 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v0 +// GFX11: encoding: [0x01,0x03,0x84,0xc8,0x02,0x01,0x04,0x06,0x56,0x34,0x12,0xaf] + +v_dual_fmac_f32 v6, v1, v2 :: v_dual_fmamk_f32 v7, v2, 0xaf123456, v7 +// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0f,0x06,0x06,0x56,0x34,0x12,0xaf] + +v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v6 +// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0d,0x06,0x07,0x56,0x34,0x12,0xaf] + +v_dual_fmamk_f32 v5, v1, 0xaf123456, v5 :: v_dual_fmac_f32 v6, v2, v3 +// GFX11: encoding: [0x01,0x0b,0x80,0xc8,0x02,0x07,0x06,0x05,0x56,0x34,0x12,0xaf] + +v_dual_fmamk_f32 v6, v1, 0xaf123456, v6 :: v_dual_fmac_f32 v5, v2, v3 +// GFX11: encoding: [0x01,0x0d,0x80,0xc8,0x02,0x07,0x04,0x06,0x56,0x34,0x12,0xaf]