diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -326,6 +326,7 @@ } bool isVRegWithInputMods() const; + bool isT16VRegWithInputMods() const; bool isSDWAOperand(MVT type) const; bool isSDWAFP16Operand() const; @@ -505,6 +506,10 @@ return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); } + bool isVCSrcTB16_F128() const { + return isRegOrInlineNoMods(AMDGPU::VS_32_F128RegClassID, MVT::i16); + } + bool isVCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); } @@ -521,6 +526,10 @@ return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); } + bool isVCSrcTF16_F128() const { + return isRegOrInlineNoMods(AMDGPU::VS_32_F128RegClassID, MVT::f16); + } + bool isVCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); } @@ -537,6 +546,10 @@ return isVCSrcF64() || isLiteralImm(MVT::i64); } + bool isVSrcTB16_F128() const { + return isVCSrcTB16_F128() || isLiteralImm(MVT::i16); + } + bool isVSrcB16() const { return isVCSrcB16() || isLiteralImm(MVT::i16); } @@ -569,6 +582,10 @@ return isVCSrcF64() || isLiteralImm(MVT::f64); } + bool isVSrcTF16_F128() const { + return isVCSrcTF16_F128() || isLiteralImm(MVT::f16); + } + bool isVSrcF16() const { return isVCSrcF16() || isLiteralImm(MVT::f16); } @@ -2028,6 +2045,10 @@ AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); } +bool AMDGPUOperand::isT16VRegWithInputMods() const { + return isRegClass(AMDGPU::VGPR_32_F128RegClassID); +} + bool AMDGPUOperand::isSDWAOperand(MVT type) const { if (AsmParser->isVI()) return isVReg32(); @@ -8241,19 +8262,16 @@ // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0. if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || - Opc == AMDGPU::V_MAC_F32_e64_gfx10 || - Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F32_e64_vi || Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_MAC_F16_e64_vi || - Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || + Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || - Opc == AMDGPU::V_FMAC_F32_e64_vi || + Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { + Opc == AMDGPU::V_FMAC_F16_T16_e64_gfx11) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -167,6 +167,7 @@ DecodeStatus convertVOPCDPPInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; + MCOperand decodeOperand_VGPR_32_F128(unsigned Val) const; MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -119,6 +119,7 @@ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VGPR_32_F128) DECODE_OPERAND_REG(VRegOrLds_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) @@ -604,7 +605,7 @@ MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx11)) { + MI.getOpcode() == AMDGPU::V_FMAC_F16_T16_e64_gfx11)) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); @@ -1139,6 +1140,10 @@ return decodeSrcOp(OPWV232, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_F128(unsigned Val) const { + return createRegOperand(AMDGPU::VGPR_32_F128RegClassID, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { // Some instructions have operand restrictions beyond what the encoding // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -123,6 +123,8 @@ LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n"); return false; } + if (llvm::AMDGPU::isTrue16Inst(Op)) + return false; if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { // Give up if there are any uses of the sdst in carry-out or VOPC. // The shrunken form of the instruction would write it to vcc instead of to @@ -601,6 +603,9 @@ LLVM_DEBUG(dbgs() << " try: " << OrigMI); auto OrigOp = OrigMI.getOpcode(); + assert( + (TII->get(OrigOp).Size != 4 || !llvm::AMDGPU::isTrue16Inst(OrigOp)) && + "There should not be e32 True16 instructions pre-RA"); if (OrigOp == AMDGPU::REG_SEQUENCE) { Register FwdReg = OrigMI.getOperand(0).getReg(); unsigned FwdSubReg = 0; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1297,6 +1297,7 @@ switch (Op) { case AMDGPU::V_MAX_F32_e64: case AMDGPU::V_MAX_F16_e64: + case AMDGPU::V_MAX_F16_T16_e64: case AMDGPU::V_MAX_F64_e64: case AMDGPU::V_PK_MAX_F16: { if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) @@ -1391,7 +1392,8 @@ return SIOutMods::NONE; } } - case AMDGPU::V_MUL_F16_e64: { + case AMDGPU::V_MUL_F16_e64: + case AMDGPU::V_MUL_F16_T16_e64: { switch (static_cast(Val)) { case 0x3800: // 0.5 return SIOutMods::DIV2; @@ -1449,10 +1451,12 @@ } case AMDGPU::V_ADD_F64_e64: case AMDGPU::V_ADD_F32_e64: - case AMDGPU::V_ADD_F16_e64: { + case AMDGPU::V_ADD_F16_e64: + case AMDGPU::V_ADD_F16_T16_e64: { // If output denormals are enabled, omod is ignored. if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) || - ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64) && + ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 || + Op == AMDGPU::V_ADD_F16_T16_e64) && MFI->getMode().FP64FP16OutputDenormals)) return std::make_pair(nullptr, SIOutMods::NONE); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3276,13 +3276,19 @@ return MIB; } + assert(Opc != AMDGPU::V_FMAC_F16_T16_e32 && + "V_FMAC_F16_T16_e32 is not supported and not expected to be present " + "pre-RA"); + // Handle MAC/FMAC. bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64; + Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_T16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_T16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || @@ -3296,6 +3302,7 @@ return nullptr; case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_T16_e64: case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F32_e64: @@ -3359,7 +3366,9 @@ int64_t Imm; if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = - IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_T16 + : AMDGPU::V_FMAAK_F16) + : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) @@ -3374,9 +3383,11 @@ return MIB; } } - unsigned NewOpc = IsFMA - ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) - : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); + unsigned NewOpc = + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_T16 + : AMDGPU::V_FMAMK_F16) + : AMDGPU::V_FMAMK_F32) + : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1400,10 +1400,20 @@ let PredicateMethod = "isVRegWithInputMods"; } +def FPT16VRegInputModsMatchClass : AsmOperandClass { + let Name = "T16VRegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isT16VRegWithInputMods"; +} + def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } +def FPT16VRegInputMods : InputMods { + let PrintMethod = "printOperandAndFPInputMods"; +} + class IntSDWAInputModsMatchClass : AsmOperandClass { let Name = "SDWAWithInt"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithIntInputMods"; @@ -1432,6 +1442,16 @@ let PredicateMethod = "isVRegWithInputMods"; } +def IntT16VRegInputModsMatchClass : AsmOperandClass { + let Name = "T16VRegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isT16VRegWithInputMods"; +} + +def IntT16VRegInputMods : InputMods { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputMods : InputMods { let PrintMethod = "printOperandAndIntInputMods"; } @@ -1598,6 +1618,14 @@ VOPDstS64orS32)))); // else VT == i1 } +class getVALUDstForVT_T16 { + RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, + !if(!eq(VT.Size, 128), VOPDstOperand, + !if(!eq(VT.Size, 64), VOPDstOperand, + !if(!eq(VT.Size, 16), VOPDstOperand, + VOPDstS64orS32)))); // else VT == i1 +} + // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT { @@ -1608,7 +1636,7 @@ // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. -class getVOPSrc0ForVT { +class getVOPSrc0ForVT { bit isFP = isFloatType.ret; RegisterOperand ret = @@ -1616,7 +1644,10 @@ !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Value, f16.Value), - VSrc_f16, + !if(IsTrue16, + VSrcT_f16_F128, + VSrc_f16 + ), !if(!eq(VT.Value, v2f16.Value), VSrc_v2f16, !if(!eq(VT.Value, v4f16.Value), @@ -1629,7 +1660,10 @@ !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Value, i16.Value), - VSrc_b16, + !if(IsTrue16, + VSrcT_b16_F128, + VSrc_b16 + ), !if(!eq(VT.Value, v2i16.Value), VSrc_v2b16, VSrc_b32 @@ -1652,6 +1686,15 @@ VGPR_32)))); } +class getVregSrcForVT_T16 { + RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, + !if(!eq(VT.Size, 96), VReg_96, + !if(!eq(VT.Size, 64), VReg_64, + !if(!eq(VT.Size, 48), VReg_64, + !if(!eq(VT.Size, 16), VGPR_32_F128, + VGPR_32))))); +} + class getSDWASrcForVT { bit isFP = isFloatType.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); @@ -1759,6 +1802,16 @@ Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +class getSrcModDPP_T16 { + bit isFP = isFloatType.ret; + Operand ret = + !if (isFP, + !if (!eq(VT.Value, f16.Value), FPT16VRegInputMods, + FPVRegInputMods), + !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods, + IntVRegInputMods)); +} + // Return type of input modifiers operand for specified input operand for DPP class getSrcModVOP3DPP { bit isFP = isFloatType.ret; @@ -2382,6 +2435,7 @@ field list ArgVT = _ArgVT; field bit EnableF32SrcMods = _EnableF32SrcMods; field bit EnableClamp = _EnableClamp; + field bit IsTrue16 = 0; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; @@ -2392,7 +2446,7 @@ field RegisterOperand DstRC64 = DstRC; field RegisterOperand DstRCVOP3DPP = DstRC64; field RegisterOperand DstRCSDWA = getSDWADstForVT.ret; - field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; + field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterOperand Src1RC32 = RegisterOperand.ret>; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; @@ -2411,6 +2465,8 @@ field Operand Src0ModDPP = getSrcModDPP.ret; field Operand Src1ModDPP = getSrcModDPP.ret; field Operand Src2ModDPP = getSrcModDPP.ret; + field Operand Src0ModVOP3DPP = getSrcModDPP.ret; + field Operand Src1ModVOP3DPP = getSrcModDPP.ret; field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret; field Operand Src0ModSDWA = getSrcModSDWA.ret; field Operand Src1ModSDWA = getSrcModSDWA.ret; @@ -2513,7 +2569,7 @@ Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; field dag InsVOP3Base = getInsVOP3Base.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; field dag InsVOP3DPP = getInsVOP3DPP.ret; field dag InsVOP3DPP16 = getInsVOP3DPP16.ret; field dag InsVOP3DPP8 = getInsVOP3DPP8.ret; @@ -2569,7 +2625,25 @@ class VOP_PAT_GEN : VOPProfile { let NeedPatGen = mode; } -def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; + +// VOPC_Profile_T16, VOPC_NoSdst_Profile_T16, VOPC_Class_Profile_T16, +// VOPC_Class_NoSdst_Profile_T16, and VOP_MAC_F16_T16 do not inherit from this +// class, so copy changes to this class in those profiles +class VOPProfile_True16 : VOPProfile { + let IsTrue16 = 1; + // Most DstVT are 16-bit, but not all + let DstRC = getVALUDstForVT_T16.ret; + let DstRC64 = getVALUDstForVT.ret; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; +} + +def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -921,62 +921,70 @@ } // End OtherPredicates = [UnsafeFPMath] -// f16_to_fp patterns -def : GCNPat < - (f32 (f16_to_fp i32:$src0)), - (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0) ->; +multiclass f16_fp_Pats { + // f16_to_fp patterns + def : GCNPat < + (f32 (f16_to_fp i32:$src0)), + (cvt32to16_e64 SRCMODS.NONE, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), - (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), + (cvt32to16_e64 SRCMODS.ABS, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (i32 (srl_oneuse (and_oneuse i32:$src0, 0x7fff0000), (i32 16))))), - (V_CVT_F32_F16_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0))) ->; + def : GCNPat < + (f32 (f16_to_fp (i32 (srl_oneuse (and_oneuse i32:$src0, 0x7fff0000), (i32 16))))), + (cvt32to16_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0))) + >; -def : GCNPat < - (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), - (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), + (cvt32to16_e64 SRCMODS.NEG_ABS, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), - (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), + (cvt32to16_e64 SRCMODS.NEG, $src0) + >; -def : GCNPat < - (f64 (fpextend f16:$src)), - (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src)) ->; + def : GCNPat < + (f64 (fpextend f16:$src)), + (V_CVT_F64_F32_e32 (cvt32to16_e32 $src)) + >; -// fp_to_fp16 patterns -def : GCNPat < - (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), - (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0) ->; + // fp_to_fp16 patterns + def : GCNPat < + (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (cvt16to32_e64 $src0_modifiers, f32:$src0) + >; -def : GCNPat < - (i32 (fp_to_sint f16:$src)), - (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (i32 (fp_to_sint f16:$src)), + (V_CVT_I32_F32_e32 (cvt32to16_e32 VSrc_b32:$src)) + >; -def : GCNPat < - (i32 (fp_to_uint f16:$src)), - (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (i32 (fp_to_uint f16:$src)), + (V_CVT_U32_F32_e32 (cvt32to16_e32 VSrc_b32:$src)) + >; -def : GCNPat < - (f16 (sint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (f16 (sint_to_fp i32:$src)), + (cvt16to32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src)) + >; -def : GCNPat < - (f16 (uint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (f16 (uint_to_fp i32:$src)), + (cvt16to32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src)) + >; +} + +let SubtargetPredicate = NotHasTrue16BitInsts in +defm : f16_fp_Pats; + +let SubtargetPredicate = HasTrue16BitInsts in +defm : f16_fp_Pats; //===----------------------------------------------------------------------===// // VOP2 Patterns @@ -1503,7 +1511,10 @@ def : ClampPat; def : ClampPat; +let SubtargetPredicate = NotHasTrue16BitInsts in def : ClampPat; +let SubtargetPredicate = HasTrue16BitInsts in +def : ClampPat; let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < @@ -2268,6 +2279,7 @@ ) >; +let SubtargetPredicate = NotHasTrue16BitInsts in def : GCNPat < (f16 (sint_to_fp i1:$src)), (V_CVT_F16_F32_e32 ( @@ -2276,6 +2288,16 @@ SSrc_i1:$src)) >; +let SubtargetPredicate = HasTrue16BitInsts in +def : GCNPat < + (f16 (sint_to_fp i1:$src)), + (V_CVT_F16_F32_T16_e32 ( + V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), + /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), + SSrc_i1:$src)) +>; + +let SubtargetPredicate = NotHasTrue16BitInsts in def : GCNPat < (f16 (uint_to_fp i1:$src)), (V_CVT_F16_F32_e32 ( @@ -2283,6 +2305,14 @@ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), SSrc_i1:$src)) >; +let SubtargetPredicate = HasTrue16BitInsts in +def : GCNPat < + (f16 (uint_to_fp i1:$src)), + (V_CVT_F16_F32_T16_e32 ( + V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), + /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), + SSrc_i1:$src)) +>; def : GCNPat < (f32 (sint_to_fp i1:$src)), @@ -2501,6 +2531,8 @@ // Prefer selecting to max when legal, but using mul is always valid. let AddedComplexity = -5 in { + +let OtherPredicates = [NotHasTrue16BitInsts] in { def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) @@ -2510,6 +2542,19 @@ (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) >; +} // End OtherPredicates + +let OtherPredicates = [HasTrue16BitInsts] in { +def : GCNPat< + (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), + (V_MUL_F16_T16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) +>; + +def : GCNPat< + (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), + (V_MUL_F16_T16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) +>; +} // End OtherPredicates def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), @@ -2552,8 +2597,13 @@ def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> { - // FIXME: Should have 16-bit inst subtarget predicate - let OtherPredicates = f16_preds; + let OtherPredicates = !listconcat(f16_preds, [Has16BitInsts, NotHasTrue16BitInsts]); + } + + def : GCNPat< + (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), + (V_MAX_F16_T16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> { + let OtherPredicates = !listconcat(f16_preds, [Has16BitInsts, HasTrue16BitInsts]); } def : GCNPat< @@ -2600,9 +2650,10 @@ >; } // End OtherPredicates = [HasDLInsts] -let SubtargetPredicate = isGFX10Plus in +let SubtargetPredicate = isGFX10Plus in { // Don't allow source modifiers. If there are any source modifiers then it's // better to select fma instead of fmac. +let OtherPredicates = [NotHasTrue16BitInsts] in def : GCNPat < (fma (f16 (VOP3NoMods f32:$src0)), (f16 (VOP3NoMods f32:$src1)), @@ -2610,6 +2661,15 @@ (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2) >; +let OtherPredicates = [HasTrue16BitInsts] in +def : GCNPat < + (fma (f16 (VOP3NoMods f32:$src0)), + (f16 (VOP3NoMods f32:$src1)), + (f16 (VOP3NoMods f32:$src2))), + (V_FMAC_F16_T16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2) +>; +} let SubtargetPredicate = isGFX90APlus in // Don't allow source modifiers. If there are any source modifiers then it's diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2877,6 +2877,10 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { + if (Idx == AMDGPU::RegisterPressureSets::VGPR_32_F128) { + return getRegPressureLimit(&AMDGPU::VGPR_32_F128RegClass, + const_cast(MF)); + } if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 || Idx == AMDGPU::RegisterPressureSets::AGPR_32) return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -553,6 +553,13 @@ let Size = 32; let Weight = 1; } + +def VGPR_32_F128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, + (add (sequence "VGPR%u", 0, 127))> { + let AllocationPriority = 0; + let Size = 32; + let Weight = 1; +} } // End HasVGPR = 1 // VGPR 64-bit registers @@ -876,6 +883,13 @@ let HasSGPR = 1; } +def VS_32_F128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add VGPR_32_F128, SReg_32, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; + let HasVGPR = 1; + let HasSGPR = 1; +} + def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; let HasVGPR = 1; @@ -922,6 +936,25 @@ let RenderMethod = "addRegOrImmOperands"; } +// For VOP1,2,C True16 instructions. Uses first 128 32-bit VGPRs only +multiclass SIRegOperand16 { + let OperandNamespace = "AMDGPU" in { + def _b16_F128 : RegisterOperand(rc#rc_suffix#"_F128")> { + let OperandType = opType#"_INT16"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_VSrc16"; + } + + def _f16_F128 : RegisterOperand(rc#rc_suffix#"_F128")> { + let OperandType = opType#"_FP16"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_" # rc # "_16"; + } + } +} + + multiclass SIRegOperand32 { let OperandNamespace = "AMDGPU" in { @@ -1041,6 +1074,7 @@ //===----------------------------------------------------------------------===// defm VSrc : RegImmOperand<"VS", "VSrc">; +defm VSrcT : SIRegOperand16<"VS", "VSrcT", "OPERAND_REG_IMM">; def VSrc_128 : RegisterOperand { let DecoderMethod = "DecodeVS_128RegisterClass"; @@ -1051,6 +1085,17 @@ // with FMAMK/FMAAK //===----------------------------------------------------------------------===// +multiclass SIRegOperand16_Deferred { + let OperandNamespace = "AMDGPU" in { + def _f16_F128_Deferred : RegisterOperand(rc#rc_suffix#"_F128")> { + let OperandType = opType#"_FP16_DEFERRED"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred"; + } + } +} + multiclass SIRegOperand32_Deferred { let OperandNamespace = "AMDGPU" in { @@ -1069,6 +1114,7 @@ } defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">; +defm VSrcT : SIRegOperand16_Deferred<"VS", "VSrcT", "OPERAND_REG_IMM">; //===----------------------------------------------------------------------===// // VRegSrc_* Operands with a VGPR @@ -1101,6 +1147,9 @@ def VGPRSrc_32 : RegisterOperand { let DecoderMethod = "DecodeVGPR_32RegisterClass"; } +def VGPRSrc_32_F128 : RegisterOperand { + let DecoderMethod = "DecodeVGPR_32RegisterClass"; +} //===----------------------------------------------------------------------===// // ASrc_* Operands with an AccVGPR @@ -1116,6 +1165,7 @@ //===----------------------------------------------------------------------===// defm VCSrc : RegInlineOperand<"VS", "VCSrc">; +defm VCSrcT : SIRegOperand16<"VS", "VCSrcT", "OPERAND_REG_INLINE_C">; //===----------------------------------------------------------------------===// // VISrc_* Operands with a VGPR or an inline constant diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -11,6 +11,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -39,6 +40,7 @@ } bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const; + bool shouldShrinkTrue16(MachineInstr &MI) const; bool isKImmOperand(const MachineOperand &Src) const; bool isKUImmOperand(const MachineOperand &Src) const; bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const; @@ -139,6 +141,26 @@ return false; } +/// Only shrink 16 bit registers after RA. +/// Do not shrink the instruction if its registers are not expressible in the +/// shrunk encoding. +bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const { + for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (Reg.isVirtual()) { + return false; + } else { + if (AMDGPU::VGPR_32RegClass.contains(Reg) && + !AMDGPU::VGPR_32_F128RegClass.contains(Reg)) + return false; + } + } + } + return true; +} + bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const { return isInt<16>(Src.getImm()) && !TII->isInlineConstant(*Src.getParent(), @@ -383,7 +405,8 @@ break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAAK_F16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_T16 + : AMDGPU::V_FMAAK_F16; break; } } @@ -411,7 +434,8 @@ break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAMK_F16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_T16 + : AMDGPU::V_FMAMK_F16; break; } } @@ -419,6 +443,9 @@ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) return; + if (llvm::AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI)) + return; + if (Swap) { // Swap Src0 and Src1 by building a new instruction. BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode), @@ -945,6 +972,9 @@ if (Next) continue; } + if (ST->hasTrue16BitInsts() && + llvm::AMDGPU::isTrue16Inst(MI.getOpcode()) && !shouldShrinkTrue16(MI)) + continue; // We can shrink this instruction LLVM_DEBUG(dbgs() << "Shrinking " << MI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -496,6 +496,9 @@ LLVM_READONLY int getVOPDFull(unsigned OpX, unsigned OpY); +LLVM_READONLY +bool isTrue16Inst(unsigned Opc); + LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -33,11 +33,6 @@ llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4)); -// TODO-GFX11: Remove this when full 16-bit codegen is implemented. -static llvm::cl::opt - LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden, - llvm::cl::desc("Never use more than 128 VGPRs")); - namespace { /// \returns Bit mask for given bit \p Shift and bit \p Width. @@ -289,6 +284,11 @@ uint16_t OpY; }; +struct VOPTrue16Info { + uint16_t Opcode; + bool IsTrue16; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -309,6 +309,8 @@ #define GET_VOPDComponentTable_IMPL #define GET_VOPDPairs_DECL #define GET_VOPDPairs_IMPL +#define GET_VOPTrue16Table_DECL +#define GET_VOPTrue16Table_IMPL #define GET_WMMAOpcode2AddrMappingTable_DECL #define GET_WMMAOpcode2AddrMappingTable_IMPL #define GET_WMMAOpcode3AddrMappingTable_DECL @@ -427,6 +429,11 @@ return Info ? Info->VOPDOp : ~0u; } +bool isTrue16Inst(unsigned Opc) { + const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); + return Info ? Info->IsTrue16 : false; +} + unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); return Info ? Info->Opcode3Addr : ~0u; @@ -860,15 +867,6 @@ } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs - : isGFX11Plus(*STI)) { - // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions - // such that values 128..255 no longer mean v128..v255, they mean - // v0.hi..v127.hi instead. Until the compiler understands this, it is not - // safe to use v128..v255. - // TODO-GFX11: Remove this when full 16-bit codegen is implemented. - return 128; - } if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 512; return 256; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -142,7 +142,18 @@ def : MnemonicAlias, LetDummies; foreach _ = BoolToList.ret in - def : MnemonicAlias, LetDummies; + def : MnemonicAlias, LetDummies; +} + +multiclass VOP1Inst_T16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOP1Inst; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOP1Inst, node>; + } } // Special profile for instructions which have clamp @@ -151,7 +162,19 @@ VOPProfile<[dstVt, srcVt, untyped, untyped]> { let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); - let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); + let Asm64 = "$vdst, $src0$clamp$omod"; + let AsmVOP3DPPBase = Asm64; + + let HasModifiers = 0; + let HasClamp = 1; +} + +class VOPProfileI2F_True16 : + VOPProfile_True16> { + + let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0$clamp$omod"; let AsmVOP3DPPBase = Asm64; @@ -162,6 +185,7 @@ def VOP1_F64_I32 : VOPProfileI2F ; def VOP1_F32_I32 : VOPProfileI2F ; def VOP1_F16_I16 : VOPProfileI2F ; +def VOP1_F16_I16_T16 : VOPProfileI2F_True16 ; def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{ let HasExtVOP3DPP = 0; @@ -177,6 +201,9 @@ def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; +def VOP_I16_F16_SPECIAL_OMOD_T16 : VOPProfile_True16 { + let HasOMod = 1; +} //===----------------------------------------------------------------------===// // VOP1 Instructions @@ -264,10 +291,16 @@ defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>; let FPDPRounding = 1, isReMaterializable = 0 in { -defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; + let OtherPredicates = [NotHasTrue16BitInsts] in + defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; + let OtherPredicates = [HasTrue16BitInsts] in + defm V_CVT_F16_F32_T16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16, fpround>; } // End FPDPRounding = 1, isReMaterializable = 0 +let OtherPredicates = [NotHasTrue16BitInsts] in defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; +let OtherPredicates = [HasTrue16BitInsts] in +defm V_CVT_F32_F16_T16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16, fpextend>; let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; @@ -425,48 +458,68 @@ } // End SubtargetPredicate = isGFX7Plus } // End isReMaterializable = 1 -let SubtargetPredicate = Has16BitInsts in { - let FPDPRounding = 1 in { +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_CVT_F16_U16_T16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_T16, uint_to_fp>; +defm V_CVT_F16_I16_T16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_T16, sint_to_fp>; +} } // End FPDPRounding = 1 // OMod clears exceptions when set in these two instructions +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_CVT_U16_F16_T16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_T16, fp_to_uint>; +defm V_CVT_I16_F16_T16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_T16, fp_to_sint>; +} let TRANS = 1, SchedRW = [WriteTrans32] in { -defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; -defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; -defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; -defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>; -defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; -defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; -defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; +defm V_RCP_F16 : VOP1Inst_T16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; +defm V_SQRT_F16 : VOP1Inst_T16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; +defm V_RSQ_F16 : VOP1Inst_T16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; +defm V_LOG_F16 : VOP1Inst_T16 <"v_log_f16", VOP_F16_F16, flog2>; +defm V_EXP_F16 : VOP1Inst_T16 <"v_exp_f16", VOP_F16_F16, fexp2>; +defm V_SIN_F16 : VOP1Inst_T16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; +defm V_COS_F16 : VOP1Inst_T16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } // End TRANS = 1, SchedRW = [WriteTrans32] -defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; +defm V_FREXP_MANT_F16 : VOP1Inst_T16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>; -defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; -defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>; -defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>; -defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_FREXP_EXP_I16_F16_T16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_T16, int_amdgcn_frexp_exp>; +} +defm V_FLOOR_F16 : VOP1Inst_T16 <"v_floor_f16", VOP_F16_F16, ffloor>; +defm V_CEIL_F16 : VOP1Inst_T16 <"v_ceil_f16", VOP_F16_F16, fceil>; +defm V_TRUNC_F16 : VOP1Inst_T16 <"v_trunc_f16", VOP_F16_F16, ftrunc>; +defm V_RNDNE_F16 : VOP1Inst_T16 <"v_rndne_f16", VOP_F16_F16, frint>; let FPDPRounding = 1 in { -defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; +defm V_FRACT_F16 : VOP1Inst_T16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; } // End FPDPRounding = 1 -} - -let OtherPredicates = [Has16BitInsts] in { - +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { def : GCNPat< (f32 (f16_to_fp i16:$src)), (V_CVT_F32_F16_e32 $src) >; - def : GCNPat< (i16 (AMDGPUfp_to_f16 f32:$src)), (V_CVT_F16_F32_e32 $src) >; - +} +let OtherPredicates = [HasTrue16BitInsts] in { +def : GCNPat< + (f32 (f16_to_fp i16:$src)), + (V_CVT_F32_F16_T16_e32 $src) +>; +def : GCNPat< + (i16 (AMDGPUfp_to_f16 f32:$src)), + (V_CVT_F16_F32_T16_e32 $src) +>; } def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> { @@ -489,8 +542,14 @@ defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; let mayRaiseFPException = 0 in { - defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>; - defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>; + let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { + defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>; + defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm V_CVT_NORM_I16_F16_T16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_T16>; + defm V_CVT_NORM_U16_F16_T16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_T16>; + } } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus @@ -584,9 +643,9 @@ getVOP1Pat64.ret, /*VOP1Only=*/ 1>; - defm V_NOT_B16 : VOP1Inst<"v_not_b16", VOP_I16_I16>; - defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>; - defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I32_I16>; + defm V_NOT_B16 : VOP1Inst_T16<"v_not_b16", VOP_I16_I16>; + defm V_CVT_I32_I16 : VOP1Inst_T16<"v_cvt_i32_i16", VOP_I32_I16>; + defm V_CVT_U32_U16 : VOP1Inst_T16<"v_cvt_u32_u16", VOP_I32_I16>; } // End SubtargetPredicate = isGFX11Plus //===----------------------------------------------------------------------===// @@ -650,8 +709,7 @@ string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # ps.AsmOperands in { - defm NAME : VOP1_Real_e32_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_e32_gfx11; } } multiclass VOP1_Real_e64_gfx11 op> { @@ -669,8 +727,7 @@ string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in { - defm NAME : VOP1_Real_dpp_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_dpp_gfx11; } } multiclass VOP1_Real_dpp8_gfx11 op, string opName = NAME> { @@ -683,8 +740,7 @@ string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in { - defm NAME : VOP1_Real_dpp8_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_dpp8_gfx11; } } } // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" @@ -703,16 +759,24 @@ VOP1_Real_dpp_gfx11, VOP1_Real_dpp8_gfx11; multiclass VOP1_Real_NO_VOP3_with_name_gfx11 op, string opName, - string asmName> : - VOP1_Real_e32_with_name_gfx11, - VOP1_Real_dpp_with_name_gfx11, - VOP1_Real_dpp8_with_name_gfx11; + string asmName> { + defm NAME : VOP1_Real_e32_with_name_gfx11, + VOP1_Real_dpp_with_name_gfx11, + VOP1_Real_dpp8_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def gfx11_alias : MnemonicAlias, + Requires<[isGFX11Plus]>; +} multiclass VOP1_Real_FULL_with_name_gfx11 op, string opName, string asmName> : VOP1_Real_NO_VOP3_with_name_gfx11, VOP1_Realtriple_e64_with_name_gfx11; +multiclass VOP1_Real_FULL_T16_gfx11 op, string asmName, + string opName = NAME> : + VOP1_Real_FULL_with_name_gfx11; + multiclass VOP1_Real_NO_DPP_gfx11 op> : VOP1_Real_e32_gfx11, VOP1_Real_e64_gfx11; @@ -727,9 +791,33 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b, "V_FFBH_I32", "v_cls_i32">; defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>; -defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>; -defm V_CVT_I32_I16 : VOP1_Real_FULL_gfx11<0x06a>; -defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>; +defm V_NOT_B16_T16 : VOP1_Real_FULL_T16_gfx11<0x069, "v_not_b16">; +defm V_CVT_I32_I16_T16 : VOP1_Real_FULL_T16_gfx11<0x06a, "v_cvt_i32_i16">; +defm V_CVT_U32_U16_T16 : VOP1_Real_FULL_T16_gfx11<0x06b, "v_cvt_u32_u16">; + +defm V_CVT_F16_U16_T16 : VOP1_Real_FULL_T16_gfx11<0x050, "v_cvt_f16_u16">; +defm V_CVT_F16_I16_T16 : VOP1_Real_FULL_T16_gfx11<0x051, "v_cvt_f16_i16">; +defm V_CVT_U16_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x052, "v_cvt_u16_f16">; +defm V_CVT_I16_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x053, "v_cvt_i16_f16">; +defm V_RCP_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x054, "v_rcp_f16">; +defm V_SQRT_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x055, "v_sqrt_f16">; +defm V_RSQ_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x056, "v_rsq_f16">; +defm V_LOG_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x057, "v_log_f16">; +defm V_EXP_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x058, "v_exp_f16">; +defm V_FREXP_MANT_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x059, "v_frexp_mant_f16">; +defm V_FREXP_EXP_I16_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05a, "v_frexp_exp_i16_f16">; +defm V_FLOOR_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05b, "v_floor_f16">; +defm V_CEIL_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05c, "v_ceil_f16">; +defm V_TRUNC_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05d, "v_trunc_f16">; +defm V_RNDNE_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05e, "v_rndne_f16">; +defm V_FRACT_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x05f, "v_fract_f16">; +defm V_SIN_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x060, "v_sin_f16">; +defm V_COS_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x061, "v_cos_f16">; +defm V_CVT_NORM_I16_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x063, "v_cvt_norm_i16_f16">; +defm V_CVT_NORM_U16_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x064, "v_cvt_norm_u16_f16">; + +defm V_CVT_F16_F32_T16 : VOP1_Real_FULL_T16_gfx11<0x00a, "v_cvt_f16_f32">; +defm V_CVT_F32_F16_T16 : VOP1_Real_FULL_T16_gfx11<0x00b, "v_cvt_f32_f16">; //===----------------------------------------------------------------------===// // GFX10. @@ -789,27 +877,27 @@ defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>; defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>; -defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>; -defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>; -defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>; -defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>; -defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>; -defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>; -defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>; -defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>; -defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>; -defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>; -defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>; -defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>; -defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>; -defm V_TRUNC_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05d>; -defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>; -defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>; -defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>; -defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>; +defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; +defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; +defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; +defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; +defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; +defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; +defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; +defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; +defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; +defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; +defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; +defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; +defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; +defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; +defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; +defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; +defm V_COS_F16 : VOP1_Real_gfx10<0x061>; defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>; -defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>; -defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>; +defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; +defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>; defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>; @@ -893,8 +981,8 @@ defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>; defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>; defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>; -defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>; -defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>; +defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; +defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>; @@ -1077,17 +1165,17 @@ // indexing mode. vdst can't be treated as a def for codegen purposes, // and an implicit use and def of the super register should be added. def V_MOV_B32_indirect_write : VPseudoInstSI<(outs), - (ins getVALUDstForVT.ret:$vdst, getVOPSrc0ForVT.ret:$src0)>, + (ins getVALUDstForVT.ret:$vdst, getVOPSrc0ForVT.ret:$src0)>, PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT.ret:$vdst, - getVOPSrc0ForVT.ret:$src0)>; + getVOPSrc0ForVT.ret:$src0)>; // Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the // super register should be added. def V_MOV_B32_indirect_read : VPseudoInstSI< (outs getVALUDstForVT.ret:$vdst), - (ins getVOPSrc0ForVT.ret:$src0)>, + (ins getVOPSrc0ForVT.ret:$src0)>, PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT.ret:$vdst, - getVOPSrc0ForVT.ret:$src0)>; + getVOPSrc0ForVT.ret:$src0)>; } // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -186,6 +186,36 @@ } } +multiclass VOP2Inst_T16 { + let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in { + defm NAME : VOP2Inst; + } + let SubtargetPredicate = HasTrue16BitInsts in { + defm _T16 : VOP2Inst, node, revOp#"_t16", GFX9Renamed>; + } +} + +// Creating a _T16_e32 pseudo when there is no corresponding real instruction on +// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we +// assume means the instruction is already a real. The fix is to not create that +// _T16_e32 pseudo +multiclass VOP2Inst_e64_T16 { + let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in { + defm NAME : VOP2Inst; + } + let SubtargetPredicate = HasTrue16BitInsts in { + defm _T16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + } +} + multiclass VOP2Inst_VOPD VOPDOp, @@ -341,11 +371,18 @@ } def VOP_MADAK_F16 : VOP_MADAK ; +def VOP_MADAK_F16_T16 : VOP_MADAK { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let Ins32 = (ins VSrcT_f16_F128_Deferred:$src0, VGPR_32_F128:$src1, ImmOpType:$imm); +} def VOP_MADAK_F32 : VOP_MADAK ; class VOP_MADMK : VOP_MADK_Base { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag Ins32 = !if(!eq(vt.Size, 32), + (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1), + (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1)); field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); @@ -359,6 +396,11 @@ } def VOP_MADMK_F16 : VOP_MADMK ; +def VOP_MADMK_F16_T16 : VOP_MADMK { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let Ins32 = (ins VSrcT_f16_F128_Deferred:$src0, ImmOpType:$imm, VGPR_32_F128:$src1); +} def VOP_MADMK_F32 : VOP_MADMK ; class getRegisterOperandForVT { @@ -411,6 +453,28 @@ } def VOP_MAC_F16 : VOP_MAC ; +def VOP_MAC_F16_T16 : VOP_MAC { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let DstRC64 = VOPDstOperand; + let Src1RC32 = VGPRSrc_32_F128; + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_T16.ret:$src2); + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT_T16.ret:$src2, // stub argument + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT_T16.ret:$src2, // stub argument + dpp8:$dpp8, FI:$fi); +} def VOP_MAC_F32 : VOP_MAC ; let HasExtDPP = 0, HasExt32BitDPP = 0 in def VOP_MAC_LEGACY_F32 : VOP_MAC ; @@ -752,39 +816,82 @@ def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; +//===----------------------------------------------------------------------===// +// 16-Bit Operand Instructions +//===----------------------------------------------------------------------===// -let SubtargetPredicate = Has16BitInsts in { let isReMaterializable = 1 in { let FPDPRounding = 1 in { -def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; -defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; +defm V_LDEXP_F16 : VOP2Inst_T16 <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; } // End FPDPRounding = 1 - -defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; -defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; -defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; - +// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions +defm V_LSHLREV_B16 : VOP2Inst_e64_T16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; +defm V_LSHRREV_B16 : VOP2Inst_e64_T16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; +defm V_ASHRREV_I16 : VOP2Inst_e64_T16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; let isCommutable = 1 in { let FPDPRounding = 1 in { -defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>; -defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; -defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; -defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; +defm V_ADD_F16 : VOP2Inst_T16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>; +defm V_SUB_F16 : VOP2Inst_T16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; +defm V_SUBREV_F16 : VOP2Inst_T16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; +defm V_MUL_F16 : VOP2Inst_T16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; +} // End FPDPRounding = 1 +defm V_MUL_LO_U16 : VOP2Inst_e64_T16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; +defm V_MAX_F16 : VOP2Inst_T16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; +defm V_MIN_F16 : VOP2Inst_T16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; +defm V_MAX_U16 : VOP2Inst_e64_T16 <"v_max_u16", VOP_I16_I16_I16, umax>; +defm V_MAX_I16 : VOP2Inst_e64_T16 <"v_max_i16", VOP_I16_I16_I16, smax>; +defm V_MIN_U16 : VOP2Inst_e64_T16 <"v_min_u16", VOP_I16_I16_I16, umin>; +defm V_MIN_I16 : VOP2Inst_e64_T16 <"v_min_i16", VOP_I16_I16_I16, smin>; +} // End isCommutable = 1 +} // End isReMaterializable = 1 -let mayRaiseFPException = 0 in { -def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; +let SubtargetPredicate = isGFX11Plus in { + let isCommutable = 1 in { + defm V_AND_B16_T16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16, and>; + defm V_OR_B16_T16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16, or>; + defm V_XOR_B16_T16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16, xor>; + } // End isCommutable = 1 +} // End SubtargetPredicate = isGFX11Plus + +let FPDPRounding = 1, isReMaterializable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; +} +let SubtargetPredicate = HasTrue16BitInsts in { +def V_FMAMK_F16_T16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_T16, [], "">; } -} // End FPDPRounding = 1 +let isCommutable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; +} +let SubtargetPredicate = HasTrue16BitInsts in { +def V_FMAAK_F16_T16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_T16, [], "">; +} +} // End isCommutable = 1 +} // End FPDPRounding = 1, isReMaterializable = 1 -defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; -defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; -defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; -defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; -defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; -defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; -defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; +let Constraints = "$vdst = $src2", + DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; +} +let SubtargetPredicate = HasTrue16BitInsts in { +defm V_FMAC_F16_T16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_T16>; +} +} // End FMAC Constraints +let SubtargetPredicate = Has16BitInsts in { +let isReMaterializable = 1 in { +let FPDPRounding = 1 in { +def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; +} // End FPDPRounding = 1 +let isCommutable = 1 in { +let mayRaiseFPException = 0 in { +def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; +} let SubtargetPredicate = isGFX8GFX9 in { defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; @@ -800,6 +907,7 @@ } } // End SubtargetPredicate = Has16BitInsts + let SubtargetPredicate = HasDLInsts in { let isReMaterializable = 1 in @@ -840,7 +948,6 @@ isConvertibleToThreeAddress = 1, isCommutable = 1 in defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; - } // End SubtargetPredicate = HasDLInsts let SubtargetPredicate = HasFmaLegacy32 in { @@ -911,24 +1018,6 @@ def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; } -let SubtargetPredicate = isGFX10Plus in { - -let FPDPRounding = 1, isReMaterializable = 1 in { -def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; - -let isCommutable = 1 in -def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; -} // End FPDPRounding = 1, isReMaterializable = 1 - -let Constraints = "$vdst = $src2", - DisableEncoding="$src2", - isConvertibleToThreeAddress = 1, - isCommutable = 1 in { -defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; -} - -} // End SubtargetPredicate = isGFX10Plus - let SubtargetPredicate = HasPkFmacF16Inst in { defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; } // End SubtargetPredicate = HasPkFmacF16Inst @@ -1034,14 +1123,6 @@ def : VOPBinOpClampPat; } -let SubtargetPredicate = isGFX11Plus in { - let isCommutable = 1 in { - defm V_AND_B16 : VOP2Inst <"v_and_b16", VOP_I16_I16_I16, and>; - defm V_OR_B16 : VOP2Inst <"v_or_b16", VOP_I16_I16_I16, or>; - defm V_XOR_B16 : VOP2Inst <"v_xor_b16", VOP_I16_I16_I16, xor>; - } // End isCommutable = 1 -} // End SubtargetPredicate = isGFX11Plus - //===----------------------------------------------------------------------===// // DPP Encodings //===----------------------------------------------------------------------===// @@ -1108,6 +1189,15 @@ VOP2_Real(NAME), SIEncodingFamily.GFX11>, VOP2_MADKe(NAME).Pfl>; } + multiclass VOP2Only_Real_MADK_gfx11_with_name op, string asmName, + string opName = NAME> { + def _gfx11 : + VOP2_Real(opName), SIEncodingFamily.GFX11>, + VOP2_MADKe(opName).Pfl> { + VOP2_Pseudo ps = !cast(opName); + let AsmString = asmName # ps.AsmOperands; + } + } multiclass VOP2_Real_e32_gfx11 op> { def _e32_gfx11 : VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX11>, @@ -1141,8 +1231,7 @@ defvar ps = !cast(opName#"_e32"); def _e32_gfx11 : VOP2_Real, - VOP2e, - MnemonicAlias, Requires<[isGFX11Plus]> { + VOP2e { let AsmString = asmName # ps.AsmOperands; let IsSingle = single; } @@ -1152,8 +1241,7 @@ defvar ps = !cast(opName#"_e64"); def _e64_gfx11 : VOP3_Real, - VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl>, - MnemonicAlias, Requires<[isGFX11Plus]> { + VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl> { let AsmString = asmName # ps.AsmOperands; } } @@ -1274,23 +1362,33 @@ VOP2_Realtriple_e64_gfx11, VOP2_Real_NO_VOP3_gfx11; multiclass VOP2_Real_NO_VOP3_with_name_gfx11 op, string opName, - string asmName, bit isSingle = 0> : - VOP2_Real_e32_with_name_gfx11, + string asmName, bit isSingle = 0> { + + defm NAME : VOP2_Real_e32_with_name_gfx11, VOP2_Real_dpp_with_name_gfx11, VOP2_Real_dpp8_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def _gfx11_alias : MnemonicAlias, Requires<[isGFX11Plus]>; +} multiclass VOP2_Real_FULL_with_name_gfx11 op, string opName, string asmName> : VOP2_Realtriple_e64_with_name_gfx11, VOP2_Real_NO_VOP3_with_name_gfx11; +multiclass VOP2_Real_FULL_T16_gfx11 op, string asmName, string opName = NAME> + : VOP2_Real_FULL_with_name_gfx11; + multiclass VOP2_Real_NO_DPP_gfx11 op> : VOP2_Real_e32_gfx11, VOP2_Real_e64_gfx11; multiclass VOP2_Real_NO_DPP_with_name_gfx11 op, string opName, - string asmName> : - VOP2_Real_e32_with_name_gfx11, - VOP2_Real_e64_with_name_gfx11; + string asmName> { + defm NAME : VOP2_Real_e32_with_name_gfx11, + VOP2_Real_e64_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def _gfx11_alias : MnemonicAlias, Requires<[isGFX11Plus]>; +} defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; @@ -1314,6 +1412,17 @@ "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">; defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>; +defm V_ADD_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x032, "v_add_f16">; +defm V_SUB_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x033, "v_sub_f16">; +defm V_SUBREV_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x034, "v_subrev_f16">; +defm V_MUL_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x035, "v_mul_f16">; +defm V_FMAC_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x036, "v_fmac_f16">; +defm V_LDEXP_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x03b, "v_ldexp_f16">; +defm V_MAX_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x039, "v_max_f16">; +defm V_MIN_F16_T16 : VOP2_Real_FULL_T16_gfx11<0x03a, "v_min_f16">; +defm V_FMAMK_F16_T16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">; +defm V_FMAAK_F16_T16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">; + // VOP3 only. defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>; defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>; @@ -1604,16 +1713,16 @@ defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>; defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>; -defm V_ADD_F16 : VOP2_Real_gfx10_gfx11<0x032>; -defm V_SUB_F16 : VOP2_Real_gfx10_gfx11<0x033>; -defm V_SUBREV_F16 : VOP2_Real_gfx10_gfx11<0x034>; -defm V_MUL_F16 : VOP2_Real_gfx10_gfx11<0x035>; -defm V_FMAC_F16 : VOP2_Real_gfx10_gfx11<0x036>; -defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x037>; -defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x038>; -defm V_MAX_F16 : VOP2_Real_gfx10_gfx11<0x039>; -defm V_MIN_F16 : VOP2_Real_gfx10_gfx11<0x03a>; -defm V_LDEXP_F16 : VOP2_Real_gfx10_gfx11<0x03b>; +defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; +defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; +defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; +defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; +defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; +defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; +defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; +defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; +defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; +defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; let IsSingle = 1 in { defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -767,8 +767,8 @@ let HasSrc0Mods = 1; let HasSrc1Mods = 1; let HasSrc2Mods = 1; - let Src0ModDPP = FPVRegInputMods; - let Src1ModDPP = FPVRegInputMods; + let Src0ModVOP3DPP = FPVRegInputMods; + let Src1ModVOP3DPP = FPVRegInputMods; let Src2ModVOP3DPP = FP16InputMods; let InsVOP3OpSel = getInsVOP3OpSel; defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11<0x303>; defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11<0x304>; -defm V_MUL_LO_U16 : VOP3Only_Realtriple_gfx11<0x305>; +defm V_MUL_LO_U16_T16 : VOP3Only_Realtriple_T16_gfx11<0x305, "v_mul_lo_u16">; defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11<0x306>; defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11<0x307>; -defm V_MAX_U16 : VOP3Only_Realtriple_gfx11<0x309>; -defm V_MAX_I16 : VOP3Only_Realtriple_gfx11<0x30a>; -defm V_MIN_U16 : VOP3Only_Realtriple_gfx11<0x30b>; -defm V_MIN_I16 : VOP3Only_Realtriple_gfx11<0x30c>; +defm V_MAX_U16_T16 : VOP3Only_Realtriple_T16_gfx11<0x309, "v_max_u16">; +defm V_MAX_I16_T16 : VOP3Only_Realtriple_T16_gfx11<0x30a, "v_max_i16">; +defm V_MIN_U16_T16 : VOP3Only_Realtriple_T16_gfx11<0x30b, "v_min_u16">; +defm V_MIN_I16_T16 : VOP3Only_Realtriple_T16_gfx11<0x30c, "v_min_i16">; defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30d, "V_ADD_I16", "v_add_nc_i16">; defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30e, "V_SUB_I16", "v_sub_nc_i16">; defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11<0x311>; @@ -945,9 +945,9 @@ defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11<0x32d>; defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11<0x32e>; defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11<0x32f>; -defm V_LSHLREV_B16 : VOP3Only_Realtriple_gfx11<0x338>; -defm V_LSHRREV_B16 : VOP3Only_Realtriple_gfx11<0x339>; -defm V_ASHRREV_I16 : VOP3Only_Realtriple_gfx11<0x33a>; +defm V_LSHLREV_B16_T16 : VOP3Only_Realtriple_T16_gfx11<0x338, "v_lshlrev_b16">; +defm V_LSHRREV_B16_T16 : VOP3Only_Realtriple_T16_gfx11<0x339, "v_lshrrev_b16">; +defm V_ASHRREV_I16_T16 : VOP3Only_Realtriple_T16_gfx11<0x33a, "v_ashrrev_i16">; defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>; defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11<0x33d>; defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11<0x33e>; @@ -955,9 +955,9 @@ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11<0x361>; // Pseudo in VOP2 } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) -defm V_AND_B16 : VOP3Only_Realtriple_gfx11<0x362>; -defm V_OR_B16 : VOP3Only_Realtriple_gfx11<0x363>; -defm V_XOR_B16 : VOP3Only_Realtriple_gfx11<0x364>; +defm V_AND_B16_T16 : VOP3Only_Realtriple_T16_gfx11<0x362, "v_and_b16">; +defm V_OR_B16_T16 : VOP3Only_Realtriple_T16_gfx11<0x363, "v_or_b16">; +defm V_XOR_B16_T16 : VOP3Only_Realtriple_T16_gfx11<0x364, "v_xor_b16">; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -84,6 +84,20 @@ list Schedule = sched; } +multiclass VOPC_Profile_T16 sched, ValueType vt0, ValueType vt1 = vt0> { + def NAME : VOPC_Profile; + def _T16 : VOPC_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; + } +} + class VOPC_NoSdst_Profile sched, ValueType vt0, ValueType vt1 = vt0> : VOPC_Profile { @@ -101,6 +115,20 @@ let EmitDst = 0; } +multiclass VOPC_NoSdst_Profile_T16 sched, ValueType vt0, ValueType vt1 = vt0> { + def NAME : VOPC_NoSdst_Profile; + def _T16 : VOPC_NoSdst_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; + } +} + class VOPC_Pseudo pattern=[], bit DefVcc = 1> : InstSI<(outs), P.Ins32, "", pattern>, @@ -197,30 +225,30 @@ let SubtargetPredicate = AssemblerPredicate; } -multiclass VOPCInstAliases { +multiclass VOPCInstAliases { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), !cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; let WaveSizePredicate = isWave32 in { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), "vcc_lo, "#!cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } let WaveSizePredicate = isWave64 in { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), "vcc, "#!cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } } -multiclass VOPCXInstAliases { +multiclass VOPCXInstAliases { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), !cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } class getVOPCPat64 : LetDummies { @@ -363,23 +391,29 @@ } } // End SubtargetPredicate = HasSdstCMPX -def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; +defm VOPC_I1_F16_F16 : VOPC_Profile_T16<[Write32Bit], f16>; def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>; def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>; -def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>; +defm VOPC_I1_I16_I16 : VOPC_Profile_T16<[Write32Bit], i16>; def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>; def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>; -def VOPC_F16_F16 : VOPC_NoSdst_Profile<[Write32Bit], f16>; +defm VOPC_F16_F16 : VOPC_NoSdst_Profile_T16<[Write32Bit], f16>; def VOPC_F32_F32 : VOPC_NoSdst_Profile<[Write32Bit], f32>; def VOPC_F64_F64 : VOPC_NoSdst_Profile<[Write64Bit], f64>; -def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>; +defm VOPC_I16_I16 : VOPC_NoSdst_Profile_T16<[Write32Bit], i16>; def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>; def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>; multiclass VOPC_F16 : - VOPC_Pseudos ; + string revOp = opName> { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPC_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPC_Pseudos ; + } +} multiclass VOPC_F32 : VOPC_Pseudos ; @@ -387,8 +421,15 @@ multiclass VOPC_F64 : VOPC_Pseudos ; -multiclass VOPC_I16 : - VOPC_Pseudos ; +multiclass VOPC_I16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPC_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPC_Pseudos ; + } +} multiclass VOPC_I32 : VOPC_Pseudos ; @@ -396,8 +437,14 @@ multiclass VOPC_I64 : VOPC_Pseudos ; -multiclass VOPCX_F16 : - VOPCX_Pseudos ; +multiclass VOPCX_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPCX_Pseudos ; + } +} multiclass VOPCX_F32 : VOPCX_Pseudos ; @@ -405,8 +452,14 @@ multiclass VOPCX_F64 : VOPCX_Pseudos ; -multiclass VOPCX_I16 : - VOPCX_Pseudos ; +multiclass VOPCX_I16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPCX_Pseudos ; + } +} multiclass VOPCX_I32 : VOPCX_Pseudos ; @@ -709,11 +762,11 @@ // Class instructions //===----------------------------------------------------------------------===// -class VOPC_Class_Profile sched, ValueType vt> : - VOPC_Profile { +class VOPC_Class_Profile sched, ValueType src0VT, ValueType src1VT = i32> : + VOPC_Profile { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -734,8 +787,22 @@ let HasOMod = 0; } -class VOPC_Class_NoSdst_Profile sched, ValueType vt> : - VOPC_Class_Profile { +multiclass VOPC_Class_Profile_T16 sched> { + def NAME : VOPC_Class_Profile; + def _T16 : VOPC_Class_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; + } +} + +class VOPC_Class_NoSdst_Profile sched, ValueType src0VT, ValueType src1VT = i32> : + VOPC_Class_Profile { let Outs64 = (outs ); let OutsSDWA = (outs ); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, @@ -747,14 +814,29 @@ let EmitDst = 0; } +multiclass VOPC_Class_NoSdst_Profile_T16 sched> { + def NAME : VOPC_Class_NoSdst_Profile; + def _T16 : VOPC_Class_NoSdst_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_T16.ret; + let Src1DPP = getVregSrcForVT_T16.ret; + let Src2DPP = getVregSrcForVT_T16.ret; + let Src0ModDPP = getSrcModDPP_T16.ret; + let Src1ModDPP = getSrcModDPP_T16.ret; + let Src2ModDPP = getSrcModDPP_T16.ret; + } +} + class getVOPCClassPat64 { list ret = [(set i1:$sdst, (AMDGPUfp_class (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers)), - P.Src1VT:$src1))]; + i32:$src1))]; } + // Special case for class instructions which only have modifiers on // the 1st source operand. multiclass VOPC_Class_Pseudos ; +defm VOPC_I1_F16_I16 : VOPC_Class_Profile_T16<[Write32Bit]>; def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>; def VOPC_I1_F64_I32 : VOPC_Class_Profile<[WriteDoubleAdd], f64>; -def VOPC_F16_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f16>; +defm VOPC_F16_I16 : VOPC_Class_NoSdst_Profile_T16<[Write32Bit]>; def VOPC_F32_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f32>; def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>; -multiclass VOPC_CLASS_F16 : - VOPC_Class_Pseudos ; +multiclass VOPC_CLASS_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPC_Class_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPC_Class_Pseudos ; + } +} -multiclass VOPCX_CLASS_F16 : - VOPCX_Class_Pseudos ; +multiclass VOPCX_CLASS_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Class_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _T16 : VOPCX_Class_Pseudos ; + } +} multiclass VOPC_CLASS_F32 : VOPC_Class_Pseudos ; @@ -882,10 +976,8 @@ defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; -let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; -} } // End ReadsModeReg = 0, mayRaiseFPException = 0 //===----------------------------------------------------------------------===// @@ -1265,7 +1357,7 @@ } multiclass VOPC_Real_with_name_gfx11 op, string OpName, - string asm_name> { + string asm_name, string pseudo_mnemonic = ""> { defvar ps32 = !cast(OpName#"_e32"); defvar ps64 = !cast(OpName#"_e64"); let DecoderNamespace = "GFX11" in { @@ -1276,11 +1368,17 @@ // the destination-less 32bit forms add it to the asmString here. VOPC_Real, VOPCe, - MnemonicAlias, Requires<[isGFX11Plus]>; + MnemonicAlias, + Requires<[isGFX11Plus]>; def _e64_gfx11 : VOP3_Real, VOP3a_gfx11<{0, op}, ps64.Pfl>, - MnemonicAlias, Requires<[isGFX11Plus]> { + MnemonicAlias, + Requires<[isGFX11Plus]> { // Encoding used for VOPC instructions encoded as VOP3 differs from // VOP3e by destination name (sdst) as VOPC doesn't have vector dst. bits<8> sdst; @@ -1288,7 +1386,7 @@ } } // End DecoderNamespace = "GFX11" - defm : VOPCInstAliases; + defm : VOPCInstAliases; foreach _ = BoolToList.ret in { defvar psDPP = !cast(OpName #"_e32" #"_dpp"); @@ -1363,9 +1461,11 @@ } } } - } + multiclass VOPC_Real_T16_gfx11 op, string asm_name, + string OpName = NAME> : VOPC_Real_with_name_gfx11; + multiclass VOPCX_Real_gfx11 op> { defvar ps32 = !cast(NAME#"_nosdst_e32"); defvar ps64 = !cast(NAME#"_nosdst_e64"); @@ -1426,20 +1526,24 @@ } multiclass VOPCX_Real_with_name_gfx11 op, string OpName, - string asm_name> { + string asm_name, string pseudo_mnemonic = ""> { defvar ps32 = !cast(OpName#"_nosdst_e32"); defvar ps64 = !cast(OpName#"_nosdst_e64"); let DecoderNamespace = "GFX11" in { def _e32_gfx11 : VOPC_Real, - MnemonicAlias, + MnemonicAlias, Requires<[isGFX11Plus]>, VOPCe { let AsmString = asm_name # "{_e32} " # ps32.AsmOperands; } def _e64_gfx11 : VOP3_Real, - MnemonicAlias, + MnemonicAlias, Requires<[isGFX11Plus]>, VOP3a_gfx11<{0, op}, ps64.Pfl> { let Inst{7-0} = ? ; // sdst @@ -1447,7 +1551,7 @@ } } // End DecoderNamespace = "GFX11" - defm : VOPCXInstAliases; + defm : VOPCXInstAliases; foreach _ = BoolToList.ret in { defvar psDPP = !cast(OpName#"_nosdst_e32"#"_dpp"); @@ -1476,26 +1580,30 @@ } } } - } + + multiclass VOPCX_Real_T16_gfx11 op, string asm_name, + string OpName = NAME> : VOPCX_Real_with_name_gfx11; + + } // End AssemblerPredicate = isGFX11Only -defm V_CMP_F_F16 : VOPC_Real_gfx11<0x000>; -defm V_CMP_LT_F16 : VOPC_Real_gfx11<0x001>; -defm V_CMP_EQ_F16 : VOPC_Real_gfx11<0x002>; -defm V_CMP_LE_F16 : VOPC_Real_gfx11<0x003>; -defm V_CMP_GT_F16 : VOPC_Real_gfx11<0x004>; -defm V_CMP_LG_F16 : VOPC_Real_gfx11<0x005>; -defm V_CMP_GE_F16 : VOPC_Real_gfx11<0x006>; -defm V_CMP_O_F16 : VOPC_Real_gfx11<0x007>; -defm V_CMP_U_F16 : VOPC_Real_gfx11<0x008>; -defm V_CMP_NGE_F16 : VOPC_Real_gfx11<0x009>; -defm V_CMP_NLG_F16 : VOPC_Real_gfx11<0x00a>; -defm V_CMP_NGT_F16 : VOPC_Real_gfx11<0x00b>; -defm V_CMP_NLE_F16 : VOPC_Real_gfx11<0x00c>; -defm V_CMP_NEQ_F16 : VOPC_Real_gfx11<0x00d>; -defm V_CMP_NLT_F16 : VOPC_Real_gfx11<0x00e>; -defm V_CMP_T_F16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16", "v_cmp_t_f16">; +defm V_CMP_F_F16_T16 : VOPC_Real_T16_gfx11<0x000, "v_cmp_f_f16">; +defm V_CMP_LT_F16_T16 : VOPC_Real_T16_gfx11<0x001, "v_cmp_lt_f16">; +defm V_CMP_EQ_F16_T16 : VOPC_Real_T16_gfx11<0x002, "v_cmp_eq_f16">; +defm V_CMP_LE_F16_T16 : VOPC_Real_T16_gfx11<0x003, "v_cmp_le_f16">; +defm V_CMP_GT_F16_T16 : VOPC_Real_T16_gfx11<0x004, "v_cmp_gt_f16">; +defm V_CMP_LG_F16_T16 : VOPC_Real_T16_gfx11<0x005, "v_cmp_lg_f16">; +defm V_CMP_GE_F16_T16 : VOPC_Real_T16_gfx11<0x006, "v_cmp_ge_f16">; +defm V_CMP_O_F16_T16 : VOPC_Real_T16_gfx11<0x007, "v_cmp_o_f16">; +defm V_CMP_U_F16_T16 : VOPC_Real_T16_gfx11<0x008, "v_cmp_u_f16">; +defm V_CMP_NGE_F16_T16 : VOPC_Real_T16_gfx11<0x009, "v_cmp_nge_f16">; +defm V_CMP_NLG_F16_T16 : VOPC_Real_T16_gfx11<0x00a, "v_cmp_nlg_f16">; +defm V_CMP_NGT_F16_T16 : VOPC_Real_T16_gfx11<0x00b, "v_cmp_ngt_f16">; +defm V_CMP_NLE_F16_T16 : VOPC_Real_T16_gfx11<0x00c, "v_cmp_nle_f16">; +defm V_CMP_NEQ_F16_T16 : VOPC_Real_T16_gfx11<0x00d, "v_cmp_neq_f16">; +defm V_CMP_NLT_F16_T16 : VOPC_Real_T16_gfx11<0x00e, "v_cmp_nlt_f16">; +defm V_CMP_T_F16_T16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16_T16", "v_cmp_t_f16", "v_cmp_tru_f16">; defm V_CMP_F_F32 : VOPC_Real_gfx11<0x010>; defm V_CMP_LT_F32 : VOPC_Real_gfx11<0x011>; defm V_CMP_EQ_F32 : VOPC_Real_gfx11<0x012>; @@ -1513,18 +1621,18 @@ defm V_CMP_NLT_F32 : VOPC_Real_gfx11<0x01e>; defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">; defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">; -defm V_CMP_LT_I16 : VOPC_Real_gfx11<0x031>; -defm V_CMP_EQ_I16 : VOPC_Real_gfx11<0x032>; -defm V_CMP_LE_I16 : VOPC_Real_gfx11<0x033>; -defm V_CMP_GT_I16 : VOPC_Real_gfx11<0x034>; -defm V_CMP_NE_I16 : VOPC_Real_gfx11<0x035>; -defm V_CMP_GE_I16 : VOPC_Real_gfx11<0x036>; -defm V_CMP_LT_U16 : VOPC_Real_gfx11<0x039>; -defm V_CMP_EQ_U16 : VOPC_Real_gfx11<0x03a>; -defm V_CMP_LE_U16 : VOPC_Real_gfx11<0x03b>; -defm V_CMP_GT_U16 : VOPC_Real_gfx11<0x03c>; -defm V_CMP_NE_U16 : VOPC_Real_gfx11<0x03d>; -defm V_CMP_GE_U16 : VOPC_Real_gfx11<0x03e>; +defm V_CMP_LT_I16_T16 : VOPC_Real_T16_gfx11<0x031, "v_cmp_lt_i16">; +defm V_CMP_EQ_I16_T16 : VOPC_Real_T16_gfx11<0x032, "v_cmp_eq_i16">; +defm V_CMP_LE_I16_T16 : VOPC_Real_T16_gfx11<0x033, "v_cmp_le_i16">; +defm V_CMP_GT_I16_T16 : VOPC_Real_T16_gfx11<0x034, "v_cmp_gt_i16">; +defm V_CMP_NE_I16_T16 : VOPC_Real_T16_gfx11<0x035, "v_cmp_ne_i16">; +defm V_CMP_GE_I16_T16 : VOPC_Real_T16_gfx11<0x036, "v_cmp_ge_i16">; +defm V_CMP_LT_U16_T16 : VOPC_Real_T16_gfx11<0x039, "v_cmp_lt_u16">; +defm V_CMP_EQ_U16_T16 : VOPC_Real_T16_gfx11<0x03a, "v_cmp_eq_u16">; +defm V_CMP_LE_U16_T16 : VOPC_Real_T16_gfx11<0x03b, "v_cmp_le_u16">; +defm V_CMP_GT_U16_T16 : VOPC_Real_T16_gfx11<0x03c, "v_cmp_gt_u16">; +defm V_CMP_NE_U16_T16 : VOPC_Real_T16_gfx11<0x03d, "v_cmp_ne_u16">; +defm V_CMP_GE_U16_T16 : VOPC_Real_T16_gfx11<0x03e, "v_cmp_ge_u16">; defm V_CMP_F_I32 : VOPC_Real_gfx11<0x040>; defm V_CMP_LT_I32 : VOPC_Real_gfx11<0x041>; defm V_CMP_EQ_I32 : VOPC_Real_gfx11<0x042>; @@ -1559,26 +1667,26 @@ defm V_CMP_GE_U64 : VOPC_Real_gfx11<0x05e>; defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>; -defm V_CMP_CLASS_F16 : VOPC_Real_gfx11<0x07d>; +defm V_CMP_CLASS_F16_T16 : VOPC_Real_T16_gfx11<0x07d, "v_cmp_class_f16">; defm V_CMP_CLASS_F32 : VOPC_Real_gfx11<0x07e>; defm V_CMP_CLASS_F64 : VOPC_Real_gfx11<0x07f>; -defm V_CMPX_F_F16 : VOPCX_Real_gfx11<0x080>; -defm V_CMPX_LT_F16 : VOPCX_Real_gfx11<0x081>; -defm V_CMPX_EQ_F16 : VOPCX_Real_gfx11<0x082>; -defm V_CMPX_LE_F16 : VOPCX_Real_gfx11<0x083>; -defm V_CMPX_GT_F16 : VOPCX_Real_gfx11<0x084>; -defm V_CMPX_LG_F16 : VOPCX_Real_gfx11<0x085>; -defm V_CMPX_GE_F16 : VOPCX_Real_gfx11<0x086>; -defm V_CMPX_O_F16 : VOPCX_Real_gfx11<0x087>; -defm V_CMPX_U_F16 : VOPCX_Real_gfx11<0x088>; -defm V_CMPX_NGE_F16 : VOPCX_Real_gfx11<0x089>; -defm V_CMPX_NLG_F16 : VOPCX_Real_gfx11<0x08a>; -defm V_CMPX_NGT_F16 : VOPCX_Real_gfx11<0x08b>; -defm V_CMPX_NLE_F16 : VOPCX_Real_gfx11<0x08c>; -defm V_CMPX_NEQ_F16 : VOPCX_Real_gfx11<0x08d>; -defm V_CMPX_NLT_F16 : VOPCX_Real_gfx11<0x08e>; -defm V_CMPX_T_F16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16", "v_cmpx_t_f16">; +defm V_CMPX_F_F16_T16 : VOPCX_Real_T16_gfx11<0x080, "v_cmpx_f_f16">; +defm V_CMPX_LT_F16_T16 : VOPCX_Real_T16_gfx11<0x081, "v_cmpx_lt_f16">; +defm V_CMPX_EQ_F16_T16 : VOPCX_Real_T16_gfx11<0x082, "v_cmpx_eq_f16">; +defm V_CMPX_LE_F16_T16 : VOPCX_Real_T16_gfx11<0x083, "v_cmpx_le_f16">; +defm V_CMPX_GT_F16_T16 : VOPCX_Real_T16_gfx11<0x084, "v_cmpx_gt_f16">; +defm V_CMPX_LG_F16_T16 : VOPCX_Real_T16_gfx11<0x085, "v_cmpx_lg_f16">; +defm V_CMPX_GE_F16_T16 : VOPCX_Real_T16_gfx11<0x086, "v_cmpx_ge_f16">; +defm V_CMPX_O_F16_T16 : VOPCX_Real_T16_gfx11<0x087, "v_cmpx_o_f16">; +defm V_CMPX_U_F16_T16 : VOPCX_Real_T16_gfx11<0x088, "v_cmpx_u_f16">; +defm V_CMPX_NGE_F16_T16 : VOPCX_Real_T16_gfx11<0x089, "v_cmpx_nge_f16">; +defm V_CMPX_NLG_F16_T16 : VOPCX_Real_T16_gfx11<0x08a, "v_cmpx_nlg_f16">; +defm V_CMPX_NGT_F16_T16 : VOPCX_Real_T16_gfx11<0x08b, "v_cmpx_ngt_f16">; +defm V_CMPX_NLE_F16_T16 : VOPCX_Real_T16_gfx11<0x08c, "v_cmpx_nle_f16">; +defm V_CMPX_NEQ_F16_T16 : VOPCX_Real_T16_gfx11<0x08d, "v_cmpx_neq_f16">; +defm V_CMPX_NLT_F16_T16 : VOPCX_Real_T16_gfx11<0x08e, "v_cmpx_nlt_f16">; +defm V_CMPX_T_F16_T16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_T16", "v_cmpx_t_f16", "v_cmpx_tru_f16">; defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>; defm V_CMPX_LT_F32 : VOPCX_Real_gfx11<0x091>; defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11<0x092>; @@ -1613,18 +1721,18 @@ defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11<0x0ae>; defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">; -defm V_CMPX_LT_I16 : VOPCX_Real_gfx11<0x0b1>; -defm V_CMPX_EQ_I16 : VOPCX_Real_gfx11<0x0b2>; -defm V_CMPX_LE_I16 : VOPCX_Real_gfx11<0x0b3>; -defm V_CMPX_GT_I16 : VOPCX_Real_gfx11<0x0b4>; -defm V_CMPX_NE_I16 : VOPCX_Real_gfx11<0x0b5>; -defm V_CMPX_GE_I16 : VOPCX_Real_gfx11<0x0b6>; -defm V_CMPX_LT_U16 : VOPCX_Real_gfx11<0x0b9>; -defm V_CMPX_EQ_U16 : VOPCX_Real_gfx11<0x0ba>; -defm V_CMPX_LE_U16 : VOPCX_Real_gfx11<0x0bb>; -defm V_CMPX_GT_U16 : VOPCX_Real_gfx11<0x0bc>; -defm V_CMPX_NE_U16 : VOPCX_Real_gfx11<0x0bd>; -defm V_CMPX_GE_U16 : VOPCX_Real_gfx11<0x0be>; +defm V_CMPX_LT_I16_T16 : VOPCX_Real_T16_gfx11<0x0b1, "v_cmpx_lt_i16">; +defm V_CMPX_EQ_I16_T16 : VOPCX_Real_T16_gfx11<0x0b2, "v_cmpx_eq_i16">; +defm V_CMPX_LE_I16_T16 : VOPCX_Real_T16_gfx11<0x0b3, "v_cmpx_le_i16">; +defm V_CMPX_GT_I16_T16 : VOPCX_Real_T16_gfx11<0x0b4, "v_cmpx_gt_i16">; +defm V_CMPX_NE_I16_T16 : VOPCX_Real_T16_gfx11<0x0b5, "v_cmpx_ne_i16">; +defm V_CMPX_GE_I16_T16 : VOPCX_Real_T16_gfx11<0x0b6, "v_cmpx_ge_i16">; +defm V_CMPX_LT_U16_T16 : VOPCX_Real_T16_gfx11<0x0b9, "v_cmpx_lt_u16">; +defm V_CMPX_EQ_U16_T16 : VOPCX_Real_T16_gfx11<0x0ba, "v_cmpx_eq_u16">; +defm V_CMPX_LE_U16_T16 : VOPCX_Real_T16_gfx11<0x0bb, "v_cmpx_le_u16">; +defm V_CMPX_GT_U16_T16 : VOPCX_Real_T16_gfx11<0x0bc, "v_cmpx_gt_u16">; +defm V_CMPX_NE_U16_T16 : VOPCX_Real_T16_gfx11<0x0bd, "v_cmpx_ne_u16">; +defm V_CMPX_GE_U16_T16 : VOPCX_Real_T16_gfx11<0x0be, "v_cmpx_ge_u16">; defm V_CMPX_F_I32 : VOPCX_Real_gfx11<0x0c0>; defm V_CMPX_LT_I32 : VOPCX_Real_gfx11<0x0c1>; defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11<0x0c2>; @@ -1658,7 +1766,7 @@ defm V_CMPX_NE_U64 : VOPCX_Real_gfx11<0x0dd>; defm V_CMPX_GE_U64 : VOPCX_Real_gfx11<0x0de>; defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>; -defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx11<0x0fd>; +defm V_CMPX_CLASS_F16_T16 : VOPCX_Real_T16_gfx11<0x0fd, "v_cmpx_class_f16">; defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11<0x0fe>; defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11<0x0ff>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -24,6 +24,9 @@ list SchedRW; list Uses; list Defs; + list OtherPredicates; + Predicate AssemblerPredicate; + string DecoderNamespace; } class VOP { @@ -61,6 +64,8 @@ let UseNamedOperandTable = 1; string Mnemonic = opName; + Instruction Opcode = !cast(NAME); + bit IsTrue16 = P.IsTrue16; VOPProfile Pfl = P; string AsmOperands; @@ -1351,14 +1356,13 @@ foreach _ = BoolToList.ret in def _e64_gfx11 : VOP3_Real, - VOP3OpSel_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + VOP3OpSel_gfx11; foreach _ = BoolToList.ret in def _e64_gfx11 : VOP3_Real, - VOP3e_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + VOP3e_gfx11; } + def _gfx11_VOP3_alias : MnemonicAlias, Requires<[isGFX11Plus]>, LetDummies; } // for READLANE/WRITELANE multiclass VOP3_Real_No_Suffix_gfx11 op, string opName = NAME> { @@ -1461,6 +1465,10 @@ string asmName> : VOP3_Realtriple_with_name_gfx11; +multiclass VOP3Only_Realtriple_T16_gfx11 op, string asmName, + string opName = NAME> + : VOP3Only_Realtriple_with_name_gfx11; + multiclass VOP3be_Realtriple_gfx11< bits<10> op, bit isSingle = 0, string opName = NAME, string asmName = !cast(opName#"_e64").Mnemonic> : @@ -1503,3 +1511,12 @@ def VOPC64DPPTable : VOPC64Table<"DPP">; def VOPC64DPP8Table : VOPC64Table<"DPP8">; + +def VOPTrue16Table : GenericTable { + let FilterClass = "VOP_Pseudo"; + let CppTypeName = "VOPTrue16Info"; + let Fields = ["Opcode", "IsTrue16"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getTrue16OpcodeHelper"; +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -116,7 +116,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-NEXT: v_fma_f16 v0, v1, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fma.f16(half %x, half %y, half %z) ret half %fma diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -108,8 +108,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -201,8 +201,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -247,8 +247,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_ASHRREV_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_T16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -446,8 +446,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_T16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -39,7 +39,7 @@ ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -82,7 +82,7 @@ ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -73,7 +73,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -110,7 +110,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -147,7 +147,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -184,7 +184,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,7 +221,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -257,7 +257,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -294,7 +294,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -331,7 +331,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -368,7 +368,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -405,7 +405,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -442,7 +442,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -479,7 +479,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -516,7 +516,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -553,7 +553,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -25,7 +25,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -25,7 +25,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_T16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -25,7 +25,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_T16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -25,7 +25,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_T16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_T16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -32,8 +32,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_T16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 @@ -71,8 +71,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -110,8 +110,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -149,8 +149,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_NE_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -188,8 +188,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] + ; GFX11-NEXT: [[V_CMP_LT_I16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -227,8 +227,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_I16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -266,8 +266,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_LT_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -305,8 +305,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_U16_T16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -106,8 +106,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -199,8 +199,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -245,8 +245,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_LSHRREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_T16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -444,8 +444,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_T16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -90,9 +90,9 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_T16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -143,9 +143,9 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MIN_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_T16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -197,10 +197,10 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MIN_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_T16_e64 [[V_MIN_I16_T16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -90,9 +90,9 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -143,9 +143,9 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MIN_U16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -197,10 +197,10 @@ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MIN_U16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_T16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_T16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_T16_e64 [[V_MIN_U16_T16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -106,8 +106,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHLREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -199,8 +199,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHLREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_T16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -245,8 +245,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_LSHLREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_T16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -444,8 +444,8 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHLREV_B16_T16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_T16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_T16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -88,8 +88,9 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: %1:vgpr_32_f128 = nofpexcept V_CVT_F16_F32_T16_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %1 + ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -125,8 +126,9 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: %1:vgpr_32_f128 = nofpexcept V_CVT_F16_F32_T16_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %1 + ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -98,8 +98,9 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: %1:vgpr_32_f128 = nofpexcept V_CVT_F16_F32_T16_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %1 + ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -135,8 +136,9 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: %1:vgpr_32_f128 = nofpexcept V_CVT_F16_F32_T16_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %1 + ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -51,7 +51,7 @@ define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] @@ -79,7 +79,7 @@ define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -91,7 +91,7 @@ define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -104,7 +104,7 @@ define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 1966090 /* regdef:VGPR_32 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -121,7 +121,7 @@ define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 3211274 /* regdef:VReg_64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) @@ -153,7 +153,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:VGPR_32 */, [[COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -164,7 +164,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2097161 /* reguse:SReg_32 */, [[COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -188,7 +188,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 1966089 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -203,7 +203,7 @@ ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -220,7 +220,7 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -232,13 +232,13 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %4, 2097161 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -261,7 +261,7 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 1966090 /* regdef:VGPR_32 */, def %4, 1966090 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 @@ -282,10 +282,10 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll @@ -536,10 +536,10 @@ ; GFX10WGP-WAVE64: NumVgprs: 256 ; GFX10CU-WAVE32: NumVgprs: 256 ; GFX10CU-WAVE64: NumVgprs: 256 -; GFX11WGP-WAVE32: NumVgprs: 128 -; GFX11WGP-WAVE64: NumVgprs: 128 -; GFX11CU-WAVE32: NumVgprs: 128 -; GFX11CU-WAVE64: NumVgprs: 128 +; GFX11WGP-WAVE32: NumVgprs: 256 +; GFX11WGP-WAVE64: NumVgprs: 256 +; GFX11CU-WAVE32: NumVgprs: 256 +; GFX11CU-WAVE64: NumVgprs: 256 define amdgpu_kernel void @f256() #256 { call void @use256vgprs() ret void @@ -555,8 +555,8 @@ ; GFX10WGP-WAVE64: NumVgprs: 256 ; GFX10CU-WAVE32: NumVgprs: 128 ; GFX10CU-WAVE64: NumVgprs: 128 -; GFX11WGP-WAVE32: NumVgprs: 128 -; GFX11WGP-WAVE64: NumVgprs: 128 +; GFX11WGP-WAVE32: NumVgprs: 256 +; GFX11WGP-WAVE64: NumVgprs: 256 ; GFX11CU-WAVE32: NumVgprs: 128 ; GFX11CU-WAVE64: NumVgprs: 128 define amdgpu_kernel void @f512() #512 { diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir @@ -20,7 +20,7 @@ ; CHECK-LABEL: name: foo1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32 @@ -41,7 +41,7 @@ ; CHECK-LABEL: name: foo2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32 @@ -62,7 +62,7 @@ ; CHECK-LABEL: name: foo3 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32 @@ -83,7 +83,7 @@ ; CHECK-LABEL: name: foo4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32 diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -0,0 +1,84 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16 +# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_imm_reg_f16 +# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmaak_f16 +# GFX10: %1:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +--- +name: test_fmaak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit $exec + %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec +... + +# GFX10-LABEL: name: test_fmaak_inline_literal_f16 +# GFX10: %1:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec + +--- +name: test_fmaak_inline_literal_f16 +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY killed $vgpr0 + + %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + %2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir @@ -0,0 +1,101 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX11 %s + +--- +name: test_fmamk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmamk_reg_imm_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = COPY %0.sub0 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_T16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + +... + +--- +name: test_fmamk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmamk_imm_reg_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = COPY %0.sub0 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_T16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + +... + +--- +name: test_fmaak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmaak_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub0 + %2 = COPY %0.sub1 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_T16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec +... + +--- +name: test_fmaak_inline_literal_f16 +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $vgpr0 + + ; GFX11-LABEL: name: test_fmaak_inline_literal_f16 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY killed $vgpr0 + + %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + %2:vgpr_32 = V_FMAC_F16_T16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -10,10 +10,10 @@ ; REGALLOC-GFX908: bb.0 (%ir-block.0): ; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX908-NEXT: {{ $}} - ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %26 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %26 ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def %23 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def %23 ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) @@ -34,10 +34,10 @@ ; PEI-GFX908-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 ; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec @@ -57,10 +57,10 @@ ; REGALLOC-GFX90A: bb.0 (%ir-block.0): ; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX90A-NEXT: {{ $}} - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %25 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %25 ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def %23 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def %23 ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) @@ -79,10 +79,10 @@ ; PEI-GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll @@ -215,7 +215,7 @@ ; GFX9: v_fma_f16 [[FMA:v[0-9]+]], v0, v1, v2 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]] -; GFX10Plus: v_fmac_f16_e32 [[FMA:v[0-9]+]], v0, v1 +; GFX10Plus: v_fmac_f16{{_e64|_e32}} [[FMA:v[0-9]+]], v0, v1 ; GFX10Plus-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]] define i32 @zext_fma_f16(half %x, half %y, half %z) { %fma = call half @llvm.fma.f16(half %x, half %y, half %z) diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir rename from llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir rename to llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir --- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 --- name: mad_cvv_f32 @@ -10,6 +11,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = V_MADMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F32_]] + ; GFX11-LABEL: name: mad_cvv_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = V_MADMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADMK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F32_e64 0, 1092616192, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -25,6 +31,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = V_MADMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F32_]] + ; GFX11-LABEL: name: mad_vcv_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = V_MADMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADMK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F32_e64 0, %0, 0, 1092616192, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -40,6 +51,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = V_MADAK_F32 [[DEF]], [[DEF1]], 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F32_]] + ; GFX11-LABEL: name: mad_vvc_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = V_MADAK_F32 [[DEF]], [[DEF1]], 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADAK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F32_e64 0, %0, 0, %1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -55,6 +71,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = V_MADAK_F32 [[DEF1]], [[DEF]], 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F32_]] + ; GFX11-LABEL: name: mad_vsc_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = V_MADAK_F32 [[DEF1]], [[DEF]], 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADAK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F32_e64 0, %0, 0, %1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -70,6 +91,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F32_]] + ; GFX11-LABEL: name: fma_cvv_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMAMK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F32_e64 0, 1092616192, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -85,6 +111,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F32_]] + ; GFX11-LABEL: name: fma_vcv_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 1092616192, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMAMK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1092616192, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -100,6 +131,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F32_]] + ; GFX11-LABEL: name: fma_vvc_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMAAK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -115,6 +151,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF1]], [[DEF]], 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F32_]] + ; GFX11-LABEL: name: fma_vsc_f32 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF1]], [[DEF]], 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMAAK_F32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -130,6 +171,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] + ; GFX11-LABEL: name: mad_cvv_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -145,6 +191,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] + ; GFX11-LABEL: name: mad_vcv_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -160,6 +211,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] + ; GFX11-LABEL: name: mad_vvc_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -175,6 +231,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] + ; GFX11-LABEL: name: mad_vsc_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -190,6 +251,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] + ; GFX11-LABEL: name: fma_cvv_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMA_F16_gfx9_e64_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -205,6 +271,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] + ; GFX11-LABEL: name: fma_vcv_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMA_F16_gfx9_e64_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -220,6 +291,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] + ; GFX11-LABEL: name: fma_vvc_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMA_F16_gfx9_e64_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -235,6 +311,11 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] + ; GFX11-LABEL: name: fma_vsc_f16 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit [[V_FMA_F16_gfx9_e64_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -12,14 +12,14 @@ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %22.sub0 + ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def undef %22.sub0 ; GCN-NEXT: undef %24.sub0:av_64 = COPY %22.sub0 ; GCN-NEXT: SI_SPILL_AV64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0 - ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2949129 /* reguse:VReg_64 */, %23 + ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3211273 /* reguse:VReg_64 */, %23 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() %tmp = insertelement <2 x i32> undef, i32 %v0, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll @@ -22,7 +22,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-NEXT: v_fma_f16 v0, v1, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") ret half %val @@ -98,7 +98,7 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX11-NEXT: v_fma_f16 v1, v1, v3, v5 +; GFX11-NEXT: v_fma_f16 v1, v3, v1, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <3 x half> %val @@ -173,10 +173,10 @@ ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v4 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v0 -; GFX11-NEXT: v_fmac_f16_e32 v4, v0, v2 -; GFX11-NEXT: v_fmac_f16_e32 v5, v1, v3 -; GFX11-NEXT: v_fmac_f16_e32 v6, v8, v7 -; GFX11-NEXT: v_fmac_f16_e32 v9, v11, v10 +; GFX11-NEXT: v_fmac_f16_e64 v4, v0, v2 +; GFX11-NEXT: v_fmac_f16_e64 v5, v1, v3 +; GFX11-NEXT: v_fmac_f16_e64 v6, v8, v7 +; GFX11-NEXT: v_fmac_f16_e64 v9, v11, v10 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v4 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v5 ; GFX11-NEXT: v_lshl_or_b32 v0, v9, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir @@ -0,0 +1,34 @@ +# RUN: not llc -march=amdgcn -mcpu=gfx1100 -debug-only=regalloc -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck --check-prefixes=CHECK %s +# REQUIRES: asserts + +--- | + define amdgpu_ps void @e32() { + ret void + } +... + + +--- +name: e32 +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; CHECK: error: ran out of registers during register allocation + ; CHECK: [[REG1:vgpr[0-9]+]] = V_ADD_F16_T16_e32 + ; CHECK: SI_SPILL_V32_SAVE $[[REG1]] + %0:vgpr_32_f128 = V_ADD_F16_T16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir @@ -0,0 +1,55 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s + +--- | + define amdgpu_ps void @e32() #0 { + ret void + } + + define amdgpu_ps void @e64() #0 { + ret void + } + +... + + +--- +name: e32 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; GCN-LABEL: name: e32 + ; GCN: renamable $vgpr128 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + %0:vgpr_32 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... + +--- +name: e64 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; GCN-LABEL: name: e64 + ; GCN: renamable $vgpr128 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode + %0:vgpr_32 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -1,5 +1,5 @@ -# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s # GCN-LABEL: name: test_fmamk_reg_imm_f32 # GCN: %2:vgpr_32 = IMPLICIT_DEF @@ -62,65 +62,6 @@ ... -# GCN-LABEL: name: test_fmamk_reg_imm_f16 -# GCN: %2:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec ---- -name: test_fmamk_reg_imm_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec - -... - -# GCN-LABEL: name: test_fmamk_imm_reg_f16 -# GCN: %2:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec ---- -name: test_fmamk_imm_reg_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec - -... - -# GCN-LABEL: name: test_fmaak_f16 -# GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec ---- -name: test_fmaak_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec -... # GCN-LABEL: name: test_fmaak_sgpr_src0_f32 # GCN: %1:vgpr_32 = IMPLICIT_DEF @@ -207,27 +148,6 @@ ... -# GCN-LABEL: name: test_fmaak_inline_literal_f16 -# GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec - ---- -name: test_fmaak_inline_literal_f16 -tracksRegLiveness: true -liveins: - - { reg: '$vgpr0', virtual-reg: '%0' } -body: | - bb.0: - liveins: $vgpr0 - - %0:vgpr_32 = COPY killed $vgpr0 - - %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - %2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec - S_ENDPGM 0 - -... # GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds # GCN: %2:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir --- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir @@ -18,15 +18,17 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN-NEXT: V_CMP_CLASS_F16_e32_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_T16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_T16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_T16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F16_T16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_T16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN-NEXT: V_CMP_NGE_F16_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN-NEXT: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc + ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F16_T16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_T16_e64 0, [[V_CMP_NGE_F16_T16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 @@ -38,33 +40,37 @@ ; unsafe to combine cmpx %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMPX_EQ_I16_e32 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + V_CMPX_EQ_I16_T16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMP_CLASS_F16_e32 %6, %0, implicit-def $vcc, implicit $mode, implicit $exec + %7:sgpr_32 = V_CMP_CLASS_F16_T16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec - %7:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %8:sgpr_32 = V_CMP_GE_F16_e64 1, %7, 0, %0, 1, implicit $mode, implicit $exec + %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %9:sgpr_32 = V_CMP_GE_F16_T16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec ; unsafe to combine cmpx - %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMPX_GT_U32_nosdst_e64 %9, %0, implicit-def $exec, implicit $mode, implicit $exec + %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec ; shrink %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %14:sgpr_32 = V_CMP_NGE_F16_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec + %14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec - ; do not shrink, sdst used + ; do not shrink True16 instructions %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %16:sgpr_32 = V_CMP_NGE_F16_e64 0, %15, 0, %0, 0, implicit $mode, implicit $exec - %17:sgpr_32 = S_AND_B32 %16, 10101, implicit-def $scc + %16:sgpr_32 = V_CMP_NGE_F16_T16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec + + ; do not shrink, sdst used + %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec + %19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc ; commute - %18:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec + %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec ... --- @@ -83,9 +89,9 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec - ; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_T16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_T16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 %2:vgpr_32 = IMPLICIT_DEF @@ -94,9 +100,9 @@ ; Do not combine VOPC when row_mask or bank_mask is not 0xf ; All cases are covered by generic rules for creating DPP instructions %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec - V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec + %99:sgpr_32 = V_CMP_CLASS_F16_T16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec - %6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec + %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec ... diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -0,0 +1,498 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_ceil_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v128, 0xaf123456 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f32_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i32_i16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u32_u16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_not_b16_e32 v128, 0xfe0b +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s @@ -0,0 +1,1473 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s + +v_ceil_f16 v128, 0xfe0b +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, -1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, 0.5 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, exec_hi +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, exec_lo +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, m0 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, null +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, s1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, s105 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, src_scc +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, ttmp15 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, vcc_hi +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, vcc_lo +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v5, v199 +// GFX11: v_ceil_f16_e64 + +v_cos_f16 v128, 0xfe0b +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, -1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, 0.5 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, exec_hi +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, exec_lo +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, m0 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, null +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, s1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, s105 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, src_scc +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, ttmp15 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, vcc_hi +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, vcc_lo +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v128, 0xaf123456 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, -1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, 0.5 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, exec_hi +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, exec_lo +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, m0 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, null +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, s1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, s105 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, src_scc +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, ttmp15 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, vcc_hi +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, vcc_lo +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v128, 0xfe0b +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, -1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, 0.5 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, exec_hi +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, exec_lo +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, m0 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, null +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, s1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, s105 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, src_scc +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, ttmp15 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, vcc_hi +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, vcc_lo +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v128, 0xfe0b +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, -1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, 0.5 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, exec_hi +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, exec_lo +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, m0 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, null +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, s1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, s105 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, src_scc +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, ttmp15 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, vcc_hi +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, vcc_lo +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v128, 0xfe0b +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, -1 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, 0.5 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, exec_hi +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, exec_lo +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, m0 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, null +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, s1 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, s105 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, src_scc +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, ttmp15 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v1 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v127 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, vcc_hi +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, vcc_lo +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v128, 0xfe0b +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, -1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, 0.5 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, exec_hi +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, exec_lo +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, m0 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, null +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, s1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, s105 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, src_scc +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, ttmp15 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, vcc_hi +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, vcc_lo +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v128, 0xfe0b +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, -1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, 0.5 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, exec_hi +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, exec_lo +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, m0 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, null +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, s1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, s105 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, src_scc +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, ttmp15 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, vcc_hi +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, vcc_lo +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v128, 0xfe0b +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, -1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, 0.5 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, exec_hi +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, exec_lo +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, m0 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, null +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, s1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, s105 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, src_scc +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, ttmp15 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, vcc_hi +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, vcc_lo +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v5, v199 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v128, 0xfe0b +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, -1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, 0.5 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, exec_hi +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, exec_lo +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, m0 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, null +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, s1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, s105 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, src_scc +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, ttmp15 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, vcc_hi +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, vcc_lo +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 +// GFX11: v_exp_f16_e64 + +v_floor_f16 v128, 0xfe0b +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, -1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, 0.5 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, exec_hi +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, exec_lo +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, m0 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, null +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, s1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, s105 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, src_scc +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, ttmp15 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, vcc_hi +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, vcc_lo +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 +// GFX11: v_floor_f16_e64 + +v_fract_f16 v128, 0xfe0b +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, -1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, 0.5 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, exec_hi +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, exec_lo +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, m0 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, null +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, s1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, s105 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, src_scc +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, ttmp15 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, vcc_hi +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, vcc_lo +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v128, 0xfe0b +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, -1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, 0.5 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, exec_hi +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, exec_lo +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, m0 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, null +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, s1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, s105 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, src_scc +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, ttmp15 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, vcc_hi +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, vcc_lo +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v128, 0xfe0b +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, -1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, 0.5 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, exec_hi +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, exec_lo +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, m0 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, null +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, s1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, s105 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, src_scc +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, ttmp15 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, vcc_hi +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, vcc_lo +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v128, 0xfe0b +// GFX11: v_log_f16_e64 + +v_log_f16 v255, -1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, 0.5 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, exec_hi +// GFX11: v_log_f16_e64 + +v_log_f16 v255, exec_lo +// GFX11: v_log_f16_e64 + +v_log_f16 v255, m0 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, null +// GFX11: v_log_f16_e64 + +v_log_f16 v255, s1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, s105 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, src_scc +// GFX11: v_log_f16_e64 + +v_log_f16 v255, ttmp15 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, vcc_hi +// GFX11: v_log_f16_e64 + +v_log_f16 v255, vcc_lo +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 +// GFX11: v_log_f16_e64 + +v_not_b16 v128, 0xfe0b +// GFX11: v_not_b16_e64 + +v_not_b16 v255, -1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, 0.5 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, exec_hi +// GFX11: v_not_b16_e64 + +v_not_b16 v255, exec_lo +// GFX11: v_not_b16_e64 + +v_not_b16 v255, m0 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, null +// GFX11: v_not_b16_e64 + +v_not_b16 v255, s1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, s105 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, src_scc +// GFX11: v_not_b16_e64 + +v_not_b16 v255, ttmp15 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, vcc_hi +// GFX11: v_not_b16_e64 + +v_not_b16 v255, vcc_lo +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 +// GFX11: v_not_b16_e64 + +v_rcp_f16 v128, 0xfe0b +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, -1 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, 0.5 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, exec_hi +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, exec_lo +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, m0 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, null +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, s1 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, s105 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, src_scc +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, ttmp15 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v1 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, vcc_hi +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, vcc_lo +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v128, 0xfe0b +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, -1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, 0.5 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, exec_hi +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, exec_lo +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, m0 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, null +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, s1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, s105 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, src_scc +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, ttmp15 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, vcc_hi +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, vcc_lo +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v128, 0xfe0b +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, -1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, 0.5 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, exec_hi +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, exec_lo +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, m0 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, null +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, s1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, s105 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, src_scc +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, ttmp15 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, vcc_hi +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, vcc_lo +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v128, 0xfe0b +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, -1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, 0.5 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, exec_hi +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, exec_lo +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, m0 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, null +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, s1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, s105 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, src_scc +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, ttmp15 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, vcc_hi +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, vcc_lo +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v128, 0xfe0b +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, -1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, 0.5 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, exec_hi +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, exec_lo +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, m0 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, null +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, s1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, s105 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, src_scc +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, ttmp15 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, vcc_hi +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, vcc_lo +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v128, 0xfe0b +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, -1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, 0.5 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, exec_hi +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, exec_lo +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, m0 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, null +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, s1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, s105 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, src_scc +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, ttmp15 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, vcc_hi +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, vcc_lo +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 +// GFX11: v_trunc_f16_e64 + +v_ceil_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_cos_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_floor_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_fract_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_not_b16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_rcp_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_ceil_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_cos_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_floor_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_fract_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_not_b16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_rcp_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s @@ -0,0 +1,228 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_add_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v5, v255, v2, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -0,0 +1,192 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s + +v_add_f16 v255, v1, v2 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v255, v1, v2 +// GFX11: v_fmac_f16_e64 + +v_ldexp_f16 v255, v1, v2 +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v5, v255, v2 +// GFX11: v_fmac_f16_e64 + +v_ldexp_f16 v5, v255, v2 +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v5, v1, v255 +// GFX11: v_fmac_f16_e64 + +v_max_f16 v5, v1, v255 +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s @@ -0,0 +1,1973 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_cmp_class_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s @@ -0,0 +1,1973 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s + +v_cmp_class_f16 vcc, v1, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v1, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, v127, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v128, v2 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v128, v2 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v128, v2 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s @@ -0,0 +1,542 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_cmpx_class_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_f_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lg_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_neq_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nge_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ngt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nle_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlg_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_o_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_t_f16_e32 v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_class_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_f_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lg_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_neq_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nge_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ngt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nle_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlg_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_o_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_t_f16_e32 v255, v2 +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s @@ -0,0 +1,542 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s + +v_cmpx_class_f16 v1, v255 +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v1, v255 +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 +