Index: lib/Target/AMDGPU/AMDGPURegisterInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -12,7 +12,7 @@ let Namespace = "AMDGPU" in { -foreach Index = 0-15 in { +foreach Index = 0-31 in { def sub#Index : SubRegIndex<32, !shl(Index, 5)>; } Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -69,7 +69,7 @@ class AMDGPUAsmParser; -enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; +enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; //===----------------------------------------------------------------------===// // Operand @@ -179,8 +179,11 @@ ImmTyNegHi, ImmTySwizzle, ImmTyGprIdxMode, + ImmTyHigh, + ImmTyBLGP, + ImmTyCBSZ, + ImmTyABID, ImmTyEndpgm, - ImmTyHigh }; private: @@ -497,6 +500,126 @@ return isVSrcF16() || isLiteralImm(MVT::v2f16); } + bool isVISrcB32() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); + } + + bool isVISrcB16() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); + } + + bool isVISrcV2B16() const { + return isVISrcB16(); + } + + bool isVISrcF32() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); + } + + bool isVISrcF16() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); + } + + bool isVISrcV2F16() const { + return isVISrcF16() || isVISrcB32(); + } + + bool isAISrcB32() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); + } + + bool isAISrcB16() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); + } + + bool isAISrcV2B16() const { + return isAISrcB16(); + } + + bool isAISrcF32() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); + } + + bool isAISrcF16() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); + } + + bool isAISrcV2F16() const { + return isAISrcF16() || isAISrcB32(); + } + + bool isAISrc_128B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); + } + + bool isAISrc_128B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); + } + + bool isAISrc_128V2B16() const { + return isAISrc_128B16(); + } + + bool isAISrc_128F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); + } + + bool isAISrc_128F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); + } + + bool isAISrc_128V2F16() const { + return isAISrc_128F16() || isAISrc_128B32(); + } + + bool isAISrc_512B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); + } + + bool isAISrc_512B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); + } + + bool isAISrc_512V2B16() const { + return isAISrc_512B16(); + } + + bool isAISrc_512F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); + } + + bool isAISrc_512F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); + } + + bool isAISrc_512V2F16() const { + return isAISrc_512F16() || isAISrc_512B32(); + } + + bool isAISrc_1024B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); + } + + bool isAISrc_1024B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); + } + + bool 
isAISrc_1024V2B16() const { + return isAISrc_1024B16(); + } + + bool isAISrc_1024F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); + } + + bool isAISrc_1024F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); + } + + bool isAISrc_1024V2F16() const { + return isAISrc_1024F16() || isAISrc_1024B32(); + } + bool isKImmFP32() const { return isLiteralImm(MVT::f32); } @@ -526,6 +649,9 @@ bool isSMRDLiteralOffset() const; bool isDPP8() const; bool isDPPCtrl() const; + bool isBLGP() const; + bool isCBSZ() const; + bool isABID() const; bool isGPRIdxMode() const; bool isS16Imm() const; bool isU16Imm() const; @@ -728,9 +854,10 @@ case ImmTySwizzle: OS << "Swizzle"; break; case ImmTyGprIdxMode: OS << "GprIdxMode"; break; case ImmTyHigh: OS << "High"; break; - case ImmTyEndpgm: - OS << "Endpgm"; - break; + case ImmTyBLGP: OS << "BLGP"; break; + case ImmTyCBSZ: OS << "CBSZ"; break; + case ImmTyABID: OS << "ABID"; break; + case ImmTyEndpgm: OS << "Endpgm"; break; } } @@ -850,6 +977,7 @@ void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { switch (RegKind) { case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; + case IS_AGPR: // fall through case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; default: break; } @@ -1275,6 +1403,10 @@ void cvtSDWA(MCInst &Inst, const OperandVector &Operands, uint64_t BasicInstType, bool skipVcc = false); + AMDGPUOperand::Ptr defaultBLGP() const; + AMDGPUOperand::Ptr defaultCBSZ() const; + AMDGPUOperand::Ptr defaultABID() const; + OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; }; @@ -1312,6 +1444,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: return &APFloat::IEEEsingle(); case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -1324,6 +1458,10 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: return &APFloat::IEEEhalf(); @@ -1580,12 +1718,18 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: { bool lost; @@ -1614,6 +1758,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: if (isSafeTruncation(Val, 32) && @@ -1642,6 +1788,8 @@ case 
AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     if (isSafeTruncation(Val, 16) &&
         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1653,7 +1801,9 @@
     return;
 
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
     assert(isSafeTruncation(Val, 16));
     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                         AsmParser->hasInv2PiInlineImm()));
@@ -1741,6 +1891,15 @@
       case 8: return AMDGPU::SGPR_256RegClassID;
       case 16: return AMDGPU::SGPR_512RegClassID;
     }
+  } else if (Is == IS_AGPR) {
+    switch (RegWidth) {
+      default: return -1;
+      case 1: return AMDGPU::AGPR_32RegClassID;
+      case 2: return AMDGPU::AReg_64RegClassID;
+      case 4: return AMDGPU::AReg_128RegClassID;
+      case 16: return AMDGPU::AReg_512RegClassID;
+      case 32: return AMDGPU::AReg_1024RegClassID;
+    }
   }
   return -1;
 }
@@ -1837,6 +1996,7 @@
     return false;
   case IS_VGPR:
   case IS_SGPR:
+  case IS_AGPR:
   case IS_TTMP:
     if (Reg1 != Reg + RegWidth) {
       return false;
     }
@@ -1852,6 +2012,8 @@
   { "v" },
   { "s" },
   { "ttmp" },
+  { "acc" },
+  { "a" },
 };
 
 bool
@@ -1911,6 +2073,9 @@
   } else if (RegName[0] == 's') {
     RegNumIndex = 1;
     RegKind = IS_SGPR;
+  } else if (RegName[0] == 'a') {
+    RegNumIndex = RegName.startswith("acc") ? 3 : 1;
+    RegKind = IS_AGPR;
   } else if (RegName.startswith("ttmp")) {
     RegNumIndex = strlen("ttmp");
     RegKind = IS_TTMP;
@@ -1992,6 +2157,7 @@
     break;
   case IS_VGPR:
   case IS_SGPR:
+  case IS_AGPR:
   case IS_TTMP:
   {
     unsigned Size = 1;
@@ -2517,6 +2683,8 @@
   const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
   if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
       OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
+      OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
+      OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
       OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
       OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
     return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
@@ -4327,7 +4495,7 @@
   if (Tok == Name) {
     if (Tok == "r128" && isGFX9())
       Error(S, "r128 modifier is not supported on this GPU");
-    if (Tok == "a16" && !isGFX9())
+    if (Tok == "a16" && !isGFX9() && !isGFX10())
       Error(S, "a16 modifier is not supported on this GPU");
     Bit = 1;
     Parser.Lex();
@@ -5767,7 +5935,10 @@
   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
-  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
+  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
+  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
+  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
+  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
 };
 
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -6109,6 +6280,22 @@
   return false;
 }
 
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isBLGP() const {
+  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isCBSZ() const {
+  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isABID() const {
+  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
+}
+
 bool AMDGPUOperand::isS16Imm() const {
   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
 }
@@ -6594,6 +6781,22 @@
   }
 }
 
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
+}
+
 /// Force static initialization.
 extern "C" void LLVMInitializeAMDGPUAsmParser() {
   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
===================================================================
--- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -82,6 +82,8 @@
   MCOperand decodeOperand_VReg_64(unsigned Val) const;
   MCOperand decodeOperand_VReg_96(unsigned Val) const;
   MCOperand decodeOperand_VReg_128(unsigned Val) const;
+  MCOperand decodeOperand_VReg_256(unsigned Val) const;
+  MCOperand decodeOperand_VReg_512(unsigned Val) const;
 
   MCOperand decodeOperand_SReg_32(unsigned Val) const;
   MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
@@ -93,12 +95,20 @@
   MCOperand decodeOperand_SReg_256(unsigned Val) const;
   MCOperand decodeOperand_SReg_512(unsigned Val) const;
 
+  MCOperand decodeOperand_AGPR_32(unsigned Val) const;
+  MCOperand decodeOperand_AReg_128(unsigned Val) const;
+  MCOperand decodeOperand_AReg_512(unsigned Val) const;
+  MCOperand decodeOperand_AReg_1024(unsigned Val) const;
+  MCOperand decodeOperand_AV_32(unsigned Val) const;
+  MCOperand decodeOperand_AV_64(unsigned Val) const;
+
   enum OpWidthTy {
     OPW32,
     OPW64,
     OPW128,
     OPW256,
     OPW512,
+    OPW1024,
     OPW16,
     OPWV216,
     OPW_LAST_,
@@ -106,6 +116,7 @@
   };
 
   unsigned getVgprClassId(const OpWidthTy Width) const;
+  unsigned getAgprClassId(const OpWidthTy Width) const;
   unsigned getSgprClassId(const OpWidthTy Width) const;
   unsigned getTtmpClassId(const OpWidthTy Width) const;
 
Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -155,6 +155,30 @@
   return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
 }
 
+static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
+                                        unsigned Imm,
+                                        uint64_t Addr,
+                                        const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
+                                        unsigned Imm,
+                                        uint64_t Addr,
+                                        const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
+}
+
+static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
+                                          unsigned Imm,
+                                          uint64_t Addr,
+                                          const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
+}
+
 #define DECODE_SDWA(DecName) \
 DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
 
@@ -640,6 +664,30 @@
   return decodeSrcOp(OPW32, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
+  return
createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const { + return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const { + return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const { + return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const { + return decodeSrcOp(OPW32, Val); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const { + return decodeSrcOp(OPW64, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { return createRegOperand(AMDGPU::VReg_64RegClassID, Val); } @@ -652,6 +700,14 @@ return createRegOperand(AMDGPU::VReg_128RegClassID, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const { + return createRegOperand(AMDGPU::VReg_256RegClassID, Val); +} + +MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const { + return createRegOperand(AMDGPU::VReg_512RegClassID, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { // table-gen generated disassembler doesn't care about operand types // leaving only registry class so SSrc_32 operand turns into SReg_32 @@ -805,6 +861,9 @@ // ToDo: case 248: 1/(2*PI) - is allowed only on VI switch (Width) { case OPW32: + case OPW128: // splat constants + case OPW512: + case OPW1024: return MCOperand::createImm(getInlineImmVal32(Imm)); case OPW64: return MCOperand::createImm(getInlineImmVal64(Imm)); @@ -831,6 +890,24 @@ } } +unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const { + using namespace AMDGPU; + + assert(OPW_FIRST_ <= Width && Width < OPW_LAST_); + switch (Width) { + default: // fall + case OPW32: + case OPW16: + case OPWV216: + return AGPR_32RegClassID; + case OPW64: return AReg_64RegClassID; + case OPW128: return AReg_128RegClassID; + case OPW512: return AReg_512RegClassID; + case OPW1024: return AReg_1024RegClassID; + } +} + + unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const { using namespace AMDGPU; @@ -879,10 +956,14 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const { using namespace AMDGPU::EncValues; - assert(Val < 512); // enum9 + assert(Val < 1024); // enum10 + + bool IsAGPR = Val & 512; + Val &= 511; if (VGPR_MIN <= Val && Val <= VGPR_MAX) { - return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN); + return createRegOperand(IsAGPR ? getAgprClassId(Width) + : getVgprClassId(Width), Val - VGPR_MIN); } if (Val <= SGPR_MAX) { assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. 
Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -137,11 +137,22 @@ OPERAND_REG_INLINE_C_V2FP16, OPERAND_REG_INLINE_C_V2INT16, + /// Operands with an AccVGPR register or inline constant + OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_INT32, + OPERAND_REG_INLINE_AC_FP16, + OPERAND_REG_INLINE_AC_FP32, + OPERAND_REG_INLINE_AC_V2FP16, + OPERAND_REG_INLINE_AC_V2INT16, + OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2INT16, OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, - OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16, + OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2INT16, + + OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2INT16, OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -20,12 +20,21 @@ sub4, sub5, sub6, sub7, sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15]; + list ret32 = [sub0, sub1, sub2, sub3, + sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, + sub12, sub13, sub14, sub15, + sub16, sub17, sub18, sub19, + sub20, sub21, sub22, sub23, + sub24, sub25, sub26, sub27, + sub28, sub29, sub30, sub31]; list ret = !if(!eq(size, 2), ret2, !if(!eq(size, 3), ret3, !if(!eq(size, 4), ret4, !if(!eq(size, 5), ret5, - !if(!eq(size, 8), ret8, ret16))))); + !if(!eq(size, 8), ret8, + !if(!eq(size, 16), ret16, ret32)))))); } //===----------------------------------------------------------------------===// @@ -160,6 +169,13 @@ } } +// AccVGPR registers +foreach Index = 0-255 in { + def AGPR#Index : SIReg <"AGPR"#Index, Index> { + let HWEncoding{8} = 1; + } +} + //===----------------------------------------------------------------------===// // Groupings using register classes and tuples //===----------------------------------------------------------------------===// @@ -181,7 +197,7 @@ (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. 
- let AllocationPriority = 7; + let AllocationPriority = 9; } // SGPR 64-bit registers @@ -240,6 +256,41 @@ (add (decimate (shl SGPR_32, 14), 4)), (add (decimate (shl SGPR_32, 15), 4))]>; +// SGPR 1024-bit registers +def SGPR_1024Regs : RegisterTuples.ret, + [(add (decimate SGPR_32, 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4)), + (add (decimate (shl SGPR_32, 8), 4)), + (add (decimate (shl SGPR_32, 9), 4)), + (add (decimate (shl SGPR_32, 10), 4)), + (add (decimate (shl SGPR_32, 11), 4)), + (add (decimate (shl SGPR_32, 12), 4)), + (add (decimate (shl SGPR_32, 13), 4)), + (add (decimate (shl SGPR_32, 14), 4)), + (add (decimate (shl SGPR_32, 15), 4)), + (add (decimate (shl SGPR_32, 16), 4)), + (add (decimate (shl SGPR_32, 17), 4)), + (add (decimate (shl SGPR_32, 18), 4)), + (add (decimate (shl SGPR_32, 19), 4)), + (add (decimate (shl SGPR_32, 20), 4)), + (add (decimate (shl SGPR_32, 21), 4)), + (add (decimate (shl SGPR_32, 22), 4)), + (add (decimate (shl SGPR_32, 23), 4)), + (add (decimate (shl SGPR_32, 24), 4)), + (add (decimate (shl SGPR_32, 25), 4)), + (add (decimate (shl SGPR_32, 26), 4)), + (add (decimate (shl SGPR_32, 27), 4)), + (add (decimate (shl SGPR_32, 28), 4)), + (add (decimate (shl SGPR_32, 29), 4)), + (add (decimate (shl SGPR_32, 30), 4)), + (add (decimate (shl SGPR_32, 31), 4))]>; + // Trap handler TMP 32-bit registers def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, (add (sequence "TTMP%u", 0, 15))> { @@ -365,7 +416,6 @@ TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10, TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>; - // VGPR 32-bit registers // i16/f16 only on VI+ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, @@ -430,6 +480,114 @@ (add (shl VGPR_32, 14)), (add (shl VGPR_32, 15))]>; +// VGPR 1024-bit registers +def VGPR_1024 : RegisterTuples.ret, + [(add (trunc VGPR_32, 225)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7)), + (add (shl VGPR_32, 8)), + (add (shl VGPR_32, 9)), + (add (shl VGPR_32, 10)), + (add (shl VGPR_32, 11)), + (add (shl VGPR_32, 12)), + (add (shl VGPR_32, 13)), + (add (shl VGPR_32, 14)), + (add (shl VGPR_32, 15)), + (add (shl VGPR_32, 16)), + (add (shl VGPR_32, 17)), + (add (shl VGPR_32, 18)), + (add (shl VGPR_32, 19)), + (add (shl VGPR_32, 20)), + (add (shl VGPR_32, 21)), + (add (shl VGPR_32, 22)), + (add (shl VGPR_32, 23)), + (add (shl VGPR_32, 24)), + (add (shl VGPR_32, 25)), + (add (shl VGPR_32, 26)), + (add (shl VGPR_32, 27)), + (add (shl VGPR_32, 28)), + (add (shl VGPR_32, 29)), + (add (shl VGPR_32, 30)), + (add (shl VGPR_32, 31))]>; + +// AccVGPR 32-bit registers +def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add (sequence "AGPR%u", 0, 255))> { + let AllocationPriority = 1; + let Size = 32; +} + +// AGPR 64-bit registers +def AGPR_64 : RegisterTuples.ret, + [(add (trunc AGPR_32, 255)), + (add (shl AGPR_32, 1))]>; + +// AGPR 128-bit registers +def AGPR_128 : RegisterTuples.ret, + [(add (trunc AGPR_32, 253)), + (add (shl AGPR_32, 1)), + (add (shl AGPR_32, 2)), + (add (shl AGPR_32, 3))]>; + +// AGPR 512-bit registers +def AGPR_512 : 
RegisterTuples.ret, + [(add (trunc AGPR_32, 241)), + (add (shl AGPR_32, 1)), + (add (shl AGPR_32, 2)), + (add (shl AGPR_32, 3)), + (add (shl AGPR_32, 4)), + (add (shl AGPR_32, 5)), + (add (shl AGPR_32, 6)), + (add (shl AGPR_32, 7)), + (add (shl AGPR_32, 8)), + (add (shl AGPR_32, 9)), + (add (shl AGPR_32, 10)), + (add (shl AGPR_32, 11)), + (add (shl AGPR_32, 12)), + (add (shl AGPR_32, 13)), + (add (shl AGPR_32, 14)), + (add (shl AGPR_32, 15))]>; + +// AGPR 1024-bit registers +def AGPR_1024 : RegisterTuples.ret, + [(add (trunc AGPR_32, 225)), + (add (shl AGPR_32, 1)), + (add (shl AGPR_32, 2)), + (add (shl AGPR_32, 3)), + (add (shl AGPR_32, 4)), + (add (shl AGPR_32, 5)), + (add (shl AGPR_32, 6)), + (add (shl AGPR_32, 7)), + (add (shl AGPR_32, 8)), + (add (shl AGPR_32, 9)), + (add (shl AGPR_32, 10)), + (add (shl AGPR_32, 11)), + (add (shl AGPR_32, 12)), + (add (shl AGPR_32, 13)), + (add (shl AGPR_32, 14)), + (add (shl AGPR_32, 15)), + (add (shl AGPR_32, 16)), + (add (shl AGPR_32, 17)), + (add (shl AGPR_32, 18)), + (add (shl AGPR_32, 19)), + (add (shl AGPR_32, 20)), + (add (shl AGPR_32, 21)), + (add (shl AGPR_32, 22)), + (add (shl AGPR_32, 23)), + (add (shl AGPR_32, 24)), + (add (shl AGPR_32, 25)), + (add (shl AGPR_32, 26)), + (add (shl AGPR_32, 27)), + (add (shl AGPR_32, 28)), + (add (shl AGPR_32, 29)), + (add (shl AGPR_32, 30)), + (add (shl AGPR_32, 31))]>; + //===----------------------------------------------------------------------===// // Register classes used as source and destination //===----------------------------------------------------------------------===// @@ -459,23 +617,23 @@ SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { - let AllocationPriority = 8; + let AllocationPriority = 10; } def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { - let AllocationPriority = 8; + let AllocationPriority = 10; } def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { - let AllocationPriority = 8; + let AllocationPriority = 10; } // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { - let AllocationPriority = 8; + let AllocationPriority = 10; } def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, @@ -485,7 +643,7 @@ def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; - let AllocationPriority = 9; + let AllocationPriority = 11; } // CCR (call clobbered registers) SGPR 64-bit registers @@ -501,13 +659,13 @@ def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; - let AllocationPriority = 9; + let AllocationPriority = 13; } def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; - let AllocationPriority = 9; + let AllocationPriority = 13; } def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32, @@ -529,16 +687,16 @@ // for symmetry with VGPRs. 
def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add SGPR_96Regs)> { - let AllocationPriority = 10; + let AllocationPriority = 14; } def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add SGPR_96)> { - let AllocationPriority = 10; + let AllocationPriority = 14; } def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> { - let AllocationPriority = 11; + let AllocationPriority = 15; } def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> { @@ -547,7 +705,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add SGPR_128, TTMP_128)> { - let AllocationPriority = 11; + let AllocationPriority = 15; } } // End CopyCost = 2 @@ -556,16 +714,16 @@ // for symmetry with VGPRs. def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add SGPR_160Regs)> { - let AllocationPriority = 12; + let AllocationPriority = 16; } def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add SGPR_160)> { - let AllocationPriority = 12; + let AllocationPriority = 16; } def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { - let AllocationPriority = 13; + let AllocationPriority = 17; } def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { @@ -576,11 +734,11 @@ (add SGPR_256, TTMP_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; - let AllocationPriority = 13; + let AllocationPriority = 17; } def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> { - let AllocationPriority = 14; + let AllocationPriority = 18; } def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> { @@ -591,7 +749,7 @@ (add SGPR_512, TTMP_512)> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; - let AllocationPriority = 14; + let AllocationPriority = 18; } def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, @@ -599,6 +757,16 @@ let isAllocatable = 0; } +def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add SGPR_1024Regs)> { + let AllocationPriority = 19; +} + +def SReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, + (add SGPR_1024)> { + let CopyCost = 16; + let AllocationPriority = 19; +} + // Register class for all vector registers (VGPRs + Interploation Registers) def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> { let Size = 64; @@ -644,6 +812,40 @@ let AllocationPriority = 7; } +def VReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add VGPR_1024)> { + let Size = 1024; + let CopyCost = 32; + let AllocationPriority = 8; +} + +def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add AGPR_64)> { + let Size = 64; + + let CopyCost = 5; + let AllocationPriority = 2; +} + +def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AGPR_128)> { + let Size = 128; + + // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr + let CopyCost = 9; + let AllocationPriority = 4; +} + +def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> { + let Size = 512; + let CopyCost = 33; + let AllocationPriority = 7; +} + +// TODO: add v32f32 value type +def AReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add AGPR_1024)> { + let Size = 1024; + let CopyCost = 65; + let AllocationPriority = 8; +} + def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { let Size = 32; } @@ -657,6 +859,15 @@ let isAllocatable = 0; } +def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, 
v2i16, v2f16], 32,
+                          (add AGPR_32, VGPR_32)> {
+  let isAllocatable = 0;
+}
+
+def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, (add AReg_64, VReg_64)> {
+  let isAllocatable = 0;
+}
+
 //===----------------------------------------------------------------------===//
 // Register operands
 //===----------------------------------------------------------------------===//
@@ -666,47 +877,40 @@
   let RenderMethod = "addRegOrImmOperands";
 }
 
-multiclass SIRegOperand <string rc, string MatchName, string opType> {
+multiclass SIRegOperand32 <string rc, string MatchName, string opType,
+                           string rc_suffix = "_32"> {
   let OperandNamespace = "AMDGPU" in {
-    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_INT16";
       let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
       let DecoderMethod = "decodeOperand_VSrc16";
     }
 
-    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP16";
       let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
-      let DecoderMethod = "decodeOperand_VSrc16";
+      let DecoderMethod = "decodeOperand_" # rc # "_16";
     }
 
-    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_INT32";
       let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
+      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
     }
 
-    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_FP32";
       let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
     }
 
-    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
-      let OperandType = opType#"_INT64";
-      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
-    }
-
-    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
-      let OperandType = opType#"_FP64";
-      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
-    }
-
-    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_V2INT16";
       let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
       let DecoderMethod = "decodeOperand_VSrcV216";
     }
 
-    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_V2FP16";
       let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
       let DecoderMethod = "decodeOperand_VSrcV216";
@@ -714,6 +918,21 @@
   }
 }
 
+multiclass SIRegOperand <string rc, string MatchName, string opType> :
+  SIRegOperand32<rc, MatchName, opType> {
+  let OperandNamespace = "AMDGPU" in {
+    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_INT64";
+      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
+    }
+
+    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_FP64";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
+    }
+  }
+}
+
 // FIXME: 64-bit sources can sometimes use 32-bit constants.
 multiclass RegImmOperand <string rc, string MatchName>
   : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
@@ -721,6 +940,14 @@
 multiclass RegInlineOperand <string rc, string MatchName>
   : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
 
+multiclass RegInlineOperand32 <string rc, string MatchName,
+                               string rc_suffix = "_32">
+  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;
+
+multiclass RegInlineOperandAC <string rc, string MatchName,
+                               string rc_suffix = "_32">
+  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix>;
+
 //===----------------------------------------------------------------------===//
 // SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
@@ -760,8 +987,46 @@
   let DecoderMethod = "DecodeVS_32RegisterClass";
 }
 
+//===----------------------------------------------------------------------===//
+// ASrc_* Operands with an AccVGPR
+//===----------------------------------------------------------------------===//
+
+def ARegSrc_32 : RegisterOperand<AGPR_32> {
+  let DecoderMethod = "DecodeAGPR_32RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
 //===----------------------------------------------------------------------===//
 // VCSrc_* Operands with an SGPR, VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
 defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
+
+//===----------------------------------------------------------------------===//
+// VISrc_* Operands with a VGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
+
+//===----------------------------------------------------------------------===//
+// AVSrc_* Operands with an AGPR or VGPR
+//===----------------------------------------------------------------------===//
+
+def AVSrc_32 : RegisterOperand<AV_32> {
+  let DecoderMethod = "DecodeAV_32RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVSrc_64 : RegisterOperand<AV_64> {
+  let DecoderMethod = "DecodeAV_64RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// ACSrc_* Operands with an AGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm AISrc : RegInlineOperandAC<"AGPR", "AISrc">;
+defm AISrc_128 : RegInlineOperandAC<"AReg", "AISrc_128", "_128">;
+defm AISrc_512 : RegInlineOperandAC<"AReg", "AISrc_512", "_512">;
+defm AISrc_1024 : RegInlineOperandAC<"AReg", "AISrc_1024", "_1024">;
Index: test/CodeGen/AMDGPU/ipra-regmask.ll
===================================================================
--- test/CodeGen/AMDGPU/ipra-regmask.ll
+++ test/CodeGen/AMDGPU/ipra-regmask.ll
@@ -1,19 +1,19 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
 ; Make sure the expected regmask is generated for sub/superregisters.
-; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} +; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} define void @csr() #0 { call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0 ret void } -; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} define void @subregs_for_super() #0 { call void asm sideeffect "", "~{v0},~{v1}"() #0 ret void } -; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +; CHECK-DAG: Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 
$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} define void @clobbered_reg_with_sub() #0 { call void asm sideeffect "", "~{v[0:1]}"() #0 ret void
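Example (illustrative only): the assembler changes above accept AccVGPR operands spelled with the new "a"/"acc" register prefixes together with the new blgp:/cbsz:/abid: modifiers. Assuming MFMA and accvgpr instruction definitions that are not part of this diff, the accepted syntax would look like:

  v_accvgpr_write_b32 a5, v1
  v_accvgpr_read_b32 v0, a5
  v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] cbsz:1 abid:2 blgp:3

The a5 and a[0:31] operands exercise the IS_AGPR parsing path added here, and cbsz/abid/blgp are matched through the new ImmTyCBSZ/ImmTyABID/ImmTyBLGP optional operands; the specific mnemonics shown are assumptions, not definitions introduced by this patch.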