Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -622,6 +622,7 @@ case AMDGPU::SRC_SHARED_LIMIT: case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_LIMIT: + case AMDGPU::SGPR_NULL: continue; case AMDGPU::SRC_POPS_EXITING_WAVE_ID: Index: lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp +++ lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -45,7 +45,7 @@ "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", - "s100", "s101", "s102", "s103" + "s100", "s101", "s102", "s103", "s104", "s105" }; static const char *const VGPR64RegNames[] = { @@ -324,7 +324,7 @@ "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", - "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]", "s[104:105]" }; static const char *const SGPR128RegNames[] = { Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1012,7 +1012,11 @@ } bool hasSGPR102_SGPR103() const { - return !isVI(); + return !isVI() && !isGFX9(); + } + + bool hasSGPR104_SGPR105() const { + return isGFX10(); } bool hasIntClamp() const { @@ -1702,6 +1706,7 @@ .Case("tma_hi", AMDGPU::TMA_HI) .Case("tba_lo", AMDGPU::TBA_LO) .Case("tba_hi", AMDGPU::TBA_HI) + .Case("null", AMDGPU::SGPR_NULL) .Default(0); } @@ -2394,7 +2399,10 @@ switch (Reg) 
{ case AMDGPU::FLAT_SCR: case AMDGPU::VCC: + case AMDGPU::VCC_LO: + case AMDGPU::VCC_HI: case AMDGPU::M0: + case AMDGPU::SGPR_NULL: return Reg; default: break; @@ -2969,21 +2977,27 @@ unsigned NumVGPRs = NextFreeVGPR; unsigned NumSGPRs = NextFreeSGPR; - unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); - if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && - NumSGPRs > MaxAddressableNumSGPRs) - return OutOfRangeError(SGPRRange); + if (Version.Major >= 10) + NumSGPRs = 0; + else { + unsigned MaxAddressableNumSGPRs = + IsaInfo::getAddressableNumSGPRs(&getSTI()); - NumSGPRs += - IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); + if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); - if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && - NumSGPRs > MaxAddressableNumSGPRs) - return OutOfRangeError(SGPRRange); + NumSGPRs += + IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); + + if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); - if (Features.test(FeatureSGPRInitBug)) - NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + if (Features.test(FeatureSGPRInitBug)) + NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + } VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); @@ -3561,7 +3575,14 @@ for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); R.isValid(); ++R) { if (*R == RegNo) - return isGFX9(); + return isGFX9() || isGFX10(); + } + + // GFX10 has 2 more SGPRs 104 and 105. 
+ for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return hasSGPR104_SGPR105(); } switch (RegNo) { @@ -3571,11 +3592,13 @@ case AMDGPU::TMA: case AMDGPU::TMA_LO: case AMDGPU::TMA_HI: - return !isGFX9(); + return !isGFX9() && !isGFX10(); case AMDGPU::XNACK_MASK: case AMDGPU::XNACK_MASK_LO: case AMDGPU::XNACK_MASK_HI: - return !isCI() && !isSI() && hasXNACK(); + return !isCI() && !isSI() && !isGFX10() && hasXNACK(); + case AMDGPU::SGPR_NULL: + return isGFX10(); default: break; } @@ -3586,8 +3609,10 @@ if (isCI()) return true; - if (isSI()) { - // No flat_scr + if (isSI() || isGFX10()) { + // No flat_scr on SI. + // On GFX10 flat scratch is not a valid register operand and can only be + // accessed with s_setreg/s_getreg. switch (RegNo) { case AMDGPU::FLAT_SCR: case AMDGPU::FLAT_SCR_LO: @@ -3603,7 +3628,7 @@ for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); R.isValid(); ++R) { if (*R == RegNo) - return false; + return hasSGPR102_SGPR103(); } return true; @@ -4177,6 +4202,8 @@ int Last = ID_SYMBOLIC_LAST_; if (isSI() || isCI() || isVI()) Last = ID_SYMBOLIC_FIRST_GFX9_; + else if (isGFX9()) + Last = ID_SYMBOLIC_FIRST_GFX10_; for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { if (tok == IdSymbolic[i]) { HwReg.Id = i; @@ -4292,7 +4319,12 @@ for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { switch(i) { default: continue; // Omit gaps. - case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; + case ID_GS_ALLOC_REQ: + if (isSI() || isCI() || isVI()) + continue; + break; + case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: + case ID_SYSMSG: break; } if (tok == IdSymbolic[i]) { Msg.Id = i; @@ -4539,6 +4571,7 @@ do { // Validate and encode message ID. if (! 
((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) + || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) || Msg.Id == ID_SYSMSG)) { if (Msg.IsSymbolic) Error(S, "invalid/unsupported symbolic name of message"); Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -127,6 +127,7 @@ bool isVI() const; bool isGFX9() const; + bool isGFX10() const; }; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -51,6 +51,9 @@ #define DEBUG_TYPE "amdgpu-disassembler" +#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \ + : AMDGPU::EncValues::SGPR_MAX_SI) + using DecodeStatus = llvm::MCDisassembler::DecodeStatus; inline static MCDisassembler::DecodeStatus @@ -705,8 +708,10 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { using namespace AMDGPU::EncValues; - unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN; - unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX; + unsigned TTmpMin = + (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN; + unsigned TTmpMax = + (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX; return (TTmpMin <= Val && Val <= TTmpMax)? 
Val - TTmpMin : -1; } @@ -784,6 +789,7 @@ case 110: return createRegOperand(TMA_LO); case 111: return createRegOperand(TMA_HI); case 124: return createRegOperand(M0); + case 125: return createRegOperand(SGPR_NULL); case 126: return createRegOperand(EXEC_LO); case 127: return createRegOperand(EXEC_HI); case 235: return createRegOperand(SRC_SHARED_BASE); @@ -827,7 +833,8 @@ using namespace AMDGPU::SDWA; using namespace AMDGPU::EncValues; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || + STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { // XXX: cast to int is needed to avoid stupid warning: // compare with unsigned is always true if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) && @@ -836,7 +843,8 @@ Val - SDWA9EncValues::SRC_VGPR_MIN); } if (SDWA9EncValues::SRC_SGPR_MIN <= Val && - Val <= SDWA9EncValues::SRC_SGPR_MAX) { + Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10 + : SDWA9EncValues::SRC_SGPR_MAX_SI)) { return createSRegOperand(getSgprClassId(Width), Val - SDWA9EncValues::SRC_SGPR_MIN); } @@ -872,15 +880,17 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { using namespace AMDGPU::SDWA; - assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] && - "SDWAVopcDst should be present only on GFX9"); + assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] || + STI.getFeatureBits()[AMDGPU::FeatureGFX10]) && + "SDWAVopcDst should be present only on GFX9+"); + if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; int TTmpIdx = getTTmpIdx(Val); if (TTmpIdx >= 0) { return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx); - } else if (Val > AMDGPU::EncValues::SGPR_MAX) { + } else if (Val > SGPR_MAX) { return decodeSpecialReg64(Val); } else { return createSRegOperand(getSgprClassId(OPW64), Val); @@ -898,6 +908,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; } +bool AMDGPUDisassembler::isGFX10() const { + return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; +} + 
//===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -262,6 +262,9 @@ case AMDGPU::M0: O << "m0"; return; + case AMDGPU::SGPR_NULL: + O << "null"; + return; case AMDGPU::FLAT_SCR: O << "flat_scratch"; return; @@ -1037,7 +1040,9 @@ const unsigned SImm16 = MI->getOperand(OpNo).getImm(); const unsigned Id = SImm16 & ID_MASK_; do { - if (Id == ID_INTERRUPT) { + if (Id == ID_INTERRUPT || + (Id == ID_GS_ALLOC_REQ && !AMDGPU::isSI(STI) && !AMDGPU::isCI(STI) && + !AMDGPU::isVI(STI))) { if ((SImm16 & ~ID_MASK_) != 0) // Unused/unknown bits must be 0. break; O << "sendmsg(" << IdSymbolic[Id] << ')'; @@ -1219,6 +1224,8 @@ unsigned Last = ID_SYMBOLIC_LAST_; if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI) || AMDGPU::isVI(STI)) Last = ID_SYMBOLIC_FIRST_GFX9_; + else if (AMDGPU::isGFX9(STI)) + Last = ID_SYMBOLIC_FIRST_GFX10_; if (ID_SYMBOLIC_FIRST_ <= Id && Id < Last && IdSymbolic[Id]) { O << IdSymbolic[Id]; } else { Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -223,11 +223,12 @@ enum : unsigned { SGPR_MIN = 0, - SGPR_MAX = 101, + SGPR_MAX_SI = 101, + SGPR_MAX_GFX10 = 105, TTMP_VI_MIN = 112, TTMP_VI_MAX = 123, - TTMP_GFX9_MIN = 108, - TTMP_GFX9_MAX = 123, + TTMP_GFX9_GFX10_MIN = 108, + TTMP_GFX9_GFX10_MAX = 123, INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64 INLINE_INTEGER_C_MAX = 208, @@ -249,6 +250,7 @@ ID_INTERRUPT = 1, ID_GS, ID_GS_DONE, + ID_GS_ALLOC_REQ = 9, ID_SYSMSG = 15, ID_GAPS_LAST_, // Indicate that sequence has gaps. 
ID_GAPS_FIRST_ = ID_INTERRUPT, @@ -305,7 +307,16 @@ ID_IB_STS = 7, ID_MEM_BASES = 15, ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES, - ID_SYMBOLIC_LAST_ = 16, + ID_TBA_LO = 16, + ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO, + ID_TBA_HI = 17, + ID_TMA_LO = 18, + ID_TMA_HI = 19, + ID_FLAT_SCR_LO = 20, + ID_FLAT_SCR_HI = 21, + ID_XNACK_MASK = 22, + ID_POPS_PACKER = 25, + ID_SYMBOLIC_LAST_ = 26, ID_SHIFT_ = 0, ID_WIDTH_ = 6, ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_) @@ -400,7 +411,8 @@ SRC_VGPR_MIN = 0, SRC_VGPR_MAX = 255, SRC_SGPR_MIN = 256, - SRC_SGPR_MAX = 357, + SRC_SGPR_MAX_SI = 357, + SRC_SGPR_MAX_GFX10 = 361, SRC_TTMP_MIN = 364, SRC_TTMP_MAX = 379, }; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -181,6 +181,9 @@ reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13); reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); + // Reserve null register - it shall never be allocated + reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL); + const GCNSubtarget &ST = MF.getSubtarget(); unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); @@ -1213,9 +1216,9 @@ } REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); - REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames); REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); - REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames); REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, VGPR96RegNames); Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -71,6 +71,7 @@ def SCC : SIReg<"scc", 253>; def M0 : SIReg <"m0", 124>; +def SGPR_NULL : SIReg<"null", 
125>; def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>; def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>; @@ -139,7 +140,7 @@ def FLAT_SCR : FlatReg; // SGPR registers -foreach Index = 0-103 in { +foreach Index = 0-105 in { def SGPR#Index : SIReg <"SGPR"#Index, Index>; } @@ -168,7 +169,7 @@ // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "SGPR%u", 0, 103))> { + (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. let AllocationPriority = 7; @@ -446,7 +447,7 @@ // See comments in SIInstructions.td for more info. def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, - TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, + SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID)> { let AllocationPriority = 8; } Index: lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -22,7 +22,7 @@ nullptr, nullptr, nullptr, - nullptr, + "MSG_GS_ALLOC_REQ", nullptr, nullptr, nullptr, @@ -68,7 +68,17 @@ nullptr, nullptr, nullptr, - "HW_REG_SH_MEM_BASES" + "HW_REG_SH_MEM_BASES", + "HW_REG_TBA_LO", + "HW_REG_TBA_HI", + "HW_REG_TMA_LO", + "HW_REG_TMA_HI", + "HW_REG_FLAT_SCR_LO", + "HW_REG_FLAT_SCR_HI", + "HW_REG_XNACK_MASK", + nullptr, // HW_ID1, no predictable values + nullptr, // HW_ID2, no predictable values + "HW_REG_POPS_PACKER" }; } // namespace Hwreg Index: test/MC/AMDGPU/sopp-err.s =================================================================== --- test/MC/AMDGPU/sopp-err.s +++ test/MC/AMDGPU/sopp-err.s @@ -1,6 +1,7 @@ // 
RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s s_sendmsg sendmsg(11) // GCN: error: invalid/unsupported code of message @@ -50,6 +51,12 @@ s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP, 0) // GCN: error: failed parsing operand +s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) +// SICIVI: error: invalid/unsupported symbolic name of message + +s_sendmsg sendmsg(MSG_GS_ALLOC_REQ, 0) +// SICIVI: error: invalid/unsupported symbolic name of message + s_sendmsg sendmsg(15) // GCN: error: failed parsing operand @@ -75,13 +82,19 @@ // GCN: error: invalid/unsupported code of SYSMSG_OP s_waitcnt lgkmcnt(16) +// SICIVI: error: too large value for lgkmcnt + +s_waitcnt lgkmcnt(64) // GCN: error: too large value for lgkmcnt s_waitcnt expcnt(8) // GCN: error: too large value for expcnt s_waitcnt vmcnt(16) -// GCN: error: too large value for vmcnt +// SICIVI: error: too large value for vmcnt + +s_waitcnt vmcnt(64) +// GFX10: error: too large value for vmcnt s_waitcnt vmcnt(0xFFFFFFFFFFFF0000) // GCN: error: too large value for vmcnt