Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h
@@ -121,6 +121,8 @@
   OPERAND_REG_IMM_FP32,
   OPERAND_REG_IMM_FP64,
   OPERAND_REG_IMM_FP16,
+  OPERAND_REG_IMM_V2FP16,
+  OPERAND_REG_IMM_V2INT16,
 
   /// Operands with register or inline constant
   OPERAND_REG_INLINE_C_INT16,
@@ -133,7 +135,7 @@
   OPERAND_REG_INLINE_C_V2INT16,
 
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
-  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2INT16,
 
   OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -826,7 +826,7 @@
     // with knowledge of the called routines.
     if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
         MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
-      Wait = AMDGPU::Waitcnt::allZero();
+      Wait = AMDGPU::Waitcnt::allZero(IV);
     }
     // Resolve vm waits before gs-done.
     else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -998,7 +998,7 @@
     // requiring a WAITCNT beforehand.
     if (MI.getOpcode() == AMDGPU::S_BARRIER &&
         !ST->hasAutoWaitcntBeforeBarrier()) {
-      Wait = AMDGPU::Waitcnt::allZero();
+      Wait = AMDGPU::Waitcnt::allZero(IV);
     }
 
     // TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1030,7 +1030,7 @@
   }
 
   if (ForceEmitZeroWaitcnts)
-    Wait = AMDGPU::Waitcnt::allZero();
+    Wait = AMDGPU::Waitcnt::allZero(IV);
 
   if (ForceEmitWaitcnt[VM_CNT])
     Wait.VmCnt = 0;
@@ -1311,7 +1311,7 @@
              Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
              Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
       // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
-      ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
+      ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
     }
 
     // TODO: Remove this work-around after fixing the scheduler and enable the
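A note on the SIInsertWaitcnts.cpp hunks above: the interesting part is that allZero() now takes the ISA version. The sketch below (standalone, not part of the patch; it mirrors the AMDGPU::Waitcnt changes in the AMDGPUBaseInfo.h hunk further down) shows the intended semantics: gfx10 adds a separate VSCNT store counter, so "wait for everything" must only request a VsCnt wait on targets that actually have one. The GWS case switches to allZeroExceptVsCnt(), which leaves VsCnt unwaited on every target.

#include <cassert>

// Standalone sketch mirroring the patched AMDGPU::Waitcnt; ~0u means
// "no wait requested" for a counter.
struct IsaVersion { unsigned Major, Minor, Stepping; };

struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;   // separate store counter, gfx10+ only

  Waitcnt(unsigned Vm, unsigned Exp, unsigned Lgkm, unsigned Vs)
      : VmCnt(Vm), ExpCnt(Exp), LgkmCnt(Lgkm), VsCnt(Vs) {}

  // Pre-gfx10 targets have no VSCNT counter, so zeroing it there would
  // request a wait the target cannot encode.
  static Waitcnt allZero(const IsaVersion &V) {
    return Waitcnt(0, 0, 0, V.Major >= 10 ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
};

int main() {
  IsaVersion GFX9{9, 0, 0}, GFX10{10, 1, 0};
  assert(Waitcnt::allZero(GFX9).VsCnt == ~0u);  // gfx9: no VSCNT to wait on
  assert(Waitcnt::allZero(GFX10).VsCnt == 0);   // gfx10: drain VSCNT too
}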
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -203,7 +203,13 @@
 };
 
 LLVM_READONLY
-const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
 
 struct MIMGLZMappingInfo {
   MIMGBaseOpcode L;
@@ -220,6 +226,17 @@
 LLVM_READONLY
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
 
+struct MIMGInfo {
+  uint16_t Opcode;
+  uint16_t BaseOpcode;
+  uint8_t MIMGEncoding;
+  uint8_t VDataDwords;
+  uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
 LLVM_READONLY
 int getMUBUFBaseOpcode(unsigned Opc);
 
@@ -285,21 +302,30 @@
   unsigned VmCnt = ~0u;
   unsigned ExpCnt = ~0u;
   unsigned LgkmCnt = ~0u;
+  unsigned VsCnt = ~0u;
 
   Waitcnt() {}
-  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
-      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
 
-  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+  static Waitcnt allZero(const IsaVersion &Version) {
+    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+  }
+  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
+
+  bool hasWait() const {
+    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+  }
 
   bool dominates(const Waitcnt &Other) const {
     return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
-           LgkmCnt <= Other.LgkmCnt;
+           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
   }
 
   Waitcnt combined(const Waitcnt &Other) const {
     return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
-                   std::min(LgkmCnt, Other.LgkmCnt));
+                   std::min(LgkmCnt, Other.LgkmCnt),
+                   std::min(VsCnt, Other.VsCnt));
   }
 };
 
@@ -332,7 +358,8 @@
 /// \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
 /// \p Expcnt = \p Waitcnt[6:4]
-/// \p Lgkmcnt = \p Waitcnt[11:8]
+/// \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
+/// \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
 
@@ -357,7 +384,8 @@
 /// Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
 /// Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
 /// Waitcnt[6:4]   = \p Expcnt
-/// Waitcnt[11:8]  = \p Lgkmcnt
+/// Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
+/// Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
 /// Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
 ///
 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
@@ -455,6 +483,8 @@
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
     return 2;
 
   default:
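The doc-comment changes above record the second gfx10 encoding change: the LGKM_CNT field of the s_waitcnt immediate widens from Waitcnt[11:8] to Waitcnt[13:8]. Below is a self-contained sketch of that layout; the helper names are illustrative, only the bit positions come from the comments above, and out-of-range values are simply masked here rather than going through the in-tree packBits path.

#include <cassert>

unsigned lgkmcntWidth(unsigned Major) { return Major >= 10 ? 6 : 4; }

unsigned encodeWaitcnt(unsigned Major, unsigned Vm, unsigned Exp, unsigned Lgkm) {
  unsigned W = (Vm & 0xf)                                         // Waitcnt[3:0]
             | ((Exp & 0x7) << 4)                                 // Waitcnt[6:4]
             | ((Lgkm & ((1u << lgkmcntWidth(Major)) - 1)) << 8); // [11:8] or [13:8]
  if (Major >= 9)
    W |= ((Vm >> 4) & 0x3) << 14;                                 // Vmcnt[5:4] -> [15:14]
  return W;
}

int main() {
  // lgkmcnt = 20 needs 5 bits: representable on gfx10, but it falls
  // outside the 4-bit field on gfx9 (20 & 0xf == 4 after masking).
  assert(((encodeWaitcnt(10, 0, 7, 20) >> 8) & 0x3f) == 20);
  assert(((encodeWaitcnt(9, 0, 7, 20) >> 8) & 0x0f) == 4);
}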
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -84,7 +84,9 @@
 unsigned getLgkmcntBitShift() { return 8; }
 
 /// \returns Lgkmcnt bit width.
-unsigned getLgkmcntBitWidth() { return 4; }
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+  return (VersionMajor >= 10) ? 6 : 4;
+}
 
 /// \returns Vmcnt bit shift (higher bits).
 unsigned getVmcntBitShiftHi() { return 14; }
@@ -98,14 +100,6 @@
 
 namespace AMDGPU {
 
-struct MIMGInfo {
-  uint16_t Opcode;
-  uint16_t BaseOpcode;
-  uint8_t MIMGEncoding;
-  uint8_t VDataDwords;
-  uint8_t VAddrDwords;
-};
-
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
@@ -119,6 +113,11 @@
   return Info ? Info->Opcode : -1;
 }
 
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+  const MIMGInfo *Info = getMIMGInfo(Opc);
+  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
   const MIMGInfo *NewInfo =
@@ -279,6 +278,8 @@
 
 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return getAddressableNumSGPRs(STI);
   if (Version.Major >= 8)
     return 16;
   return 8;
@@ -300,6 +301,8 @@
     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 106;
   if (Version.Major >= 8)
     return 102;
   return 104;
@@ -308,6 +311,10 @@
 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   assert(WavesPerEU != 0);
 
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 0;
+
   if (WavesPerEU >= getMaxWavesPerEU())
     return 0;
 
@@ -322,8 +329,10 @@
                         bool Addressable) {
   assert(WavesPerEU != 0);
 
-  IsaVersion Version = getIsaVersion(STI->getCPU());
   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return Addressable ? AddressableNumSGPRs : 108;
   if (Version.Major >= 8 && !Addressable)
     AddressableNumSGPRs = 112;
   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
@@ -340,6 +349,9 @@
     ExtraSGPRs = 2;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return ExtraSGPRs;
+
   if (Version.Major < 8) {
     if (FlatScrUsed)
       ExtraSGPRs = 4;
@@ -540,13 +552,14 @@
 }
 
 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
-  return (1 << getLgkmcntBitWidth()) - 1;
+  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
 }
 
 unsigned getWaitcntBitMask(const IsaVersion &Version) {
   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
-  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+                                getLgkmcntBitWidth(Version.Major));
   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
   if (Version.Major < 9)
     return Waitcnt;
@@ -572,7 +585,8 @@
 }
 
 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
-  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return unpackBits(Waitcnt, getLgkmcntBitShift(),
+                    getLgkmcntBitWidth(Version.Major));
 }
 
 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
@@ -608,7 +622,8 @@
 
 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                        unsigned Lgkmcnt) {
-  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+                  getLgkmcntBitWidth(Version.Major));
}
 
 unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -800,10 +815,13 @@
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
     return true;
   default:
     return false;
@@ -934,6 +952,13 @@
 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   assert(HasInv2Pi);
 
+  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+    int16_t Trunc = static_cast<int16_t>(Literal);
+    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+  }
+  if (!(Literal & 0xffff))
+    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
   int16_t Lo16 = static_cast<int16_t>(Literal);
   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
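To make the isInlinableLiteralV216() change just above concrete, here is a standalone sketch. The 16-bit helper is a simplified re-implementation (signed -16..64 plus the standard fp16 inline constants, with HasInv2Pi assumed true); the in-tree one is declared in AMDGPUBaseInfo.h. The two new early-outs accept any sign- or zero-extended 16-bit immediate and any immediate that occupies only the high half:

#include <cassert>
#include <cstdint>

static bool isInlinableLiteral16(int16_t Literal) {
  if (Literal >= -16 && Literal <= 64)
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3800 || Val == 0xB800 ||  // 0.5, -0.5
         Val == 0x3C00 || Val == 0xBC00 ||  // 1.0, -1.0
         Val == 0x4000 || Val == 0xC000 ||  // 2.0, -2.0
         Val == 0x4400 || Val == 0xC400 ||  // 4.0, -4.0
         Val == 0x3118;                     // 1/(2*pi), HasInv2Pi assumed
}

static bool isInlinableLiteralV216(int32_t Literal) {
  // New: a sign- or zero-extended 16-bit value inlines via its low half...
  if (Literal >= INT16_MIN && Literal <= UINT16_MAX)
    return isInlinableLiteral16(static_cast<int16_t>(Literal));
  // ...and so does a value living only in the high half.
  if (!(Literal & 0xffff))
    return isInlinableLiteral16(static_cast<int16_t>(Literal >> 16));
  // Pre-existing rule: both halves equal and individually inlinable.
  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16);
}

int main() {
  assert(isInlinableLiteralV216(64));           // zero-extended 16-bit immediate
  assert(isInlinableLiteralV216(-1));           // sign-extended: 0xffffffff
  assert(isInlinableLiteralV216(64 << 16));     // high half only
  assert(!isInlinableLiteralV216(0x00010041));  // halves differ (0x0001 vs 0x0041)
}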
@@ -965,15 +990,19 @@
   }
 }
 
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+  return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  if (isGCN3Encoding(ST))
+  if (hasSMEMByteOffset(ST))
     return ByteOffset;
   return ByteOffset >> 2;
 }
 
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return isGCN3Encoding(ST) ?
+  return (hasSMEMByteOffset(ST)) ?
     isUInt<20>(EncodedOffset) :
     isUInt<8>(EncodedOffset);
 }
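The final hunk folds the gfx10 case into a hasSMEMByteOffset() predicate: GCN3 and gfx10 encode the SMEM/SMRD immediate as a byte offset checked against 20 bits, while older subtargets encode a dword offset checked against 8 bits. A standalone sketch of that rule, with the subtarget query reduced to a bool instead of MCSubtargetInfo:

#include <cassert>
#include <cstdint>

int64_t getSMRDEncodedOffset(bool HasSMEMByteOffset, int64_t ByteOffset) {
  // Pre-GCN3 targets encode the offset in dwords.
  return HasSMEMByteOffset ? ByteOffset : ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(bool HasSMEMByteOffset, int64_t ByteOffset) {
  int64_t Enc = getSMRDEncodedOffset(HasSMEMByteOffset, ByteOffset);
  return HasSMEMByteOffset ? (Enc >= 0 && Enc < (1 << 20))   // isUInt<20>
                           : (Enc >= 0 && Enc < (1 << 8));   // isUInt<8>
}

int main() {
  assert(isLegalSMRDImmOffset(false, 1020));   // SI/CI: 1020 >> 2 = 255, fits 8 bits
  assert(!isLegalSMRDImmOffset(false, 1024));  // SI/CI: 256 does not fit
  assert(isLegalSMRDImmOffset(true, 1024));    // GCN3/gfx10: byte offset, 20 bits
}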