Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2016,53 +2016,41 @@ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) Parser.Lex(); - int CntShift; - int CntMask; - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - if (CntName == "vmcnt") { - CntMask = getVmcntMask(IV); - CntShift = getVmcntShift(IV); - } else if (CntName == "expcnt") { - CntMask = getExpcntMask(IV); - CntShift = getExpcntShift(IV); - } else if (CntName == "lgkmcnt") { - CntMask = getLgkmcntMask(IV); - CntShift = getLgkmcntShift(IV); - } else { + if (CntName == "vmcnt") + IntVal = encodeVmcnt(IV, IntVal, CntVal); + else if (CntName == "expcnt") + IntVal = encodeExpcnt(IV, IntVal, CntVal); + else if (CntName == "lgkmcnt") + IntVal = encodeLgkmcnt(IV, IntVal, CntVal); + else return true; - } - IntVal &= ~(CntMask << CntShift); - IntVal |= (CntVal << CntShift); return false; } AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - // Disable all counters by default. - // vmcnt [3:0] - // expcnt [6:4] - // lgkmcnt [11:8] - int64_t CntVal = 0xf7f; + IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); + int64_t Waitcnt = getWaitcntMask(IV); SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { default: return MatchOperand_ParseFail; case AsmToken::Integer: // The operand can be an integer value. 
- if (getParser().parseAbsoluteExpression(CntVal)) + if (getParser().parseAbsoluteExpression(Waitcnt)) return MatchOperand_ParseFail; break; case AsmToken::Identifier: do { - if (parseCnt(CntVal)) + if (parseCnt(Waitcnt)) return MatchOperand_ParseFail; } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); return MatchOperand_Success; } Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -874,9 +874,8 @@ IsaVersion IV = getIsaVersion(STI.getFeatureBits()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); - unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV); - unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV); - unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV); + unsigned Vmcnt, Expcnt, Lgkmcnt; + decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt); bool NeedSpace = false; Index: lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaits.cpp +++ lib/Target/AMDGPU/SIInsertWaits.cpp @@ -63,12 +63,12 @@ const MachineRegisterInfo *MRI; IsaVersion IV; - /// \brief Constant hardware limits - static const Counters WaitCounts; - /// \brief Constant zero value static const Counters ZeroCounts; + /// \brief Hardware limits + Counters HardwareLimits; + /// \brief Counter values we have already waited on. 
Counters WaitedOn; @@ -173,7 +173,6 @@ return new SIInsertWaits(); } -const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } }; const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; static bool readsVCCZ(unsigned Opcode) { @@ -379,7 +378,7 @@ Ordered[2] = false; // The values we are going to put into the S_WAITCNT instruction - Counters Counts = WaitCounts; + Counters Counts = HardwareLimits; // Do we really need to wait? bool NeedWait = false; @@ -395,7 +394,7 @@ unsigned Value = LastIssued.Array[i] - Required.Array[i]; // Adjust the value to the real hardware possibilities. - Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); + Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]); } else Counts.Array[i] = 0; @@ -413,9 +412,10 @@ // Build the wait instruction BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) | - ((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) | - ((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV))); + .addImm(encodeWaitcnt(IV, + Counts.Named.VM, + Counts.Named.EXP, + Counts.Named.LGKM)); LastOpcodeType = OTHER; LastInstWritesM0 = false; @@ -443,9 +443,9 @@ unsigned Imm = I->getOperand(0).getImm(); Counters Counts, WaitOn; - Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV); - Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV); - Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV); + Counts.Named.VM = decodeVmcnt(IV, Imm); + Counts.Named.EXP = decodeExpcnt(IV, Imm); + Counts.Named.LGKM = decodeLgkmcnt(IV, Imm); for (unsigned i = 0; i < 3; ++i) { if (Counts.Array[i] <= LastIssued.Array[i]) @@ -523,6 +523,10 @@ MRI = &MF.getRegInfo(); IV = getIsaVersion(ST->getFeatureBits()); + HardwareLimits.Named.VM = getMaxVmcnt(IV); + HardwareLimits.Named.EXP = getMaxExpcnt(IV); + HardwareLimits.Named.LGKM = getMaxLgkmcnt(IV); + WaitedOn = ZeroCounts; DelayedWaitOn = ZeroCounts; LastIssued = 
ZeroCounts; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -76,23 +76,59 @@ std::pair Default, bool OnlyFirstRequired = false); -/// \returns VMCNT bit mask for given isa \p Version. -unsigned getVmcntMask(IsaVersion Version); +/// \returns Waitcnt mask for given isa \p Version. +unsigned getWaitcntMask(IsaVersion Version); -/// \returns VMCNT bit shift for given isa \p Version. -unsigned getVmcntShift(IsaVersion Version); +/// \returns Maximum Vmcnt for given isa \p Version. +unsigned getMaxVmcnt(IsaVersion Version); -/// \returns EXPCNT bit mask for given isa \p Version. -unsigned getExpcntMask(IsaVersion Version); +/// \returns Maximum Expcnt for given isa \p Version. +unsigned getMaxExpcnt(IsaVersion Version); -/// \returns EXPCNT bit shift for given isa \p Version. -unsigned getExpcntShift(IsaVersion Version); +/// \returns Maximum Lgkmcnt for given isa \p Version. +unsigned getMaxLgkmcnt(IsaVersion Version); -/// \returns LGKMCNT bit mask for given isa \p Version. -unsigned getLgkmcntMask(IsaVersion Version); +/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt); -/// \returns LGKMCNT bit shift for given isa \p Version. -unsigned getLgkmcntShift(IsaVersion Version); +/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt); + +/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt); + +/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa +/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and +/// \p Lgkmcnt respectively. 
+/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: +/// \p Vmcnt = \p Waitcnt[3:0] +/// \p Expcnt = \p Waitcnt[6:4] +/// \p Lgkmcnt = \p Waitcnt[11:8] +void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); + +/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. +unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt); + +/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. +unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt); + +/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. +unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt); + +/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa +/// \p Version. +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: +/// Waitcnt[3:0] = \p Vmcnt +/// Waitcnt[6:4] = \p Expcnt +/// Waitcnt[11:8] = \p Lgkmcnt +/// +/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given +/// isa \p Version. +unsigned encodeWaitcnt(IsaVersion Version, + unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -33,6 +33,49 @@ #undef GET_INSTRINFO_NAMED_OPS #undef GET_INSTRINFO_ENUM +namespace { + +/// \returns Bit mask for given bit \p Shift and bit \p Width. +unsigned getBitMask(unsigned Shift, unsigned Width) { + return ((1 << Width) - 1) << Shift; +} + +/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. +/// +/// \returns Packed \p Dst. 
+unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { + Dst &= ~getBitMask(Shift, Width); // Clear the field. + Dst |= (Src << Shift) & getBitMask(Shift, Width); // Merge truncated Src. + return Dst; +} + +/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width. +/// +/// \returns Unpacked bits. +unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { + return (Src & getBitMask(Shift, Width)) >> Shift; +} + +/// \returns Vmcnt bit shift. +unsigned getVmcntBitShift() { return 0; } + +/// \returns Vmcnt bit width. +unsigned getVmcntBitWidth() { return 4; } + +/// \returns Expcnt bit shift. +unsigned getExpcntBitShift() { return 4; } + +/// \returns Expcnt bit width. +unsigned getExpcntBitWidth() { return 3; } + +/// \returns Lgkmcnt bit shift. +unsigned getLgkmcntBitShift() { return 8; } + +/// \returns Lgkmcnt bit width. +unsigned getLgkmcntBitWidth() { return 4; } + +} // anonymous namespace + namespace llvm { namespace AMDGPU { @@ -158,28 +201,63 @@ return Ints; } -unsigned getVmcntMask(IsaVersion Version) { - return 0xf; +unsigned getWaitcntMask(IsaVersion Version) { + unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth()); + unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); + return Vmcnt | Expcnt | Lgkmcnt; +} + +unsigned getMaxVmcnt(IsaVersion Version) { + return (1 << getVmcntBitWidth()) - 1; +} + +unsigned getMaxExpcnt(IsaVersion Version) { + return (1 << getExpcntBitWidth()) - 1; +} + +unsigned getMaxLgkmcnt(IsaVersion Version) { + return (1 << getLgkmcntBitWidth()) - 1; +} + +unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) { + return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); +} + +unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) { + return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); +} + +unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) { +
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned getVmcntShift(IsaVersion Version) { - return 0; +void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { + Vmcnt = decodeVmcnt(Version, Waitcnt); + Expcnt = decodeExpcnt(Version, Waitcnt); + Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned getExpcntMask(IsaVersion Version) { - return 0x7; +unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) { + return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); } -unsigned getExpcntShift(IsaVersion Version) { - return 4; +unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) { + return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned getLgkmcntMask(IsaVersion Version) { - return 0xf; +unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) { + return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned getLgkmcntShift(IsaVersion Version) { - return 8; +unsigned encodeWaitcnt(IsaVersion Version, + unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { + unsigned Waitcnt = 0; + Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); + Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); + Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); + return Waitcnt; } unsigned getInitialPSInputAddr(const Function &F) {