diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1332,6 +1332,12 @@ return false; } +static bool isStoreCountWait(const MachineInstr &I) { + return (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && + I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && + !I.getOperand(1).getImm()); +} + bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { if (!RunLdsBranchVmemWARHazardFixup) return false; @@ -1351,9 +1357,7 @@ return false; auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) { - return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && - I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && - !I.getOperand(1).getImm()); + return IsHazardInst(I) || isStoreCountWait(I); }; auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) { @@ -1370,9 +1374,7 @@ if (InstType == InstType2) return true; - return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && - I.getOperand(0).getReg() == AMDGPU::SGPR_NULL && - !I.getOperand(1).getImm(); + return isStoreCountWait(I); }; return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) != diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -211,19 +211,20 @@ return ScoreUBs[T]; } + unsigned getScoreRange(InstCounterType T) const { + return getScoreUB(T) - getScoreLB(T); + } + // Mapping from event to counter. InstCounterType eventCounter(WaitEventType E) { - if (WaitEventMaskForInst[VM_CNT] & (1 << E)) - return VM_CNT; - if (WaitEventMaskForInst[LGKM_CNT] & (1 << E)) - return LGKM_CNT; - if (WaitEventMaskForInst[VS_CNT] & (1 << E)) - return VS_CNT; - assert(WaitEventMaskForInst[EXP_CNT] & (1 << E)); - return EXP_CNT; + for (auto T : inst_counter_types()) { + if (WaitEventMaskForInst[T] & (1 << E)) + return T; + } + llvm_unreachable("event type has no associated counter"); } - unsigned getRegScore(int GprNo, InstCounterType T) { + unsigned getRegScore(int GprNo, InstCounterType T) const { if (GprNo < NUM_ALL_VGPRS) { return VgprScores[T][GprNo]; } @@ -240,8 +241,7 @@ bool counterOutOfOrder(InstCounterType T) const; void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const; void simplifyWaitcnt(InstCounterType T, unsigned &Count) const; - void determineWait(InstCounterType T, unsigned ScoreToWait, - AMDGPU::Waitcnt &Wait) const; + void determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const; void applyWaitcnt(const AMDGPU::Waitcnt &Wait); void applyWaitcnt(InstCounterType T, unsigned Count); void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI, @@ -305,9 +305,9 @@ assert(T < NUM_INST_CNTS); ScoreUBs[T] = Val; if (T == EXP_CNT) { - unsigned UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT); - if (ScoreLBs[T] < UB && UB < ScoreUBs[T]) - ScoreLBs[T] = UB; + unsigned UB = ScoreUBs[EXP_CNT] - getWaitCountMax(EXP_CNT); + if (ScoreLBs[EXP_CNT] < UB && UB < ScoreUBs[EXP_CNT]) + ScoreLBs[EXP_CNT] = UB; } } @@ -765,8 +765,10 @@ Count = ~0u; } -void WaitcntBrackets::determineWait(InstCounterType T, unsigned ScoreToWait, +void WaitcntBrackets::determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const { + unsigned ScoreToWait = getRegScore(RegNo, T); + // If the score of src_operand falls within the bracket, we need an // s_waitcnt instruction. const unsigned LB = getScoreLB(T); @@ -1106,8 +1108,7 @@ for (int RegNo = CallAddrOpInterval.first; RegNo < CallAddrOpInterval.second; ++RegNo) - ScoreBrackets.determineWait( - LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); + ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait); int RtnAddrOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); @@ -1117,8 +1118,7 @@ for (int RegNo = RtnAddrOpInterval.first; RegNo < RtnAddrOpInterval.second; ++RegNo) - ScoreBrackets.determineWait( - LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); + ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait); } } } else { @@ -1150,11 +1150,9 @@ continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; // VM_CNT is only relevant to vgpr or LDS. - ScoreBrackets.determineWait( - VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); + ScoreBrackets.determineWait(VM_CNT, RegNo, Wait); if (Memop->isStore()) { - ScoreBrackets.determineWait( - EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); + ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait); } } @@ -1176,17 +1174,14 @@ if (Op.isUse() || !SIInstrInfo::isVMEM(MI) || ScoreBrackets.hasOtherPendingVmemTypes(RegNo, getVmemType(MI))) { - ScoreBrackets.determineWait( - VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); + ScoreBrackets.determineWait(VM_CNT, RegNo, Wait); ScoreBrackets.clearVgprVmemTypes(RegNo); } if (Op.isDef() || ScoreBrackets.hasPendingEvent(EXP_LDS_ACCESS)) { - ScoreBrackets.determineWait( - EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); + ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait); } } - ScoreBrackets.determineWait( - LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); + ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait); } } } @@ -1228,9 +1223,7 @@ Wait.VsCnt = 0; if (FlushVmCnt) { - unsigned UB = ScoreBrackets.getScoreUB(VM_CNT); - unsigned LB = ScoreBrackets.getScoreLB(VM_CNT); - if (UB - LB != 0) + if (ScoreBrackets.getScoreRange(VM_CNT) != 0) Wait.VmCnt = 0; } @@ -1245,9 +1238,7 @@ MachineInstr *OldWaitcntInstr) { AMDGPU::Waitcnt Wait; - unsigned UB = ScoreBrackets.getScoreUB(VM_CNT); - unsigned LB = ScoreBrackets.getScoreLB(VM_CNT); - if (UB - LB == 0) + if (ScoreBrackets.getScoreRange(VM_CNT) == 0) return false; Wait.VmCnt = 0;