Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -382,8 +382,6 @@ DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap; - std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets; - // ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0 // because of amdgpu-waitcnt-forcezero flag bool ForceEmitZeroWaitcnts; @@ -410,13 +408,6 @@ MachineFunctionPass::getAnalysisUsage(AU); } - void addKillWaitBracket(BlockWaitcntBrackets *Bracket) { - // The waitcnt information is copied because it changes as the block is - // traversed. - KillWaitBrackets.push_back( - llvm::make_unique<BlockWaitcntBrackets>(*Bracket)); - } - bool isForceEmitWaitcnt() const { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) @@ -1425,24 +1416,6 @@ MixedExpTypes |= PredScoreBrackets->mixedExpTypes(); } - // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()? - // Also handle kills for exit block. - if (Block.succ_empty() && !KillWaitBrackets.empty()) { - for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) { - for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; - T = (enum InstCounterType)(T + 1)) { - int Span = KillWaitBrackets[I]->getScoreUB(T) - - KillWaitBrackets[I]->getScoreLB(T); - MaxPending[T] = std::max(MaxPending[T], Span); - Span = KillWaitBrackets[I]->pendingFlat(T) - - KillWaitBrackets[I]->getScoreLB(T); - MaxFlat[T] = std::max(MaxFlat[T], Span); - } - - MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes(); - } - } - // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK. for (MachineBasicBlock *Pred : Block.predecessors()) { BlockWaitcntBrackets *PredScoreBrackets = @@ -1460,18 +1433,6 @@ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan); } - // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()? 
- if (Block.succ_empty() && !KillWaitBrackets.empty()) { - for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) { - int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) - - KillWaitBrackets[I]->getScoreLB(EXP_CNT); - MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan); - int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) - - KillWaitBrackets[I]->getScoreLB(EXP_CNT); - MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan); - } - } - #if 0 // LC does not (unlike) add a waitcnt at beginning. Leaving it as marker. // TODO: how does LC distinguish between function entry and main entry? @@ -1551,60 +1512,6 @@ } } - // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()? - // Set the register scoreboard. - if (Block.succ_empty() && !KillWaitBrackets.empty()) { - for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) { - // Now merge the gpr_reg_score information. - for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; - T = (enum InstCounterType)(T + 1)) { - int PredLB = KillWaitBrackets[I]->getScoreLB(T); - int PredUB = KillWaitBrackets[I]->getScoreUB(T); - if (PredLB < PredUB) { - int PredScale = MaxPending[T] - PredUB; - // Merge vgpr scores. - for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) { - int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T); - if (PredRegScore <= PredLB) - continue; - int NewRegScore = PredScale + PredRegScore; - ScoreBrackets->setRegScore( - J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore)); - } - // Also need to merge sgpr scores for lgkm_cnt. 
- if (T == LGKM_CNT) { - for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) { - int PredRegScore = - KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT); - if (PredRegScore <= PredLB) - continue; - int NewRegScore = PredScale + PredRegScore; - ScoreBrackets->setRegScore( - J + NUM_ALL_VGPRS, LGKM_CNT, - std::max( - ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT), - NewRegScore)); - } - } - } - } - - // Also merge the WaitEvent information. - ForAllWaitEventType(W) { - enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W); - int PredEventUB = KillWaitBrackets[I]->getEventUB(W); - if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) { - int NewEventUB = - MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T); - if (NewEventUB > 0) { - ScoreBrackets->setEventUB( - W, std::max(ScoreBrackets->getEventUB(W), NewEventUB)); - } - } - } - } - } - // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the // sequencing predecessors, because changes to EXEC require waitcnts due to // the delayed nature of these operations. @@ -1701,13 +1608,6 @@ continue; } - // Kill instructions generate a conditional branch to the endmain block. - // Merge the current waitcnt state into the endmain block information. - // TODO: Are there other flavors of KILL instruction? - if (Inst.getOpcode() == AMDGPU::KILL) { - addKillWaitBracket(ScoreBrackets); - } - bool VCCZBugWorkAround = false; if (readsVCCZ(Inst) && (!VCCZBugHandledSet.count(&Inst))) { @@ -1871,7 +1771,7 @@ LoopWaitcntDataMap.clear(); BlockWaitcntProcessedSet.clear(); - // Walk over the blocks in reverse post-dominator order, inserting + // Walk over the blocks in reverse post order, inserting // s_waitcnt where needed. ReversePostOrderTraversal RPOT(&MF); bool Modified = false;