diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -852,34 +852,6 @@ return MI && TII->isVALU(*MI); } - bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const { - if (Pred->NodeNum < Succ->NodeNum) - return true; - - SmallVector Succs({Succ}), Preds({Pred}); - - for (unsigned I = 0; I < Succs.size(); ++I) { - for (const SDep &SI : Succs[I]->Succs) { - const SUnit *SU = SI.getSUnit(); - if (SU != Succs[I] && !llvm::is_contained(Succs, SU)) - Succs.push_back(SU); - } - } - - SmallPtrSet Visited; - while (!Preds.empty()) { - const SUnit *SU = Preds.pop_back_val(); - if (llvm::is_contained(Succs, SU)) - return false; - Visited.insert(SU); - for (const SDep &SI : SU->Preds) - if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit())) - Preds.push_back(SI.getSUnit()); - } - - return true; - } - // Link as many SALU instructions in chain as possible. Return the size // of the chain. Links up to MaxChain instructions. 
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain, @@ -895,18 +867,20 @@ LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From); dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n'); - if (SU->addPred(SDep(From, SDep::Artificial), false)) - ++Linked; + if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From)) + if (DAG->addEdge(SU, SDep(From, SDep::Artificial))) + ++Linked; for (SDep &SI : From->Succs) { SUnit *SUv = SI.getSUnit(); - if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU)) - SUv->addPred(SDep(SU, SDep::Artificial), false); + if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) && + DAG->canAddEdge(SUv, SU)) + DAG->addEdge(SUv, SDep(SU, SDep::Artificial)); } for (SDep &SI : SU->Succs) { SUnit *Succ = SI.getSUnit(); - if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ)) + if (Succ != SU && isSALU(Succ)) Worklist.push_back(Succ); } } @@ -949,7 +923,8 @@ if (Visited.count(&*LastSALU)) continue; - if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU)) + if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) || + !DAG->canAddEdge(&*LastSALU, &SU)) continue; Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited); diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postmisched %s -o - -amdgpu-enable-power-sched=true 2>&1 | FileCheck %s +# This test represents a pattern which caused power-sched to introduce cycles into the scheduling graph. The test completing without a crash indicates that it has passed successfully. 
+ +--- +name: power_sched_cycle_condition +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11 + ; CHECK-LABEL: name: power_sched_cycle_condition + ; CHECK: liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4) + ; CHECK-NEXT: $sgpr4 = S_LSHL_B32 killed $sgpr22, 1, implicit-def dead $scc + ; CHECK-NEXT: $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM killed $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4) + ; CHECK-NEXT: $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr1, killed $sgpr1, implicit $exec + ; CHECK-NEXT: early-clobber $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec + $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4) + $vgpr2 = nsw V_MUL_LO_U32_e64 $vgpr1, $sgpr1, implicit $exec + $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec + $sgpr4 = S_LSHL_B32 $sgpr22, 1, implicit-def dead $scc + $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4) +... + +