diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -40,11 +40,6 @@ #include "AMDGPUGenSubtargetInfo.inc" #undef AMDGPUSubtarget -static cl::opt DisablePowerSched( - "amdgpu-disable-power-sched", - cl::desc("Disable scheduling to minimize mAI power bursts"), - cl::init(false)); - static cl::opt EnableVGPRIndexMode( "amdgpu-vgpr-index-mode", cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), @@ -836,141 +831,6 @@ } } -namespace { -struct FillMFMAShadowMutation : ScheduleDAGMutation { - const SIInstrInfo *TII; - - ScheduleDAGMI *DAG; - - FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {} - - bool isSALU(const SUnit *SU) const { - const MachineInstr *MI = SU->getInstr(); - return MI && TII->isSALU(*MI) && !MI->isTerminator(); - } - - bool isVALU(const SUnit *SU) const { - const MachineInstr *MI = SU->getInstr(); - return MI && TII->isVALU(*MI); - } - - bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const { - if (Pred->NodeNum < Succ->NodeNum) - return true; - - SmallVector Succs({Succ}), Preds({Pred}); - - for (unsigned I = 0; I < Succs.size(); ++I) { - for (const SDep &SI : Succs[I]->Succs) { - const SUnit *SU = SI.getSUnit(); - if (SU != Succs[I] && !llvm::is_contained(Succs, SU)) - Succs.push_back(SU); - } - } - - SmallPtrSet Visited; - while (!Preds.empty()) { - const SUnit *SU = Preds.pop_back_val(); - if (llvm::is_contained(Succs, SU)) - return false; - Visited.insert(SU); - for (const SDep &SI : SU->Preds) - if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit())) - Preds.push_back(SI.getSUnit()); - } - - return true; - } - - // Link as many SALU instructions in chain as possible. Return the size - // of the chain. Links up to MaxChain instructions. - unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain, - SmallPtrSetImpl &Visited) const { - SmallVector Worklist({To}); - unsigned Linked = 0; - - while (!Worklist.empty() && MaxChain-- > 0) { - SUnit *SU = Worklist.pop_back_val(); - if (!Visited.insert(SU).second) - continue; - - LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From); - dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n'); - - if (SU->addPred(SDep(From, SDep::Artificial), false)) - ++Linked; - - for (SDep &SI : From->Succs) { - SUnit *SUv = SI.getSUnit(); - if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU)) - SUv->addPred(SDep(SU, SDep::Artificial), false); - } - - for (SDep &SI : SU->Succs) { - SUnit *Succ = SI.getSUnit(); - if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ)) - Worklist.push_back(Succ); - } - } - - return Linked; - } - - void apply(ScheduleDAGInstrs *DAGInstrs) override { - const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget(); - if (!ST.hasMAIInsts() || DisablePowerSched) - return; - DAG = static_cast(DAGInstrs); - const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); - if (!TSchedModel || DAG->SUnits.empty()) - return; - - // Scan for MFMA long latency instructions and try to add a dependency - // of available SALU instructions to give them a chance to fill MFMA - // shadow. That is desirable to fill MFMA shadow with SALU instructions - // rather than VALU to prevent power consumption bursts and throttle. - auto LastSALU = DAG->SUnits.begin(); - auto E = DAG->SUnits.end(); - SmallPtrSet Visited; - for (SUnit &SU : DAG->SUnits) { - MachineInstr &MAI = *SU.getInstr(); - if (!TII->isMAI(MAI) || - MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || - MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64) - continue; - - unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1; - - LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU); - dbgs() << "Need " << Lat - << " instructions to cover latency.\n"); - - // Find up to Lat independent scalar instructions as early as - // possible such that they can be scheduled after this MFMA. - for ( ; Lat && LastSALU != E; ++LastSALU) { - if (Visited.count(&*LastSALU)) - continue; - - if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU)) - continue; - - Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited); - } - } - } -}; -} // namespace - -void GCNSubtarget::getPostRAMutations( - std::vector> &Mutations) const { - Mutations.push_back(std::make_unique(&InstrInfo)); -} - -std::unique_ptr -GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const { - return std::make_unique(&InstrInfo); -} - const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) { if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn) return static_cast(MF.getSubtarget()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -895,9 +895,7 @@ ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMI *DAG = createGenericSchedPostRA(C); - const GCNSubtarget &ST = C->MF->getSubtarget(); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); - DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII)); DAG->addMutation(createMFMAIGroupLPDAGMutation()); return DAG; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1217,13 +1217,6 @@ /// unit requirement. unsigned getMaxNumVGPRs(const MachineFunction &MF) const; - void getPostRAMutations( - std::vector> &Mutations) - const override; - - std::unique_ptr - createFillMFMAShadowMutation(const TargetInstrInfo *TII) const; - bool isWave32() const { return getWavefrontSize() == 32; } diff --git a/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir b/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir +++ /dev/null @@ -1,18 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass post-RA-sched -amdgpu-disable-power-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s - -# GCN-LABEL: name: unrelated_mfma -# GCN: V_MFMA_F32_32X32X1F32 -# GCN: S_CMP_LG_U32 -# GCN: V_MFMA_F32_32X32X1F32 - ---- -name: unrelated_mfma -body: | - bb.0.entry: - renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32_e64 $vgpr67, $vgpr66, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec - renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63 = V_MFMA_F32_32X32X1F32_e64 $vgpr69, $vgpr68, killed $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63, 0, 0, 0, implicit $mode, implicit $exec - renamable $sgpr2 = S_ADD_U32 renamable $sgpr2, 4, implicit-def $scc - renamable $sgpr3 = S_ADDC_U32 renamable $sgpr3, 0, implicit-def dead $scc, implicit killed $scc - S_CMP_LG_U32 renamable $sgpr2, 64, implicit-def $scc - -...