diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -108,6 +108,7 @@
   unsigned PreEmitNoopsCommon(MachineInstr *);
   void AdvanceCycle() override;
   void RecedeCycle() override;
+  bool ShouldPreferAnother(SUnit *SU) override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1383,3 +1383,46 @@
 
   return WaitStatesNeeded;
 }
+
+/// Decide whether the scheduler should favor some other candidate over \p SU.
+///
+/// Returns false unless \p SU is an instruction for which SIInstrInfo::isMAI()
+/// holds and whose opcode is neither V_ACCVGPR_WRITE_B32 nor
+/// V_ACCVGPR_READ_B32. For such a candidate, getWaitStatesSince() is queried
+/// with the same predicate and a limit of 16; the result is true only when the
+/// predicate is left holding a matched instruction and the returned wait-state
+/// count is below that instruction's TSchedModel.computeInstrLatency() value.
+/// In every other case the result is false.
+///
+/// NOTE(review): getWaitStatesSince() is defined outside this patch; presumably
+/// this amounts to "a previously issued MFMA is still in flight" -- confirm.
+bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+  if (!SU->isInstr())
+    return false;
+
+  // Instruction most recently accepted by MatchMFMA; cleared again whenever
+  // MatchMFMA rejects an instruction.
+  MachineInstr *LastMFMA = nullptr;
+  auto MatchMFMA = [&LastMFMA](MachineInstr *Candidate) {
+    const unsigned Opc = Candidate->getOpcode();
+    const bool IsMFMA = SIInstrInfo::isMAI(*Candidate) &&
+                        Opc != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+                        Opc != AMDGPU::V_ACCVGPR_READ_B32;
+    LastMFMA = IsMFMA ? Candidate : nullptr;
+    return IsMFMA;
+  };
+
+  // Candidates that do not match the predicate are never deprioritized.
+  if (!MatchMFMA(SU->getInstr()))
+    return false;
+
+  // The query reuses MatchMFMA, so every instruction it inspects overwrites
+  // LastMFMA.
+  const int WaitStates = getWaitStatesSince(MatchMFMA, 16);
+  if (!LastMFMA)
+    return false;
+
+  const int Latency =
+      static_cast<int>(TSchedModel.computeInstrLatency(LastMFMA));
+  return WaitStates < Latency;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir b/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-prefer-non-mfma.mir
@@ -0,0 +1,18 @@
+# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass post-RA-sched -amdgpu-disable-power-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: unrelated_mfma
+# GCN: V_MFMA_F32_32X32X1F32
+# GCN: S_CMP_LG_U32
+# GCN: V_MFMA_F32_32X32X1F32
+
+---
+name: unrelated_mfma
+body: |
+  bb.0.entry:
+    renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 $vgpr67, $vgpr66, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    renamable $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63 = V_MFMA_F32_32X32X1F32 $vgpr69, $vgpr68, killed $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63, 0, 0, 0, implicit $mode, implicit $exec
+    renamable $sgpr2 = S_ADD_U32 renamable $sgpr2, 4, implicit-def $scc
+    renamable $sgpr3 = S_ADDC_U32 renamable $sgpr3, 0, implicit-def dead $scc, implicit killed $scc
+    S_CMP_LG_U32 renamable $sgpr2, 64, implicit-def $scc
+
+...