# Patch overview. This commentary sits before the first `diff --git` header and is not part of any hunk.
#
# llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
#   * Adds the file-local helper isLdsDma(MI): true when MI is VALU and is also MUBUF or FLAT.
#   * Replaces the two separate M0-read checks (MovRel/VINTRP and SendMsg/TraceData/GDS) with one
#     combined condition and adds two more triggers to it:
#       - ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)
#       - ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)
#     The same four triggers are used in the hunk that returns HazardType (which additionally
#     requires checkReadM0Hazards(MI) > 0) and in the hunk that returns
#     std::max(WaitStates, checkReadM0Hazards(MI)).
#   * checkReadM0Hazards: renames the local constant SMovRelWaitStates to ReadM0WaitStates.
#     Its value stays 1 and the hazard predicate (TII->isSALU) is unchanged.
#
# llvm/lib/Target/AMDGPU/GCNSubtarget.h
#   * Adds hasReadM0LdsDmaHazard() and hasReadM0LdsDirectHazard(); both return
#     getGeneration() == AMDGPUSubtarget::GFX9.
#
# llvm/test/CodeGen/AMDGPU/hazard.mir
#   * Adds buffer_store_lds_dword and lds_direct_read_m0. Each expects `S_NOP 0` (GFX9-NEXT prefix)
#     between `$m0 = S_MOV_B32 0` and the following instruction that carries `implicit $m0`.
#
# NOTE(review): the line breaks inside this patch appear to have been collapsed to single spaces,
# so hunk bodies do not have one source line per diff line -- restore the original patch text
# before attempting to apply it.
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -166,6 +166,11 @@ Opcode == AMDGPU::V_PERMLANEX16_B32_e64; } +static bool isLdsDma(const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI) && + (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)); +} + static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, AMDGPU::OpName::simm16); @@ -226,12 +231,12 @@ if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) return HazardType; - if (ST.hasReadM0MovRelInterpHazard() && - (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && - checkReadM0Hazards(MI) > 0) - return HazardType; - - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) && + if (((ST.hasReadM0MovRelInterpHazard() && + (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) || + (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) || + (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) || + (ST.hasReadM0LdsDirectHazard() && + MI->readsRegister(AMDGPU::LDS_DIRECT))) && checkReadM0Hazards(MI) > 0) return HazardType; @@ -351,11 +356,11 @@ if (isRFE(MI->getOpcode())) return std::max(WaitStates, checkRFEHazards(MI)); - if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || - isSMovRel(MI->getOpcode()))) - return std::max(WaitStates, checkReadM0Hazards(MI)); - - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) + if ((ST.hasReadM0MovRelInterpHazard() && + (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) || + (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) || + (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) || + (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT))) return std::max(WaitStates, checkReadM0Hazards(MI)); if (SIInstrInfo::isMAI(*MI)) @@ -1014,10 +1019,10 @@ int 
GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); - const int SMovRelWaitStates = 1; + const int ReadM0WaitStates = 1; auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; - return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, - SMovRelWaitStates); + return ReadM0WaitStates - + getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates); } void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -931,6 +931,14 @@ getGeneration() <= AMDGPUSubtarget::GFX9; } + bool hasReadM0LdsDmaHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + + bool hasReadM0LdsDirectHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; } diff --git a/llvm/test/CodeGen/AMDGPU/hazard.mir b/llvm/test/CodeGen/AMDGPU/hazard.mir --- a/llvm/test/CodeGen/AMDGPU/hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard.mir @@ -171,3 +171,27 @@ S_SENDMSG 3, implicit $exec, implicit $m0 S_ENDPGM 0 ... + +# GCN-LABEL: name: buffer_store_lds_dword +# GCN: $m0 = S_MOV_B32 0 +# GFX9-NEXT: S_NOP 0 +# GCN-NEXT: BUFFER_STORE_LDS_DWORD +--- +name: buffer_store_lds_dword +body: | + bb.0: + $m0 = S_MOV_B32 0 + BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0 +... + +# GCN-LABEL: name: lds_direct_read_m0 +# GCN: $m0 = S_MOV_B32 0 +# GFX9-NEXT: S_NOP 0 +# GCN-NEXT: V_MOV_B32 +--- +name: lds_direct_read_m0 +body: | + bb.0: + $m0 = S_MOV_B32 0 + $vgpr0 = V_MOV_B32_e32 $lds_direct, implicit $exec, implicit $m0 +... 
# New test file: llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir (created from /dev/null).
#   * RUN line: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec,
#     output checked by FileCheck with the single prefix GCN.
#   * Four functions: buffer_load_dword_lds, buffer_store_lds_dword, global_load_lds_dword,
#     scratch_load_lds_dword. Each writes $m0 with `S_MOV_B32 0` and immediately issues an LDS
#     buffer/global/scratch instruction carrying `implicit $m0`; the checks require `S_NOP 0`
#     to appear between the two instructions.
# NOTE(review): the subtarget predicates in GCNSubtarget.h compare getGeneration() against GFX9,
# while this test runs with -mcpu=gfx940 -- presumably gfx940 reports the GFX9 generation; confirm.
diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir @@ -0,0 +1,49 @@ +# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s + +# GCN-LABEL: name: buffer_load_dword_lds +# GCN: $m0 = S_MOV_B32 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_ADDR64 +--- +name: buffer_load_dword_lds +body: | + bb.0: + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORD_LDS_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec, implicit $m0 +... + +# GCN-LABEL: name: buffer_store_lds_dword +# GCN: $m0 = S_MOV_B32 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: BUFFER_STORE_LDS_DWORD +--- +name: buffer_store_lds_dword +body: | + bb.0: + $m0 = S_MOV_B32 0 + BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0 +... + +# GCN-LABEL: name: global_load_lds_dword +# GCN: $m0 = S_MOV_B32 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD +--- +name: global_load_lds_dword +body: | + bb.0: + $m0 = S_MOV_B32 0 + GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $m0 +... + +# GCN-LABEL: name: scratch_load_lds_dword +# GCN: $m0 = S_MOV_B32 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: SCRATCH_LOAD_LDS_DWORD +--- +name: scratch_load_lds_dword +body: | + bb.0: + $m0 = S_MOV_B32 0 + SCRATCH_LOAD_LDS_DWORD $vgpr2, 0, 0, implicit $exec, implicit $m0 +...