Extend the VALU-write / AccVGPR hazard check to cover V_ACCVGPR_WRITE_B32.

GCNHazardRecognizer.cpp: the check lambda used to return false for every
opcode other than V_ACCVGPR_READ_B32; it now also accepts
V_ACCVGPR_WRITE_B32. The lambda and its wait-state constant are renamed
from ...Read... to ...RdWr... to match; the constant's value (1) is
unchanged.

mai-hazards.mir: adds a test whose CHECK lines expect an S_NOP 0 between
V_ACCVGPR_WRITE_B32 and the following FLAT_LOAD_DWORD.

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1368,7 +1368,7 @@
     Register Reg = Op.getReg();
 
     const int AccVgprReadLdStWaitStates = 2;
-    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+    const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
     const int MaxWaitStates = 2;
 
     int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
@@ -1378,8 +1378,9 @@
     if (WaitStatesNeeded == MaxWaitStates)
       return WaitStatesNeeded; // Early exit.
 
-    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
-      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+    auto IsVALUAccVgprRdWrCheckFn = [Reg, this](MachineInstr *MI) {
+      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32 &&
+          MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
         return false;
       auto IsVALUFn = [] (MachineInstr *MI) {
         return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
@@ -1388,8 +1389,8 @@
              std::numeric_limits<int>::max();
     };
 
-    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
-      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+    WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
+      getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
@@ -480,6 +480,20 @@
 ...
 ---
 
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_load_1_and_3_depend
+# GCN: V_MOV_B32
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_LOAD_DWORD
+name: valu_write_vgpr_accvgpr_write_load_1_and_3_depend
+body: |
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+    $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
 # GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
 # GCN: V_MOV_B32
 # GCN-NEXT: V_ACCVGPR_READ_B32