Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp =================================================================== --- lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -368,8 +368,9 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); - // Check for DPP VGPR read after VALU VGPR write. + // Check for DPP VGPR read after VALU VGPR write and EXEC write. int DppVgprWaitStates = 2; + int DppExecWaitStates = 5; int WaitStatesNeeded = 0; for (const MachineOperand &Use : DPP->uses()) { @@ -380,6 +381,10 @@ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); } + WaitStatesNeeded = + std::max(WaitStatesNeeded, + DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC)); + return WaitStatesNeeded; } Index: test/CodeGen/AMDGPU/inserted-wait-states.mir =================================================================== --- test/CodeGen/AMDGPU/inserted-wait-states.mir +++ test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -13,6 +13,7 @@ define amdgpu_kernel void @s_mov_fed_b32() { ret void } define amdgpu_kernel void @s_movrel() { ret void } define amdgpu_kernel void @v_interp() { ret void } + define amdgpu_kernel void @dpp() { ret void } define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) { entry: @@ -477,6 +478,40 @@ %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec S_ENDPGM ... + +... +--- + +# GCN-LABEL: name: dpp + +# VI-LABEL: bb.0: +# VI: V_MOV_B32_e32 +# VI: S_NOP +# VI: S_NOP +# VI-NEXT: V_MOV_B32_dpp + +# VI-LABEL: bb.1: +# VI: S_MOV_B64 +# VI: S_NOP +# VI: S_NOP +# VI: S_NOP +# VI: S_NOP +# VI: S_NOP +# VI-NEXT: V_MOV_B32_dpp + +name: dpp + +body: | + bb.0: + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + %vgpr1 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_BRANCH %bb.1 + + bb.1: + %exec = S_MOV_B64 -1 + %vgpr2 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_ENDPGM +... --- name: mov_fed_hazard_crash_on_dbg_value alignment: 0