Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -367,10 +367,13 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); - // Check for DPP VGPR read after VALU VGPR write. + // Check for DPP VGPR read after VALU VGPR write, and for DPP after VALU EXEC write. int DppVgprWaitStates = 2; + int DppExecWaitStates = 5; int WaitStatesNeeded = 0; + auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; for (const MachineOperand &Use : DPP->uses()) { if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) @@ -380,6 +383,10 @@ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); } + WaitStatesNeeded = std::max( + WaitStatesNeeded, + DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn)); + return WaitStatesNeeded; } Index: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -13,6 +13,7 @@ define amdgpu_kernel void @s_mov_fed_b32() { ret void } define amdgpu_kernel void @s_movrel() { ret void } define amdgpu_kernel void @v_interp() { ret void } + define amdgpu_kernel void @dpp() { ret void } define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) { entry: @@ -477,6 +478,40 @@ %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec S_ENDPGM ... + +... 
+--- + +# GCN-LABEL: name: dpp + +# VI-LABEL: bb.0: +# VI: V_MOV_B32_e32 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: V_MOV_B32_dpp + +# VI-LABEL: bb.1: +# VI: V_CMPX_EQ_I32_e32 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: V_MOV_B32_dpp + +name: dpp + +body: | + bb.0: + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + %vgpr1 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_BRANCH %bb.1 + + bb.1: + implicit %exec, implicit %vcc = V_CMPX_EQ_I32_e32 %vgpr0, %vgpr1, implicit %exec + %vgpr3 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_ENDPGM +... --- name: mov_fed_hazard_crash_on_dbg_value alignment: 0