Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -43,6 +43,7 @@
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);
+  int checkDivFMasHazards(MachineInstr *DivFMas);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -38,6 +38,10 @@
   CurrCycleInstr = MI;
 }
 
+static bool isDivFMas(unsigned Opcode) {
+  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
+}
+
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
@@ -51,6 +55,9 @@
   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
     return NoopHazard;
 
+  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -68,6 +75,9 @@
   if (SIInstrInfo::isDPP(*MI))
     return std::max(0, checkDPPHazards(MI));
 
+  if (isDivFMas(MI->getOpcode()))
+    return std::max(0, checkDivFMasHazards(MI));
+
   return 0;
 }
 
@@ -262,3 +272,15 @@
 
   return WaitStatesNeeded;
 }
+
+int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
+  // instruction.
+  const int DivFMasWaitStates = 4;
+  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+
+  return DivFMasWaitStates - WaitStatesNeeded;
+}
Index: llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
===================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -0,0 +1,60 @@
+# RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_MOV_B64
+# CHECK-NOT: S_NOP
+# CHECK: V_DIV_FMAS
+
+# CHECK-LABEL: bb.1:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.2:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.3:
+# CHECK: V_DIV_SCALE_F32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+--- |
+  define void @test0() { ret void }
+...
+---
+name: test0
+
+body: |
+  bb.0:
+    successors: %bb.1
+    %vcc = S_MOV_B64 0
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+    implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
+    %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+    S_ENDPGM
+...
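For readers following the arithmetic, the wait-state bookkeeping in the patch can be seen end to end in a small standalone sketch below. This is not part of the patch and not the real pass: Inst and waitStatesSinceVALUVccDef are hypothetical stand-ins for MachineInstr and getWaitStatesSinceDef, and the emission loop is a simplified model of how the post-RA hazard recognizer consumes a positive result from checkDivFMasHazards (insert that many S_NOPs before the hazardous instruction). Run on the bb.1 pattern from the test, it prints four S_NOPs between the compare and v_div_fmas.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// One already-emitted machine instruction, reduced to the properties the
// DivFMas hazard check cares about.
struct Inst {
  std::string Name;
  bool DefinesVCC = false; // writes vcc, explicitly or implicitly
  bool IsVALU = false;     // only a VALU write to vcc triggers the hazard
  bool IsDivFMas = false;  // v_div_fmas_f32 / v_div_fmas_f64
};

// Counts instructions emitted since the most recent VALU def of vcc, in the
// spirit of getWaitStatesSinceDef (0 = the immediately preceding instruction).
static int waitStatesSinceVALUVccDef(const std::vector<Inst> &Emitted) {
  int WaitStates = 0;
  for (auto I = Emitted.rbegin(); I != Emitted.rend(); ++I, ++WaitStates)
    if (I->DefinesVCC && I->IsVALU)
      return WaitStates;
  return 1000; // no hazardous def found; effectively "far enough away"
}

int main() {
  const int DivFMasWaitStates = 4; // same constant as checkDivFMasHazards

  // Mirrors bb.1 of the test: a VALU compare writes vcc directly before
  // v_div_fmas, so four S_NOPs must be inserted between them.
  std::vector<Inst> Block = {
      {"V_CMP_EQ_I32_e32", /*DefinesVCC=*/true, /*IsVALU=*/true},
      {"V_DIV_FMAS_F32", false, true, /*IsDivFMas=*/true},
  };

  std::vector<Inst> Emitted;
  for (const Inst &MI : Block) {
    if (MI.IsDivFMas) {
      // A positive deficit is the number of nops still required, exactly as
      // in DivFMasWaitStates - WaitStatesNeeded above.
      int Needed =
          std::max(0, DivFMasWaitStates - waitStatesSinceVALUVccDef(Emitted));
      for (int I = 0; I < Needed; ++I) {
        std::puts("S_NOP");
        Emitted.push_back({"S_NOP"});
      }
    }
    std::puts(MI.Name.c_str());
    Emitted.push_back(MI);
  }
  return 0;
}

bb.0 of the test falls out of the same model: S_MOV_B64 writes vcc but is not VALU, so waitStatesSinceVALUVccDef never matches and no nops are inserted, matching the CHECK-NOT: S_NOP line.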