Index: lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -44,6 +44,7 @@
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);
   int checkDivFMasHazards(MachineInstr *DivFMas);
+  int checkGetRegHazards(MachineInstr *GetRegInstr);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -42,6 +42,21 @@
   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
 }
 
+static bool isSGetReg(unsigned Opcode) {
+  return Opcode == AMDGPU::S_GETREG_B32;
+}
+
+static bool isSSetReg(unsigned Opcode) {
+  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+
+  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
+                                                     AMDGPU::OpName::simm16);
+  return RegOp->getImm() & 0x3f;
+}
+
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
@@ -58,6 +73,9 @@
   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
     return NoopHazard;
 
+  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -78,6 +96,9 @@
   if (isDivFMas(MI->getOpcode()))
     return std::max(0, checkDivFMasHazards(MI));
 
+  if (isSGetReg(MI->getOpcode()))
+    return std::max(0, checkGetRegHazards(MI));
+
   return 0;
 }
 
@@ -284,3 +305,23 @@
 
   return DivFMasWaitStates - WaitStatesNeeded;
 }
+
+int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned HWReg = getHWReg(TII, *GetRegInstr);
+
+  int GetRegWaitStates = 2;
+
+  for (std::list<MachineInstr*>::iterator I = EmittedInstrs.begin(),
+       E = EmittedInstrs.end(); I != E && GetRegWaitStates > 0;
+       ++I, --GetRegWaitStates) {
+
+    const MachineInstr *MI = *I;
+    if (!MI || !isSSetReg(MI->getOpcode()) || HWReg != getHWReg(TII, *MI))
+      continue;
+
+    return GetRegWaitStates;
+  }
+
+  return 0;
+}
Index: test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
===================================================================
--- test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -1,5 +1,12 @@
 # RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
 
+--- |
+  define void @div_fmas() { ret void }
+  define void @s_getreg() { ret void }
+...
+---
+# CHECK-LABEL: name: div_fmas
+
 # CHECK-LABEL: bb.0:
 # CHECK: S_MOV_B64
 # CHECK-NOT: S_NOP
@@ -28,11 +35,7 @@
 # CHECK: S_NOP
 # CHECK: S_NOP
 # CHECK: V_DIV_FMAS_F32
---- |
-  define void @test0() { ret void }
-...
----
-name: test0
+name: div_fmas
 
 body: |
   bb.0:
@@ -57,4 +60,58 @@
     %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
     %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
     S_ENDPGM
+
+...
+
+...
+---
+# CHECK-LABEL: name: s_getreg
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_SETREG
+# CHECK: S_NOP 0
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.1:
+# CHECK: S_SETREG_IMM32
+# CHECK: S_NOP 0
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.2:
+# CHECK: S_SETREG
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.3:
+# CHECK: S_SETREG
+# CHECK-NEXT: S_GETREG
+
+name: s_getreg
+
+body: |
+  bb.0:
+    successors: %bb.1
+    S_SETREG_B32 %sgpr0, 1
+    %sgpr1 = S_GETREG_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+    S_SETREG_IMM32_B32 0, 1
+    %sgpr1 = S_GETREG_B32 1
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    S_SETREG_B32 %sgpr0, 1
+    %sgpr1 = S_MOV_B32 0
+    %sgpr2 = S_GETREG_B32 1
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_SETREG_B32 %sgpr0, 0
+    %sgpr1 = S_GETREG_B32 1
+    S_ENDPGM
 ...
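
Note (not part of the patch): a minimal standalone sketch of why getHWReg() above masks the simm16 operand with 0x3f. The simm16 on S_SETREG_B32/S_GETREG_B32 is assumed to pack the hardware register id in bits [5:0], the bit offset in [10:6], and width-1 in [15:11], per the SI ISA documentation; the helper names hwRegId() and encodeHWReg() are hypothetical and exist only for illustration.

  // Sketch only: hwRegId() mirrors the 0x3f mask used by getHWReg(); the
  // hazard check compares just the register id, ignoring offset and width.
  #include <cassert>
  #include <cstdint>

  static unsigned hwRegId(uint16_t Simm16) {
    return Simm16 & 0x3f;                  // hardware register id, bits [5:0]
  }

  static uint16_t encodeHWReg(unsigned Id, unsigned Offset, unsigned Width) {
    assert(Id < 64 && Offset < 32 && Width >= 1 && Width <= 32);
    return Id | (Offset << 6) | ((Width - 1) << 11);
  }

  int main() {
    // Two accesses that name the same hardware register compare equal on the
    // id field even if they touch different bit ranges, which is all the
    // getreg hazard check needs.
    assert(hwRegId(encodeHWReg(1, 0, 32)) == hwRegId(encodeHWReg(1, 4, 8)));
    return 0;
  }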