Index: lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -51,6 +51,7 @@
   int createsVALUHazard(const MachineInstr &MI);
   int checkVALUHazards(MachineInstr *VALU);
   int checkRWLaneHazards(MachineInstr *RWLane);
+  int checkRFEHazards(MachineInstr *RFE);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -54,7 +54,11 @@
   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
 }
 
-static bool getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+static bool isRFE(unsigned Opcode) {
+  return Opcode == AMDGPU::S_RFE_B64;
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
 
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -89,6 +93,9 @@
   if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
     return NoopHazard;
 
+  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -124,6 +131,9 @@
   if (isSSetReg(MI->getOpcode()))
     return std::max(0, checkSetRegHazards(MI));
 
+  if (isRFE(MI->getOpcode()))
+    return std::max(0, checkRFEHazards(MI));
+
   return 0;
 }
 
@@ -464,3 +474,24 @@
   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
   return RWLaneWaitStates - WaitStatesSince;
 }
+
+/// Check for the S_SETREG(TRAPSTS) -> S_RFE_B64 hazard.
+///
+/// \returns the number of wait states still needed before \p RFE can issue;
+/// a value <= 0 means no hazard. Subtargets older than VOLCANIC_ISLANDS
+/// always report 0.
+int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
+
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    return 0;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  const int RFEWaitStates = 1;
+
+  auto IsHazardFn = [TII] (MachineInstr *MI) {
+    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
+  };
+  int WaitStatesSince = getWaitStatesSinceSetReg(IsHazardFn);
+  return RFEWaitStates - WaitStatesSince;
+}
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -198,6 +198,13 @@
 enum Id { // HwRegCode, (6) [5:0]
   ID_UNKNOWN_ = -1,
   ID_SYMBOLIC_FIRST_ = 1, // There are corresponding symbolic names defined.
+  ID_MODE = 1,
+  ID_STATUS = 2,
+  ID_TRAPSTS = 3,
+  ID_HW_ID = 4,
+  ID_GPR_ALLOC = 5,
+  ID_LDS_ALLOC = 6,
+  ID_IB_STS = 7,
   ID_SYMBOLIC_LAST_ = 8,
   ID_SHIFT_ = 0,
   ID_WIDTH_ = 6,
Index: test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
===================================================================
--- test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -8,6 +8,7 @@
   define void @s_setreg() { ret void }
   define void @vmem_gt_8dw_store() { ret void }
   define void @readwrite_lane() { ret void }
+  define void @rfe() { ret void }
 ...
 ---
 # GCN-LABEL: name: div_fmas
@@ -300,3 +301,33 @@
     S_ENDPGM
 
 ...
+
+...
+---
+
+# GCN-LABEL: name: rfe
+
+# GCN-LABEL: bb.0:
+# GCN: S_SETREG
+# VI: S_NOP
+# GCN-NEXT: S_RFE_B64
+
+# GCN-LABEL: bb.1:
+# GCN: S_SETREG
+# GCN-NEXT: S_RFE_B64
+
+name: rfe
+
+body: |
+  bb.0:
+    successors: %bb.1
+    S_SETREG_B32 %sgpr0, 3
+    S_RFE_B64 %sgpr2_sgpr3
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_SETREG_B32 %sgpr0, 0
+    S_RFE_B64 %sgpr2_sgpr3
+    S_ENDPGM
+
+...