Index: lib/Target/AMDGPU/GCNHazardRecognizer.h =================================================================== --- lib/Target/AMDGPU/GCNHazardRecognizer.h +++ lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -22,6 +22,8 @@ class MachineFunction; class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; class ScheduleDAG; class SIInstrInfo; class SISubtarget; @@ -51,8 +53,10 @@ int checkSetRegHazards(MachineInstr *SetRegInstr); int createsVALUHazard(const MachineInstr &MI); int checkVALUHazards(MachineInstr *VALU); + int checkVALUHazardsHelper(const MachineOperand &Def, const MachineRegisterInfo &MRI); int checkRWLaneHazards(MachineInstr *RWLane); int checkRFEHazards(MachineInstr *RFE); + int checkInlineAsmHazards(MachineInstr *IA); int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); public: Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp =================================================================== --- lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -125,6 +125,9 @@ checkReadM0Hazards(MI) > 0) return NoopHazard; + if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) + return NoopHazard; + if (checkAnyInstHazards(MI) > 0) return NoopHazard; @@ -162,6 +165,9 @@ return WaitStates; } + if (MI->isInlineAsm()) + return std::max(WaitStates, checkInlineAsmHazards(MI)); + if (isSGetReg(MI->getOpcode())) return std::max(WaitStates, checkGetRegHazards(MI)); @@ -510,31 +516,68 @@ return -1; } +int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, const MachineRegisterInfo &MRI) { + // Helper to check for the hazard where VMEM instructions that store more than + // 8 bytes can have their store data overwritten by the next instruction. 
+ const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + const int VALUWaitStates = 1; + int WaitStatesNeeded = 0; + + if (!TRI->isVGPR(MRI, Def.getReg())) + return WaitStatesNeeded; + unsigned Reg = Def.getReg(); + auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { + int DataIdx = createsVALUHazard(*MI); + return DataIdx >= 0 && + TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); + }; + int WaitStatesNeededForDef = + VALUWaitStates - getWaitStatesSince(IsHazardFn); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + + return WaitStatesNeeded; +} + int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { // This checks for the hazard where VMEM instructions that store more than // 8 bytes can have there store data over written by the next instruction. if (!ST.has12DWordStoreHazard()) return 0; - const SIRegisterInfo *TRI = ST.getRegisterInfo(); const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); - - const int VALUWaitStates = 1; int WaitStatesNeeded = 0; for (const MachineOperand &Def : VALU->defs()) { - if (!TRI->isVGPR(MRI, Def.getReg())) - continue; - unsigned Reg = Def.getReg(); - auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { - int DataIdx = createsVALUHazard(*MI); - return DataIdx >= 0 && - TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); - }; - int WaitStatesNeededForDef = - VALUWaitStates - getWaitStatesSince(IsHazardFn); - WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); + } + + return WaitStatesNeeded; +} + +int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { + // This checks for hazards associated with inline asm statements. + // Since inline asms can contain just about anything, we use this + // to call/leverage other check*Hazard routines. 
Note that + // this function doesn't attempt to address all possible inline asm + // hazards (good luck), but is a collection of what has been + // problematic thus far. + + // see checkVALUHazards() + if (!ST.has12DWordStoreHazard()) + return 0; + + const MachineRegisterInfo &MRI = IA->getParent()->getParent()->getRegInfo(); + int WaitStatesNeeded = 0; + + for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); + I != E; ++I) { + const MachineOperand &Op = IA->getOperand(I); + if (Op.isReg() && Op.isDef()) { + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); + } } + return WaitStatesNeeded; } Index: test/CodeGen/AMDGPU/hazard-inlineasm.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/hazard-inlineasm.mir @@ -0,0 +1,24 @@ +# RUN: llc -mcpu=gfx900 -march=amdgcn -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck %s + +# If an INLINEASM statement is preceded by a vmem store of more than 8 bytes *and* +# the INLINEASM defines the VGPRs holding the data-to-be-stored by that preceding store, +# then the hazard recognizer should insert an s_nop in between them. + +... + +# CHECK-LABEL: name: hazard-inlineasm +# CHECK: FLAT_STORE_DWORDX4 +# CHECK-NEXT: S_NOP 0 +# CHECK-NEXT: INLINEASM + +--- +name: hazard-inlineasm + +body: | + bb.0: + FLAT_STORE_DWORDX4 %vgpr49_vgpr50, %vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit %exec, implicit %flat_scr + INLINEASM $"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def %vgpr26_vgpr27, 2818058, def dead %sgpr14_sgpr15, 589833, %sgpr12, 327689, killed %vgpr51, 2621449, %vgpr46_vgpr47 + S_ENDPGM +... + +