diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -60,6 +60,10 @@ void addClauseInst(const MachineInstr &MI); + // Advance over a MachineInstr bundle. Look for hazards in the bundled + // instructions. + void processBundle(); + int getWaitStatesSince(IsHazardFn IsHazard, int Limit); int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit); int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit); @@ -80,6 +84,8 @@ int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); + + void fixHazards(MachineInstr *MI); bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); bool fixSMEMtoVectorWriteHazards(MachineInstr *MI); bool fixVcmpxExecWARHazard(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -124,6 +124,8 @@ ScheduleHazardRecognizer::HazardType GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *MI = SU->getInstr(); + if (MI->isBundle()) + return NoHazard; if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) return NoopHazard; @@ -179,6 +181,37 @@ return NoHazard; } +static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)) + .addImm(0); +} + +void GCNHazardRecognizer::processBundle() { + MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator()); + MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end(); + // Check bundled MachineInstr's for hazards. 
+ for (; MI != E && MI->isInsideBundle(); ++MI) { + CurrCycleInstr = &*MI; + unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr); + + if (IsHazardRecognizerMode) + fixHazards(CurrCycleInstr); + + for (unsigned i = 0; i < WaitStates; ++i) + insertNoopInBundle(CurrCycleInstr, TII); + + // It's unnecessary to track more than MaxLookAhead instructions. Since we + // include the bundled MI directly after, only add a maximum of + // (MaxLookAhead - 1) noops to EmittedInstrs. + for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i) + EmittedInstrs.push_front(nullptr); + + EmittedInstrs.push_front(CurrCycleInstr); + EmittedInstrs.resize(MaxLookAhead); + } + CurrCycleInstr = nullptr; +} + unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { IsHazardRecognizerMode = false; return PreEmitNoopsCommon(SU->getInstr()); @@ -188,17 +221,15 @@ IsHazardRecognizerMode = true; CurrCycleInstr = MI; unsigned W = PreEmitNoopsCommon(MI); - - fixVMEMtoScalarWriteHazards(MI); - fixSMEMtoVectorWriteHazards(MI); - fixVcmpxExecWARHazard(MI); - fixLdsBranchVmemWARHazard(MI); - + fixHazards(MI); CurrCycleInstr = nullptr; return W; } unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { + if (MI->isBundle()) + return 0; + int WaitStates = std::max(0, checkAnyInstHazards(MI)); if (SIInstrInfo::isSMRD(*MI)) @@ -264,6 +295,11 @@ CurrCycleInstr->isKill()) return; + if (CurrCycleInstr->isBundle()) { + processBundle(); + return; + } + unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); // Keep track of emitted instructions @@ -304,8 +340,11 @@ int WaitStates, IsExpiredFn IsExpired, DenseSet &Visited) { + for (auto E = MBB->instr_rend(); I != E; ++I) { + // Don't add WaitStates for parent BUNDLE instructions. + if (I->isBundle()) + continue; - for (auto E = MBB->rend() ; I != E; ++I) { if (IsHazard(&*I)) return WaitStates; @@ -437,9 +476,9 @@ // instructions in this group may return out of order and/or may be // replayed (i.e.
the same instruction issued more than once). // - // In order to handle these situations correctly we need to make sure - // that when a clause has more than one instruction, no instruction in the - // clause writes to a register that is read another instruction in the clause + // In order to handle these situations correctly we need to make sure that + // when a clause has more than one instruction, no instruction in the clause + // writes to a register that is read by another instruction in the clause // (including itself). If we encounter this situaion, we need to break the // clause by inserting a non SMEM instruction. @@ -525,7 +564,6 @@ // SGPR was written by a VALU Instruction. const int VmemSgprWaitStates = 5; auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; - for (const MachineOperand &Use : VMEM->uses()) { if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) continue; @@ -795,6 +833,13 @@ SMovRelWaitStates); } +void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { + fixVMEMtoScalarWriteHazards(MI); + fixSMEMtoVectorWriteHazards(MI); + fixVcmpxExecWARHazard(MI); + fixLdsBranchVmemWARHazard(MI); +} + bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { if (!ST.hasVMEMtoScalarWriteHazard()) return false; diff --git a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir @@ -0,0 +1,94 @@ +# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s + +# GCN-LABEL: name:
break_smem_clause_simple_load_smrd8_ptr_hidden_bundle +# GCN: } +# XNACK-NEXT: S_NOP +# NOXNACK-NOT: S_NOP +# GCN: S_LOAD_DWORDX2_IMM +--- +name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle +body: | + bb.0: + BUNDLE implicit-def $sgpr6_sgpr7 { + $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 + } + $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + S_ENDPGM 0 +... + +# GFX9-LABEL: name: hazard_precedes_bundle +# GFX9: S_MOV_B32 +# GFX9-NEXT: S_NOP +# GFX9: BUNDLE +# GFX9-NEXT: S_NOP +--- +name: hazard_precedes_bundle +body: | + bb.0: + $m0 = S_MOV_B32 $sgpr7 + S_SENDMSG 3, implicit $exec, implicit $m0 + $m0 = S_MOV_B32 $sgpr8 + BUNDLE implicit-def $vgpr0 { + $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec + } + S_ENDPGM 0 +... + +# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr +# GCN: S_LOAD_DWORDX2_IMM +# GCN-NEXT: } +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN: BUFFER_LOAD_DWORD_OFFEN +--- +name: vmem_vcc_hazard_ignore_bundle_instr +body: | + bb.0: + $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF + $vgpr0 = IMPLICIT_DEF + BUNDLE implicit-def $vgpr1, implicit $vgpr0, implicit $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec { + $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + } + BUNDLE implicit-def $sgpr0_sgpr1, implicit $sgpr10_sgpr11 { + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + } + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle +# GCN: bb.2: +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP +# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN +--- +name: vmem_vcc_min_of_two_after_bundle +body: | + bb.0: + successors: %bb.2 + + BUNDLE implicit-def $vgpr1, implicit $vgpr0 { + $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF + $vgpr0 = IMPLICIT_DEF + $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + } + S_NOP 0 + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + BUNDLE implicit-def $vgpr1, implicit $vgpr0 { + $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + } + + bb.2: + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec +... diff --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir @@ -0,0 +1,66 @@ +# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s + +# GCN-LABEL: name: break_smem_clause_max_look_ahead_in_bundle +# GCN: S_LOAD_DWORDX2_IMM +# XNACK-NEXT: S_NOP +# NOXNACK-NOT: S_NOP +# GCN: S_LOAD_DWORDX2 +# XNACK-NEXT: S_NOP +# NOXNACK-NOT: S_NOP +# GCN: } +--- +name: break_smem_clause_max_look_ahead_in_bundle +body: | + bb.0: + BUNDLE implicit-def $sgpr6_sgpr7 { + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 4, 0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 8, 
0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 12, 0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 16, 0, 0 + $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 + $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM $sgpr14_sgpr15, 0, 0, 0 + } + S_ENDPGM 0 +... + +# GFX10-LABEL: name: hazard_smem_war_in_bundle +# GFX10: S_LOAD_DWORD_IMM +# GFX10-NEXT: $sgpr_null = S_MOV_B32 0 +# GFX10: V_CMP_EQ_F32 +--- +name: hazard_smem_war_in_bundle +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + BUNDLE implicit-def $sgpr0_sgpr1 { + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + } + S_ENDPGM 0 +... + +# GFX9-LABEL: name: hazard_ignore_dbg_label_in_bundle +# GFX9: DBG_LABEL 6 +# GFX9-NEXT: S_NOP 0 +# GFX9: S_SENDMSG 3, implicit $exec, implicit $m0 +--- +name: hazard_ignore_dbg_label_in_bundle +body: | + bb.0: + BUNDLE { + $m0 = S_MOV_B32 killed $sgpr12 + DBG_LABEL 0 + DBG_LABEL 1 + DBG_LABEL 2 + DBG_LABEL 3 + DBG_LABEL 4 + DBG_LABEL 5 + DBG_LABEL 6 + S_SENDMSG 3, implicit $exec, implicit $m0 + } + S_ENDPGM 0 +...