Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -30,6 +30,13 @@
 class GCNSubtarget;
 
 class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+public:
+  typedef function_ref<bool(MachineInstr *)> IsHazardFn;
+
+private:
+  // Distinguish if we are called from scheduler or hazard recognizer
+  bool IsHazardRecognizerMode;
+
   // This variable stores the instruction that has been emitted this cycle. It
   // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
   // called.
@@ -53,11 +60,9 @@
 
   void addClauseInst(const MachineInstr &MI);
 
-  int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
-  int getWaitStatesSinceDef(unsigned Reg,
-                            function_ref<bool(MachineInstr *)> IsHazardDef =
-                                [](MachineInstr *) { return true; });
-  int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
+  int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
+  int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
+  int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
 
   int checkSoftClauseHazards(MachineInstr *SMEM);
   int checkSMRDHazards(MachineInstr *SMRD);
@@ -84,6 +89,7 @@
   void EmitNoop() override;
   unsigned PreEmitNoops(SUnit *SU) override;
   unsigned PreEmitNoops(MachineInstr *) override;
+  unsigned PreEmitNoopsCommon(MachineInstr *);
   void AdvanceCycle() override;
   void RecedeCycle() override;
 };
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -37,6 +37,7 @@
 //===----------------------------------------------------------------------===//
 
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+  IsHazardRecognizerMode(false),
   CurrCycleInstr(nullptr),
   MF(MF),
   ST(MF.getSubtarget<GCNSubtarget>()),
@@ -172,10 +173,19 @@
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
-  return PreEmitNoops(SU->getInstr());
+  IsHazardRecognizerMode = false;
+  return PreEmitNoopsCommon(SU->getInstr());
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+  IsHazardRecognizerMode = true;
+  CurrCycleInstr = MI;
+  unsigned W = PreEmitNoopsCommon(MI);
+  CurrCycleInstr = nullptr;
+  return W;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
   int WaitStates = std::max(0, checkAnyInstHazards(MI));
 
   if (SIInstrInfo::isSMRD(*MI))
@@ -231,7 +241,7 @@
   // Do not track non-instructions which do not affect the wait states.
   // If included, these instructions can lead to buffer overflow such that
   // detectable hazards are missed.
-  if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+  if (CurrCycleInstr->isImplicitDef())
     return;
   else if (CurrCycleInstr->isDebugInstr())
     return;
@@ -265,41 +275,109 @@
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-int GCNHazardRecognizer::getWaitStatesSince(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
+
+// Returns the minimum number of wait states since \p I, walking all
+// predecessors. Only scans until \p IsExpired returns true.
+// Can only be run in hazard recognizer mode.
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineBasicBlock *MBB,
+                              MachineBasicBlock::reverse_instr_iterator I,
+                              int WaitStates,
+                              IsExpiredFn IsExpired,
+                              DenseSet<const MachineBasicBlock *> &Visited) {
+
+  for (auto E = MBB->rend(); I != E; ++I) {
+    if (IsHazard(&*I))
+      return WaitStates;
+
+    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+      continue;
+
+    WaitStates += SIInstrInfo::getNumWaitStates(*I);
+
+    if (IsExpired(&*I, WaitStates))
+      return std::numeric_limits<int>::max();
+  }
+
+  int MinWaitStates = WaitStates;
+  bool Found = false;
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
+                               WaitStates, IsExpired, Visited);
+
+    if (W == std::numeric_limits<int>::max())
+      continue;
+
+    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
+    if (IsExpired(nullptr, MinWaitStates))
+      return MinWaitStates;
+
+    Found = true;
+  }
+
+  if (Found)
+    return MinWaitStates;
+
+  return std::numeric_limits<int>::max();
+}
+
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineInstr *MI,
+                              IsExpiredFn IsExpired) {
+  DenseSet<const MachineBasicBlock *> Visited;
+  return getWaitStatesSince(IsHazard, MI->getParent(),
+                            std::next(MI->getReverseIterator()),
+                            0, IsExpired, Visited);
+}
+
+int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
+  if (IsHazardRecognizerMode) {
+    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
+      return WaitStates >= Limit;
+    };
+    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
+  }
+
   int WaitStates = 0;
   for (MachineInstr *MI : EmittedInstrs) {
     if (MI) {
       if (IsHazard(MI))
         return WaitStates;
 
-      unsigned Opcode = MI->getOpcode();
-      if (Opcode == AMDGPU::INLINEASM)
+      if (MI->isInlineAsm())
         continue;
     }
     ++WaitStates;
+
+    if (WaitStates >= Limit)
+      break;
   }
   return std::numeric_limits<int>::max();
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceDef(
-    unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
+int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
+                                               IsHazardFn IsHazardDef,
+                                               int Limit) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceSetReg(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
+                                                  int Limit) {
   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
 //===----------------------------------------------------------------------===//
@@ -397,7 +475,8 @@
     if (!Use.isReg())
       continue;
     int WaitStatesNeededForUse =
-        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                   SmrdSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
 
     // This fixes what appears to be undocumented hardware behavior in SI where
@@ -410,7 +489,8 @@
     if (IsBufferSMRD) {
       int WaitStatesNeededForUse =
         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
-                                                   IsBufferHazardDefFn);
+                                                   IsBufferHazardDefFn,
+                                                   SmrdSgprWaitStates);
       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     }
   }
@@ -434,7 +514,8 @@
       continue;
 
     int WaitStatesNeededForUse =
-      VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+      VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                 VmemSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
   return WaitStatesNeeded;
 }
@@ -454,13 +535,16 @@
     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
     int WaitStatesNeededForUse =
-      DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+      DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+                              [](MachineInstr *) { return true; },
+                              DppVgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
   WaitStatesNeeded = std::max(
       WaitStatesNeeded,
-      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
+                                                DppExecWaitStates));
 
   return WaitStatesNeeded;
 }
@@ -472,7 +556,8 @@
   // instruction.
   const int DivFMasWaitStates = 4;
   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
-  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
+                                               DivFMasWaitStates);
 
   return DivFMasWaitStates - WaitStatesNeeded;
 }
@@ -485,7 +570,7 @@
   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
     return GetRegHWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
 
   return GetRegWaitStates - WaitStatesNeeded;
 }
@@ -499,7 +584,7 @@
   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     return HWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
   return SetRegWaitStates - WaitStatesNeeded;
 }
@@ -570,7 +655,7 @@
         TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
   };
   int WaitStatesNeededForDef =
-    VALUWaitStates - getWaitStatesSince(IsHazardFn);
+    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
 
   return WaitStatesNeeded;
@@ -635,7 +720,8 @@
   };
 
   const int RWLaneWaitStates = 4;
-  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
+  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
+                                              RWLaneWaitStates);
 
   return RWLaneWaitStates - WaitStatesSince;
 }
@@ -650,7 +736,7 @@
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
   return RFEWaitStates - WaitStatesNeeded;
 }
@@ -674,7 +760,8 @@
       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
     };
     int WaitStatesNeededForUse =
-      MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+      MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
+                                               MovFedWaitStates);
 
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
@@ -687,5 +774,6 @@
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };
-  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
+                                                   SMovRelWaitStates);
 }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
@@ -837,7 +837,7 @@
   void insertReturn(MachineBasicBlock &MBB) const;
   /// Return the number of wait states that result from executing this
   /// instruction.
-  unsigned getNumWaitStates(const MachineInstr &MI) const;
+  static unsigned getNumWaitStates(const MachineInstr &MI);
 
   /// Returns the operand named \p Op.  If \p MI does not have an
   /// operand named \c Op, this function returns nullptr.
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1152,7 +1152,7 @@
   }
 }
 
-unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
+unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default: return 1; // FIXME: Do wait states equal cycles?
Index: llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
@@ -0,0 +1,230 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: vmem_vcc_fallthrough
+# GCN: bb.1:
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_fallthrough
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_branch_to_next
+# GCN: bb.1:
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_branch_to_next
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_fallthrough_no_hazard_too_far
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_fallthrough_no_hazard_nops
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_NOP 4
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_branch_around
+# GCN: bb.2:
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_branch_around
+body: |
+  bb.0:
+    successors: %bb.2
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+
+  bb.2:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_branch_backedge
+# GCN: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_branch_backedge
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+
+  bb.1:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.0
+...
+# GCN-LABEL: name: vmem_vcc_min_of_two
+# GCN: bb.2:
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_min_of_two
+body: |
+  bb.0:
+    successors: %bb.2
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_NOP 0
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+
+  bb.2:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+...
+# GCN-LABEL: name: vmem_vcc_self_loop
+# GCN: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_self_loop
+body: |
+  bb.0:
+    successors: %bb.0
+
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.0
+...
+# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
+# GCN: bb.1:
+# GCN: $sgpr0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_min_of_two_self_loop1
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+
+  bb.1:
+    successors: %bb.1
+
+    $sgpr0 = S_MOV_B32 0
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.1
+...
+# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
+# GCN: bb.1:
+# GCN: $sgpr0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_min_of_two_self_loop2
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_NOP 0
+
+  bb.1:
+    successors: %bb.1
+
+    $sgpr0 = S_MOV_B32 0
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    S_BRANCH %bb.1
+...
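
For readers who want the shape of the bounded, cross-block search in isolation, the following standalone C++ sketch models the recursive static getWaitStatesSince helper added above. It is not part of the patch: the Block type, the countWaitStatesSince name, and the assumption that every instruction costs exactly one wait state are illustrative simplifications of the MachineBasicBlock walk and SIInstrInfo::getNumWaitStates; the real helper also skips inline asm, implicit defs, and debug instructions.

// Standalone model of the bounded reverse-CFG wait-state search (illustrative only).
#include <algorithm>
#include <functional>
#include <limits>
#include <set>
#include <string>
#include <vector>

struct Block {
  std::vector<std::string> Insts;    // instructions in program order
  std::vector<const Block *> Preds;  // predecessor blocks
};

using IsHazardFn = std::function<bool(const std::string &)>;

// Walk backwards from index From (exclusive) in MBB, then through all
// predecessors, and return the minimum number of wait states to the nearest
// instruction for which IsHazard is true.  Each block is scanned at most once,
// and the walk gives up (returns INT_MAX) once Limit wait states have elapsed,
// mirroring the IsExpired cutoff used by the patch.
static int countWaitStatesSince(const IsHazardFn &IsHazard, const Block *MBB,
                                int From, int WaitStates, int Limit,
                                std::set<const Block *> &Visited) {
  for (int I = From - 1; I >= 0; --I) {
    if (IsHazard(MBB->Insts[I]))
      return WaitStates;
    // Simplification: every instruction costs exactly one wait state.
    if (++WaitStates >= Limit)
      return std::numeric_limits<int>::max();
  }

  int Min = std::numeric_limits<int>::max();
  for (const Block *Pred : MBB->Preds) {
    if (!Visited.insert(Pred).second)
      continue;
    Min = std::min(Min, countWaitStatesSince(
                            IsHazard, Pred,
                            static_cast<int>(Pred->Insts.size()),
                            WaitStates, Limit, Visited));
  }
  return Min;  // INT_MAX if no predecessor reaches a hazard within Limit
}

int main() {
  // bb.0: v_addc, s_mov   ->   bb.1: buffer_load   (fallthrough edge)
  Block BB0{{"v_addc", "s_mov"}, {}};
  Block BB1{{"buffer_load"}, {&BB0}};

  std::set<const Block *> Visited;
  int W = countWaitStatesSince(
      [](const std::string &I) { return I == "v_addc"; },
      &BB1, /*From=*/0, /*WaitStates=*/0, /*Limit=*/5, Visited);
  return W;  // 1: only s_mov separates the hazard from the load
}

In this model, as in the vmem_vcc_min_of_two test, the minimum is taken across all predecessors, so the closest defining path determines how many NOPs the recognizer must still insert.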