diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -79,6 +79,7 @@ SetVector LoweredEndCf; DenseSet LoweredIf; SmallSet KillBlocks; + SmallSet RecomputeRegs; const TargetRegisterClass *BoolRC = nullptr; unsigned AndOpc; @@ -297,8 +298,7 @@ // FIXME: Is there a better way of adjusting the liveness? It shouldn't be // hard to add another def here but I'm not sure how to correctly update the // valno. - LIS->removeInterval(SaveExecReg); - LIS->createAndComputeVirtRegInterval(SaveExecReg); + RecomputeRegs.insert(SaveExecReg); LIS->createAndComputeVirtRegInterval(Tmp); if (!SimpleIf) LIS->createAndComputeVirtRegInterval(CopyReg); @@ -309,6 +309,7 @@ const DebugLoc &DL = MI.getDebugLoc(); Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MachineBasicBlock::iterator Start = MBB.begin(); @@ -319,7 +320,7 @@ BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg) .add(MI.getOperand(1)); // Saved EXEC if (LV) - LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec); + LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); @@ -331,9 +332,6 @@ .addReg(Exec) .addReg(SaveReg); - if (LIS) - LIS->InsertMachineInstrInMaps(*And); - MachineInstr *Xor = BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec) .addReg(Exec) @@ -356,12 +354,13 @@ MI.eraseFromParent(); LIS->InsertMachineInstrInMaps(*OrSaveExec); + LIS->InsertMachineInstrInMaps(*And); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*Branch); - LIS->removeInterval(DstReg); - LIS->createAndComputeVirtRegInterval(DstReg); + RecomputeRegs.insert(SrcReg); + RecomputeRegs.insert(DstReg); LIS->createAndComputeVirtRegInterval(SaveReg); // Let this be recomputed. @@ -388,8 +387,9 @@ // AND the break condition operand with exec, then OR that into the "loop // exit" mask. MachineInstr *And = nullptr, *Or = nullptr; + Register AndReg; if (!SkipAnding) { - Register AndReg = MRI->createVirtualRegister(BoolRC); + AndReg = MRI->createVirtualRegister(BoolRC); And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg) .addReg(Exec) .add(MI.getOperand(1)); @@ -398,8 +398,6 @@ Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .addReg(AndReg) .add(MI.getOperand(2)); - if (LIS) - LIS->createAndComputeVirtRegInterval(AndReg); } else { Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .add(MI.getOperand(1)) @@ -411,9 +409,13 @@ LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or); if (LIS) { - if (And) - LIS->InsertMachineInstrInMaps(*And); LIS->ReplaceMachineInstrInMaps(MI, *Or); + if (And) { + // Read of original operand 1 is on And now not Or. + RecomputeRegs.insert(And->getOperand(2).getReg()); + LIS->InsertMachineInstrInMaps(*And); + LIS->createAndComputeVirtRegInterval(AndReg); + } } MI.eraseFromParent(); @@ -436,6 +438,7 @@ .add(MI.getOperand(1)); if (LIS) { + RecomputeRegs.insert(MI.getOperand(0).getReg()); LIS->ReplaceMachineInstrInMaps(MI, *AndN2); LIS->InsertMachineInstrInMaps(*Branch); } @@ -714,11 +717,13 @@ if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) { // This should be before all vector instructions. - BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(), + MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(), TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec) .addImm(MI.getOperand(0).getImm()); - if (LIS) + if (LIS) { LIS->RemoveMachineInstrFromMaps(MI); + LIS->InsertMachineInstrInMaps(*InitMI); + } MI.eraseFromParent(); return; } @@ -789,8 +794,7 @@ LIS->InsertMachineInstrInMaps(*CmpMI); LIS->InsertMachineInstrInMaps(*CmovMI); - LIS->removeInterval(InputReg); - LIS->createAndComputeVirtRegInterval(InputReg); + RecomputeRegs.insert(InputReg); LIS->createAndComputeVirtRegInterval(CountReg); } @@ -807,7 +811,7 @@ while (!MBB.predecessors().empty()) { MachineBasicBlock *P = *MBB.pred_begin(); - if (P->getFallThrough() == &MBB) + if (P->getFallThrough(false) == &MBB) FallThrough = P; P->ReplaceUsesOfBlockWith(&MBB, Succ); } @@ -828,14 +832,13 @@ MBB.clear(); MBB.eraseFromParent(); if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) { - if (!Succ->canFallThrough()) { - MachineFunction *MF = FallThrough->getParent(); - MachineFunction::iterator FallThroughPos(FallThrough); - MF->splice(std::next(FallThroughPos), Succ); - } else - BuildMI(*FallThrough, FallThrough->end(), - FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH)) - .addMBB(Succ); + // Note: we cannot update block layout and preserve live intervals; + // hence we must insert a branch. + MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(), + FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH)) + .addMBB(Succ); + if (LIS) + LIS->InsertMachineInstrInMaps(*BranchMI); } return true; @@ -947,6 +950,14 @@ optimizeEndCf(); + if (LIS) { + for (Register Reg : RecomputeRegs) { + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + } + } + + RecomputeRegs.clear(); LoweredEndCf.clear(); LoweredIf.clear(); KillBlocks.clear(); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -446,15 +446,16 @@ ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_BRANCH %bb.4 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -923,7 +924,6 @@ S_BRANCH %bb.1 bb.1: - ; predecessors: %bb.0 successors: %bb.2, %bb.6 %3:vgpr_32 = IMPLICIT_DEF @@ -932,7 +932,6 @@ S_BRANCH %bb.2 bb.2: - ; predecessors: %bb.1 successors: %bb.3, %bb.7 %6:vgpr_32 = IMPLICIT_DEF @@ -941,7 +940,6 @@ S_BRANCH %bb.3 bb.3: - ; predecessors: %bb.2 successors: %bb.4, %bb.5 %9:vgpr_32 = IMPLICIT_DEF @@ -950,40 +948,34 @@ S_BRANCH %bb.4 bb.4: - ; predecessors: %bb.3 successors: %bb.5 S_BRANCH %bb.5 bb.5: - ; predecessors: %bb.3, %bb.4 successors: %bb.7 SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.7 bb.6: - ; predecessors: %bb.1, %bb.13 successors: %bb.14 SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.14 bb.7: - ; predecessors: %bb2, %bb.5 successors: %bb.8 SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.8 bb.8: - ; predecessors: %bb.7 successors: %bb.9 S_BRANCH %bb.9 bb.9: - ; predecessors: %bb.8 successors: %bb.11, %bb.12 %12:vgpr_32 = IMPLICIT_DEF @@ -992,33 +984,28 @@ S_BRANCH %bb.11 bb.10: - ; predecessors: %bb.12 successors: %bb.13 S_BRANCH %bb.13 bb.11: - ; predecessors: %bb.9 successors: %bb.12 S_BRANCH %bb.12 bb.12: - ; predecessors: %bb.9, %bb.11 successors: %bb.10, %bb.13 %15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.10 bb.13: - ; predecessors: %bb.10, %bb.12 successors: %bb.6 SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.6 bb.14: - ; predecessors: %bb.0, %bb.6 SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir @@ -0,0 +1,334 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -run-pass=liveintervals -run-pass=si-lower-control-flow -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs -o - %s | FileCheck %s + +# Check that verifier passes for the following. + +# Caused: Live segment doesn't end at a valid instruction +--- +name: _amdgpu_cs_main1 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _amdgpu_cs_main1 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY1]], implicit-def dead $scc + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, %3, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc + ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_BRANCH %bb.1 + bb.0: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $vgpr0 + + %2:vgpr_32 = COPY killed $vgpr0 + %6:sreg_32 = V_CMP_NE_U32_e64 0, killed %2, implicit $exec + %0:sreg_32 = SI_IF killed %6, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.1: + SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 + + bb.2: + successors: %bb.3(0x80000000) + + + bb.3: + successors: %bb.4(0x40000000), %bb.1(0x40000000) + + %1:sreg_32 = SI_ELSE killed %0, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.4: + successors: %bb.1(0x80000000) + + S_BRANCH %bb.1 + +... + +# Caused: Assertion `itr != mi2iMap.end() && "Instruction not in maps."' failed. +--- +name: _amdgpu_cs_main2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _amdgpu_cs_main2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 1, [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[V_CMP_GT_I32_e64_]], implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_OR_B32_]] + ; CHECK-NEXT: $exec_lo = S_ANDN2_B32_term $exec_lo, [[S_OR_B32_]], implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0 + + %4:vgpr_32 = COPY killed $vgpr0 + %8:sreg_32 = V_CMP_GT_I32_e64 1, killed %4, implicit $exec + %6:sreg_32 = S_MOV_B32 0 + %10:sreg_32 = COPY killed %6 + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %1:sreg_32 = COPY killed %10 + %2:sreg_32 = SI_IF_BREAK %8, killed %1, implicit-def dead $scc + %10:sreg_32 = COPY %2 + SI_LOOP %2, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + SI_END_CF killed %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 + +... + +# Caused: Live range continues after kill flag +--- +name: _amdgpu_cs_main3 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _amdgpu_cs_main3 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NGT_F32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[V_CMP_NLT_F32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x40000000), %bb.4(0x40000000) + liveins: $vgpr0 + + %2:vgpr_32 = COPY killed $vgpr0 + %5:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, %2, 0, implicit $mode, implicit $exec + %0:sreg_32 = SI_IF killed %5, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %7:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, killed %2, 0, implicit $mode, implicit $exec + %1:sreg_32 = SI_IF killed %7, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + + bb.3: + successors: %bb.4(0x80000000) + + SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.4: + SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 + +... + +# Caused: Live range continues after dead def flag +--- +name: _amdgpu_cs_main4 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _amdgpu_cs_main4 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]] + ; CHECK-NEXT: S_BRANCH %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE %9, %subreg.sub0, %9, %subreg.sub1, %9, %subreg.sub2, %9, %subreg.sub3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %11 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY6]], [[REG_SEQUENCE]], 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc + ; CHECK-NEXT: S_BRANCH %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x04000000), %bb.6(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY8]] + ; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY7]], [[V_READLANE_B32_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 1, [[S_FF1_I32_B32_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[COPY8]], [[S_LSHL_B32_]], implicit-def dead $scc + ; CHECK-NEXT: S_CMP_LG_U32 [[S_ANDN2_B32_]], 0, implicit-def $scc + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]] + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY2]], 0, implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 0, [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: dead [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_1]], [[COPY9]], implicit-def dead $scc + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + bb.0: + successors: %bb.1(0x40000000), %bb.5(0x40000000) + liveins: $vgpr0 + + %8:vgpr_32 = COPY killed $vgpr0 + %10:sreg_32 = S_MOV_B32 0 + %11:sreg_32 = V_CMP_NE_U32_e64 0, %8, implicit $exec + %0:sreg_32 = SI_IF killed %11, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.6(0x80000000) + + %13:sreg_32 = COPY $exec_lo + %1:sreg_32 = COPY %13 + %25:sreg_32 = COPY killed %10 + %26:sreg_32 = COPY killed %1 + S_BRANCH %bb.6 + + bb.2: + successors: %bb.3(0x80000000) + + %23:sgpr_128 = REG_SEQUENCE killed %19, %subreg.sub0, %19, %subreg.sub1, %19, %subreg.sub2, %19, %subreg.sub3 + %24:vgpr_32 = COPY killed %4 + BUFFER_ATOMIC_ADD_OFFSET killed %24, killed %23, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + + bb.3: + successors: %bb.5(0x80000000) + + SI_END_CF killed %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.5 + + bb.4: + S_ENDPGM 0 + + bb.5: + successors: %bb.4(0x80000000) + + SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.6: + successors: %bb.7(0x04000000), %bb.6(0x7c000000) + + %2:sreg_32 = COPY killed %25 + %3:sreg_32 = COPY killed %26 + %14:sreg_32 = S_FF1_I32_B32 %3 + %15:sreg_32 = V_READLANE_B32 %8, %14 + %4:sreg_32 = S_ADD_I32 killed %2, killed %15, implicit-def dead $scc + %17:sreg_32 = S_LSHL_B32 1, killed %14, implicit-def dead $scc + %5:sreg_32 = S_ANDN2_B32 killed %3, killed %17, implicit-def dead $scc + S_CMP_LG_U32 %5, 0, implicit-def $scc + %25:sreg_32 = COPY %4 + %26:sreg_32 = COPY killed %5 + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + S_BRANCH %bb.7 + + bb.7: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %19:sreg_32 = S_MOV_B32 0 + %20:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed %13, 0, implicit $exec + %21:sreg_32 = V_CMP_EQ_U32_e64 0, killed %20, implicit $exec + %7:sreg_32 = SI_IF killed %21, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + +...