Index: lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- lib/Target/AMDGPU/SILowerControlFlow.cpp +++ lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -130,6 +130,19 @@ unsigned NumInstr = 0; + // Check whether 'To MBB' precedes 'From MBB'; this can happen after the + // Block Placement Pass: + // bb2<--+ bb2 (From) Block Flow (To) + // / \ | bb6 => Placement => bb2 (From) + // bb6 -> Flow Flow (To) bb6 + for (MachineFunction::iterator MBBI = MachineFunction::iterator(From), + ToI = MachineFunction::iterator(To); + ToI != From->getParent()->end(); ++ToI) { + // Return true so that a conditional branch is generated for 'From MBB'. + if (MBBI == ToI) + return true; + } + for (MachineFunction::iterator MBBI = MachineFunction::iterator(From), ToI = MachineFunction::iterator(To); MBBI != ToI; ++MBBI) { @@ -295,8 +308,38 @@ .addReg(AMDGPU::EXEC) .addReg(Src); - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addOperand(MI.getOperand(1)); + // After the Block Placement Pass, the Latch block might precede the Header: + // bb2<--+ Flow (Latch) MBB + // / \ | bb2 (Header) NextBB + // bb6 -> Flow bb6 + // + // In that case 'Flow MBB' can fall through to 'bb2 MBB', but we need to + // change the branch condition and target for 'Flow MBB'. + // NOTE(review): std::next(MBB) is dereferenced unchecked; is MBB never last? + // Check whether Latch is immediately before Header: + // %Flow: + // SI_LOOP %SGPR0_SGPR1, bb2, .. + // S_BRANCH end + // %bb2: + // ..
+ MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + if (&NextBB == MI.getOperand(1).getMBB()) { + MachineInstr &NextMI = *std::next(MachineBasicBlock::iterator(MI)); + + assert(NextMI.getOpcode() == AMDGPU::S_BRANCH && + "Next instruction of SI_LOOP should be S_BRANCH"); + + // Result: + // s_cbranch_execz end + // s_branch end + // The redundant 's_branch end' is removed in Branch(). + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) + .addOperand(NextMI.getOperand(0)); + } + else { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addOperand(MI.getOperand(1)); + } MI.eraseFromParent(); } @@ -315,10 +358,20 @@ } void SILowerControlFlow::Branch(MachineInstr &MI) { + // Drop fall-through branches; otherwise this is probably an infinite loop. if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode()) MI.eraseFromParent(); - - // If these aren't equal, this is probably an infinite loop. + else if (&*MI.getParent()->begin() != &MI) { + MachineInstr &PrevMI = *std::prev(MachineBasicBlock::iterator(MI)); + + // Match this pattern (see the comments in Loop()): + // s_cbranch_execz end + // s_branch end + // and remove the redundant 's_branch end'. + if (PrevMI.getOpcode() == AMDGPU::S_CBRANCH_EXECZ && + PrevMI.getOperand(0).getMBB() == MI.getOperand(0).getMBB()) + MI.eraseFromParent(); + } } void SILowerControlFlow::Kill(MachineInstr &MI) {