diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -928,44 +928,56 @@
     // There may be a branch to the layout successor. This is unlikely but it
     // happens. The correct thing to do is to remove the branch before
     // duplicating the instructions in all cases.
-    TII->removeBranch(*PrevBB);
-    if (PreRegAlloc) {
-      DenseMap<Register, RegSubRegPair> LocalVRMap;
-      SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
-      MachineBasicBlock::iterator I = TailBB->begin();
-      // Process PHI instructions first.
-      while (I != TailBB->end() && I->isPHI()) {
-        // Replace the uses of the def of the PHI with the register coming
-        // from PredBB.
-        MachineInstr *MI = &*I++;
-        processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
-      }
+    bool RemovedBranches = TII->removeBranch(*PrevBB) != 0;
+
+    // If PrevBB still has terminator instructions, abort the merge.
+    if (PrevBB->getFirstTerminator() == PrevBB->end()) {
+      if (PreRegAlloc) {
+        DenseMap<Register, RegSubRegPair> LocalVRMap;
+        SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+        MachineBasicBlock::iterator I = TailBB->begin();
+        // Process PHI instructions first.
+        while (I != TailBB->end() && I->isPHI()) {
+          // Replace the uses of the def of the PHI with the register coming
+          // from PredBB.
+          MachineInstr *MI = &*I++;
+          processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
+                     true);
+        }
 
-      // Now copy the non-PHI instructions.
-      while (I != TailBB->end()) {
-        // Replace def of virtual registers with new registers, and update
-        // uses with PHI source register or the new registers.
-        MachineInstr *MI = &*I++;
-        assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
-        duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
-        MI->eraseFromParent();
+        // Now copy the non-PHI instructions.
+        while (I != TailBB->end()) {
+          // Replace def of virtual registers with new registers, and update
+          // uses with PHI source register or the new registers.
+          MachineInstr *MI = &*I++;
+          assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+          duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
+          MI->eraseFromParent();
+        }
+        appendCopies(PrevBB, CopyInfos, Copies);
+      } else {
+        TII->removeBranch(*PrevBB);
+        // No PHIs to worry about, just splice the instructions over.
+        PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
       }
-      appendCopies(PrevBB, CopyInfos, Copies);
-    } else {
-      TII->removeBranch(*PrevBB);
-      // No PHIs to worry about, just splice the instructions over.
-      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
-    }
-    PrevBB->removeSuccessor(PrevBB->succ_begin());
-    assert(PrevBB->succ_empty());
-    PrevBB->transferSuccessors(TailBB);
+      PrevBB->removeSuccessor(PrevBB->succ_begin());
+      assert(PrevBB->succ_empty());
+      PrevBB->transferSuccessors(TailBB);
 
-    // Update branches in PrevBB based on Tail's layout successor.
-    if (ShouldUpdateTerminators)
-      PrevBB->updateTerminator(TailBB->getNextNode());
+      // Update branches in PrevBB based on Tail's layout successor.
+      if (ShouldUpdateTerminators)
+        PrevBB->updateTerminator(TailBB->getNextNode());
 
-    TDBBs.push_back(PrevBB);
-    Changed = true;
+      TDBBs.push_back(PrevBB);
+      Changed = true;
+    } else {
+      LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
+                           "contains terminator instructions\n");
+      // Nothing else changed: report only whether a branch was removed.
+      if (!Changed)
+        return RemovedBranches;
+    }
+    Changed |= RemovedBranches;
   }
 
   // If this is after register allocation, there are no phis to fix.
diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s + +# Early tail duplication should not merge bb.6 into bb.5, adding a +# non-terminator (S_SLEEP) after the terminator S_MOV_B32_term. + +--- +name: tail_duplicate_terminator +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: tail_duplicate_terminator + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_SLEEP 1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_SLEEP 2 + ; CHECK-NEXT: S_BRANCH %bb.1 + bb.1: + S_BRANCH %bb.3 + + bb.2: + S_SLEEP 1 + + bb.3: + %0:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo + + bb.4: + SI_WATERFALL_LOOP %bb.4, implicit $exec + + bb.5: + $exec_lo = S_MOV_B32_term %0 + + bb.6: + S_SLEEP 2 + S_BRANCH %bb.2 +...