diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -684,6 +684,26 @@ } bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { + auto GetFallThroughSucc = [=](MachineBasicBlock *MBB) { + MachineBasicBlock *Ret = nullptr; + for (auto *S : MBB->successors()) { + if (MBB->isLayoutSuccessor(S)) { + // The only fallthrough candidate + MachineBasicBlock::iterator I(MBB->getFirstInstrTerminator()); + MachineBasicBlock::iterator E = MBB->end(); + for (; I != E; I++) { + if (I->isBranch() && TII->getBranchDestBlock(*I) == S) + // We have unoptimized branch to layout successor + break; + } + if (I == E) + Ret = S; + break; + } + } + return Ret; + }; + bool Redundant = true; for (auto &I : MBB.instrs()) { if (!I.isDebugInstr() && !I.isUnconditionalBranch()) @@ -692,25 +712,11 @@ if (Redundant) { MachineBasicBlock *Succ = *MBB.succ_begin(); SmallVector Preds(MBB.predecessors()); + MachineBasicBlock *FallThrough = nullptr; for (auto P : Preds) { - P->replaceSuccessor(&MBB, Succ); - MachineBasicBlock::iterator I(P->getFirstInstrTerminator()); - while (I != P->end()) { - if (I->isBranch()) { - if (TII->getBranchDestBlock(*I) == &MBB) { - I->getOperand(0).setMBB(Succ); - break; - } - } - I++; - } - if (I == P->end()) { - MachineFunction *MF = P->getParent(); - MachineFunction::iterator InsertPt = - P->getNextNode() ? MachineFunction::iterator(P->getNextNode()) - : MF->end(); - MF->splice(InsertPt, Succ); - } + if (GetFallThroughSucc(P) == &MBB) + FallThrough = P; + P->ReplaceUsesOfBlockWith(&MBB, Succ); } MBB.removeSuccessor(Succ); if (LIS) { @@ -719,6 +725,17 @@ } MBB.clear(); MBB.eraseFromParent(); + if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) { + MachineFunction *MF = MBB.getParent(); + if (!GetFallThroughSucc(Succ)) { + MachineFunction::iterator InsertPt(FallThrough->getNextNode()); + MF->splice(InsertPt, Succ); + } else + BuildMI(*FallThrough, FallThrough->end(), + FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH)) + .addMBB(Succ); + } + return true; } return false; diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -582,3 +582,119 @@ S_ENDPGM 0 ... + +--- +# redundant MBB removal correctness test: +# we can keep bb.2 fallthrough to the new succ because after bb.3 gets removed +# new succ (bb.4) becomes bb.2's layout successor +name: removed_succ_fallthrough_but_layout_successor +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: removed_succ_fallthrough_but_layout_successor + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; GCN: bb.2: + ; GCN: successors: %bb.4(0x80000000) + ; GCN: bb.4: + ; GCN: successors: %bb.5(0x80000000) + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: bb.5: + ; GCN: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.4 + + %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.1: + successors: %bb.2, %bb.3 + + %2:sreg_64 = SI_IF undef %3:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.2: + + bb.3: + SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + + bb.4: + SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.5: + S_ENDPGM 0 + +... + +--- +# redundant MBB removal correctness test: +# If one of the remdundant block preds has a fallthrough path, but the only redundant block succ is not +# going to be a layout successor to that pred after redundant block removal, we should not rearrange +# blocks to keep pred's fallthrough path, if the succ has fallthrough path to one of it's succ too. + +name: deleted_succ_fallthrough_not_layout_successor +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: deleted_succ_fallthrough_not_layout_successor + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GCN: bb.2: + ; GCN: successors: %bb.5(0x80000000) + ; GCN: S_BRANCH %bb.5 + ; GCN: bb.4: + ; GCN: S_ENDPGM 0 + ; GCN: bb.5: + ; GCN: successors: %bb.6(0x80000000) + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: bb.6: + ; GCN: successors: %bb.4(0x80000000) + ; GCN: S_BRANCH %bb.4 + bb.0: + successors: %bb.1, %bb.4 + + %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.1: + successors: %bb.2, %bb.3 + + %2:sreg_64 = SI_IF undef %3:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.2: + + bb.3: + SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.5 + + bb.4: + S_ENDPGM 0 + + + bb.5: + SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.6: + S_BRANCH %bb.4 + +...