diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -68,6 +68,11 @@ cl::desc("Allow relaxed uniform region checks"), cl::init(true)); +static cl::opt + ReorderNodeSize("structurizecfg-node-reorder-size", + cl::desc("Limit region size for reordering nodes"), + cl::init(100), cl::Hidden); + // Definition of the complex types used in this pass. using BBValuePair = std::pair; @@ -262,6 +267,8 @@ void orderNodes(); + void reorderNodes(); + void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -420,6 +427,57 @@ } } +/// Change the node ordering to decrease the range of live values, especially +/// the values that capture the control flow path for branches. We do this +/// by moving blocks with a single predecessor and successor to appear after +/// predecessor. The motivation is to move some loop exit blocks into a loop. +/// In cases where a loop has a large number of exit blocks, this reduces the +/// amount of values needed across the loop boundary. +void StructurizeCFG::reorderNodes() { + SmallVector NewOrder; + DenseMap MoveTo; + BitVector Moved(Order.size()); + + // The benefits of reordering nodes occurs for large regions. + if (Order.size() <= ReorderNodeSize) + return; + + // The algorithm works with two passes over Order. The first pass identifies + // the blocks to move and the position to move them to. The second pass + // creates the new order based upon this information. We move blocks with + // a single predecessor and successor. If there are multiple candidates then + // maintain the original order. + BBSet Seen; + for (int I = Order.size() - 1; I >= 0; --I) { + auto *BB = Order[I]->getEntry(); + Seen.insert(BB); + auto *Pred = BB->getSinglePredecessor(); + auto *Succ = BB->getSingleSuccessor(); + // Consider only those basic blocks that have a predecessor in Order and a + // successor that exits the region. The region may contain subregions that + // have been structurized and are not included in Order. + if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && + !MoveTo.count(Pred)) { + MoveTo[Pred] = I; + Moved.set(I); + } + } + + // If no blocks have been moved then the original order is good. + if (!Moved.count()) + return; + + for (size_t I = 0, E = Order.size(); I < E; ++I) { + auto *BB = Order[I]->getEntry(); + if (MoveTo.count(BB)) + NewOrder.push_back(Order[MoveTo[BB]]); + if (!Moved[I]) + NewOrder.push_back(Order[I]); + } + + Order.assign(NewOrder); +} + /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -1081,6 +1139,7 @@ ParentRegion = R; orderNodes(); + reorderNodes(); collectInfos(); createFlow(); insertConditions(false); diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s @@ -48,9 +47,9 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @reduced_nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]] ; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]] -; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]] +; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4 ; IR-NEXT: br label [[BB5:%.*]] ; IR: bb3: ; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]] @@ -84,7 +83,7 @@ ; IR: bb16: ; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1 ; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]] -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]] +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4 ; IR-NEXT: br label [[BB20]] ; IR: bb20: ; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ] @@ -93,6 +92,7 @@ ; IR: bb23: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]]) ; IR-NEXT: ret void +; bb: %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp @@ -190,19 +190,19 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]] ; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64 ; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]] ; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16 ; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 ; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0 -; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9 ; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]] ; IR: bb14.lr.ph: ; IR-NEXT: br label [[BB14:%.*]] ; IR: Flow3: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]]) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]]) ; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]]) ; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 @@ -244,7 +244,7 @@ ; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]]) ; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]] ; IR: bb18: -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9 ; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]] ; IR: bb21: @@ -261,21 +261,22 @@ ; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]] ; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16 ; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0 -; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9 ; IR-NEXT: br label [[FLOW1]] ; IR: Flow2: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]]) -; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) -; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0 -; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1 -; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] +; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) +; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0 +; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1 +; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] ; IR: bb31.loopexit: ; IR-NEXT: br label [[FLOW3]] ; IR: bb31: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]]) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef +; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4 ; IR-NEXT: ret void +; bb: %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %my.tmp1 = zext i32 %my.tmp to i64 diff --git a/llvm/test/Transforms/StructurizeCFG/improve-order.ll b/llvm/test/Transforms/StructurizeCFG/improve-order.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/improve-order.ll @@ -0,0 +1,511 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -structurizecfg -structurizecfg-node-reorder-size=3 %s -o - | FileCheck %s +; RUN: opt -S -passes=structurizecfg -structurizecfg-node-reorder-size=3 %s -o - | FileCheck %s + +; Test that exit blocks for a loop are reordered so that they +; appear after their predecessors rather than after the loop. +; This reduces the number of values needed after the loop +; to record if the exit blocks are taken/not taken. + +define void @reorder_loop(i1 %PredEntry, i1 %PredA, i1 %PredC) { +; CHECK-LABEL: @reorder_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FLOW1:%.*]] ] +; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[B:%.*]], label [[FLOW:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[FLOW1]] +; CHECK: C: +; CHECK-NEXT: br i1 [[PREDC:%.*]], label [[D:%.*]], label [[FLOW2:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP5:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow2: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[D]] ], [ true, [[C]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[E:%.*]], label [[FLOW3]] +; CHECK: E: +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: F: +; CHECK-NEXT: unreachable +; CHECK: G: +; CHECK-NEXT: ret void +; CHECK: Flow3: +; CHECK-NEXT: [[TMP5]] = phi i32 [ [[INC2]], [[E]] ], [ undef, [[FLOW2]] ] +; CHECK-NEXT: [[TMP6]] = phi i1 [ true, [[E]] ], [ false, [[FLOW2]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[CMP]], [[E]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: br i1 [[TMP2]], label [[G]], label [[F:%.*]] +; +entry: + br i1 %PredEntry, label %A, label %G + +; A is the loop header. +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ] + br i1 %PredA, label %B, label %C + +; B is a loop exit block. After reorderNodes in StructurizeCFG, this block +; remains after A. Without reorderNodes, B is dependent on the Flow block for +; the loop back edge. B is executes only when the loop exits from A. It does not +; execute during the loop iterations. +B: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard + +C: + br i1 %PredC, label %D, label %E + +; D is a loop exit block. After reorderNodes, this block remains in the loop +; and is not dependent on the Flow block that contains the back edge to the +; loop header. +D: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard + +; E is a latch and exiting block. +E: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %loop.exit.guard + +; Paths fom the exit blocks come here through loop.exit.gaurd. +F: + unreachable + +G: + ret void + +; loop.exit.guard is added by UnifyLoopExits as a common block for all loop +; exits. After StructurizeCFG, the predecessor B will remain in the loop. +loop.exit.guard: + %Guard.G = phi i1 [ true, %E ], [ false, %B ], [ false, %D ] + br i1 %Guard.G, label %G, label %F +} + +; Test that the exit blocks for a nested loop are reordered so that they are +; not dependent on the Flow block that contains the loop back edge. That +; dependence forces the block to appear after the loop, which increases the +; number of live values exiting the loop. + +define void @reorder_inner_loop(i1 %PredEntry, i1 %PredB, i1 %PredD) { +; CHECK-LABEL: @reorder_inner_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]] +; CHECK: A: +; CHECK-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FLOW4:%.*]] ] +; CHECK-NEXT: br label [[B:%.*]] +; CHECK: B: +; CHECK-NEXT: [[INNER1:%.*]] = phi i32 [ [[TMP1:%.*]], [[FLOW1:%.*]] ], [ 0, [[A]] ] +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[C:%.*]], label [[FLOW:%.*]] +; CHECK: C: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[C]] ], [ true, [[B]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[D:%.*]], label [[FLOW1]] +; CHECK: D: +; CHECK-NEXT: br i1 [[PREDD:%.*]], label [[E:%.*]], label [[FLOW2:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP8:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD1:%.*]], label [[B]] +; CHECK: E: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow2: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[E]] ], [ true, [[D]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[F:%.*]], label [[FLOW3]] +; CHECK: F: +; CHECK-NEXT: [[INNER2:%.*]] = add i32 [[INNER1]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i32 [[INNER2]], 20 +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: G: +; CHECK-NEXT: [[OUTER2:%.*]] = add i32 [[OUTER1]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[OUTER2]], 10 +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: H: +; CHECK-NEXT: unreachable +; CHECK: I: +; CHECK-NEXT: ret void +; CHECK: Flow4: +; CHECK-NEXT: [[TMP5]] = phi i32 [ [[OUTER2]], [[G:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, [[G]] ], [ false, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP2]], [[G]] ], [ true, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: br i1 [[TMP6]], label [[I]], label [[H:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP8]] = phi i32 [ [[INNER2]], [[F]] ], [ undef, [[FLOW2]] ] +; CHECK-NEXT: [[TMP9]] = phi i1 [ true, [[F]] ], [ false, [[FLOW2]] ] +; CHECK-NEXT: [[TMP10]] = phi i1 [ [[CMP1]], [[F]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: loop.exit.guard1: +; CHECK-NEXT: br i1 [[TMP2]], label [[G]], label [[FLOW4]] +; +entry: + br i1 %PredEntry, label %A, label %I + +; A is the outer loop header block. +A: + %outer1 = phi i32 [ 0, %entry ], [ %outer2, %G ] + br label %B + +; B is the inner loop header block. +B: + %inner1 = phi i32 [ 0, %A ], [ %inner2, %F ] + br i1 %PredB, label %D, label %C + +; C is a loop exit block. After reorderNodes in StructureCFG, this block +; remains after B rather and is not dependent on the Flow block that contains +; the back edge to the loop header. +C: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard1 + +D: + br i1 %PredD, label %E, label %F + +; E is a loop exit block. After reorderNodes in StructurizeCFG, this block +; remains in the loop and is not dependent on the Flow block that contains +; the back edge to the loop header. +E: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard1 + +; F is a latch block for the inner loop. +F: + %inner2 = add i32 %inner1, 1 + %cmp1 = icmp ult i32 %inner2, 20 + br i1 %cmp1, label %B, label %loop.exit.guard1 + +; G is a latch block for the outer loop. +G: + %outer2 = add i32 %outer1, 1 + %cmp2 = icmp ult i32 %outer2, 10 + br i1 %cmp2, label %A, label %loop.exit.guard + +; Paths from the exit blocks come here through loop.exit.gaurd. +H: + unreachable + +I: + ret void + +; loop.exit.guard is added by UnifyLoopExits as a common block for all loop +; exits. This instance is for the outer loop. +loop.exit.guard: + %Guard.I = phi i1 [ true, %G ], [ %Guard.I.moved, %loop.exit.guard1 ] + br i1 %Guard.I, label %I, label %H + +; loop.exit.guard is added for the inner loop. +loop.exit.guard1: + %Guard.G = phi i1 [ true, %F ], [ false, %C ], [ false, %E ] + %Guard.I.moved = phi i1 [ undef, %F ], [ false, %C ], [ false, %E ] + br i1 %Guard.G, label %G, label %loop.exit.guard +} + +; Test when the common successor is not unreachable. In particular, the common +; successor is inside the region. In this case, it is still helpful to keep +; the exit blocks inside the loop to reduce live values that exit the loop. + +define void @common_exit(i1 %PredEntry, i1 %PredA, i1 %PredB) { +; CHECK-LABEL: @common_exit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP4:%.*]], [[FLOW:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[C:%.*]], label [[FLOW]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[D:%.*]] ], [ false, [[FLOW2:%.*]] ] +; CHECK-NEXT: br i1 [[TMP6:%.*]], label [[B:%.*]], label [[FLOW4:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check1(i32 1) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: C: +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[E:%.*]], label [[FLOW1:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC2:%.*]], [[E]] ], [ undef, [[C]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[CMP:%.*]], [[E]] ], [ true, [[C]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[E]] ], [ true, [[C]] ] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow2: +; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[D]], label [[FLOW3:%.*]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check1(i32 2) #[[ATTR1]] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[A]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ true, [[A]] ] +; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[FLOW1]] ], [ true, [[A]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[A]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW2]], label [[A]] +; CHECK: E: +; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ true, [[B]] ], [ [[TMP0]], [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP8]], label [[F:%.*]], label [[FLOW5:%.*]] +; CHECK: F: +; CHECK-NEXT: br label [[FLOW5]] +; CHECK: Flow5: +; CHECK-NEXT: br label [[G]] +; CHECK: G: +; CHECK-NEXT: ret void +; +entry: + br i1 %PredEntry, label %A, label %G + +; A is the loop header. +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ] + br i1 %PredA, label %B, label %C + +; B is a loop exit block. After reorderNodes in StructurizeCFG, this block +; remains after A rather than moving to after the loop due to a dependence on +; the Flow block that contains the loop back edge. +B: + tail call fastcc void @check1(i32 1) #1 + br label %F + +C: + br i1 %PredB, label %D, label %E + +; D is a loop exit block. After reorderNodes, this block remains in the loop +; rather than moving to after the loop due to a dependence on the Flow block +; that contains the loop back edge. +D: + tail call fastcc void @check1(i32 2) #1 + br label %F + +; E is a latch and exiting block. +E: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %G + +; F is reached by the loop exit blocks, B and D. +F: + br label %G + +G: + ret void +} + +; Test when there are many loop exiting blocks. Previous tests have two exiting +; blocks. This test contains five. Each of the exiting blocks remain in the loop +; rather than being moved to after the loop. + +define void @many_exiting_blocks(i1 %PredEntry, i1 %PredA, i1 %PredC, i1 %PredE, i1 %PredG) { +; CHECK-LABEL: @many_exiting_blocks( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[FLOW11:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP1:%.*]], [[FLOW4:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[B:%.*]], label [[FLOW3:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[FLOW4]] +; CHECK: C: +; CHECK-NEXT: br i1 [[PREDC:%.*]], label [[D:%.*]], label [[FLOW5:%.*]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP6:%.*]], [[FLOW6:%.*]] ], [ undef, [[FLOW3]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW6]] ], [ false, [[FLOW3]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW5]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[D]] ], [ true, [[C]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[E:%.*]], label [[FLOW6]] +; CHECK: E: +; CHECK-NEXT: br i1 [[PREDE:%.*]], label [[F:%.*]], label [[FLOW7:%.*]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP12:%.*]], [[FLOW8:%.*]] ], [ undef, [[FLOW5]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP13:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP14:%.*]], [[FLOW8]] ], [ false, [[FLOW5]] ] +; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP15:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: F: +; CHECK-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW7]] +; CHECK: Flow7: +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[F]] ], [ true, [[E]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[F]] ], [ true, [[E]] ] +; CHECK-NEXT: br i1 [[TMP11]], label [[G:%.*]], label [[FLOW8]] +; CHECK: G: +; CHECK-NEXT: br i1 [[PREDG:%.*]], label [[H:%.*]], label [[FLOW9:%.*]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP19:%.*]], [[FLOW10:%.*]] ], [ undef, [[FLOW7]] ] +; CHECK-NEXT: [[TMP13]] = phi i1 [ [[TMP20:%.*]], [[FLOW10]] ], [ [[TMP10]], [[FLOW7]] ] +; CHECK-NEXT: [[TMP14]] = phi i1 [ [[TMP21:%.*]], [[FLOW10]] ], [ false, [[FLOW7]] ] +; CHECK-NEXT: [[TMP15]] = phi i1 [ [[TMP22:%.*]], [[FLOW10]] ], [ true, [[FLOW7]] ] +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: H: +; CHECK-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP16:%.*]] = phi i1 [ false, [[H]] ], [ [[TMP10]], [[G]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ false, [[H]] ], [ true, [[G]] ] +; CHECK-NEXT: br i1 [[TMP17]], label [[I:%.*]], label [[FLOW10]] +; CHECK: I: +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[K:%.*]] ], [ true, [[LOOP_EXIT_GUARD1:%.*]] ] +; CHECK-NEXT: br i1 [[TMP18]], label [[J:%.*]], label [[FLOW1:%.*]] +; CHECK: J: +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: K: +; CHECK-NEXT: br label [[FLOW:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: br label [[FLOW2:%.*]] +; CHECK: Flow2: +; CHECK-NEXT: br label [[FLOW11]] +; CHECK: L: +; CHECK-NEXT: ret void +; CHECK: Flow10: +; CHECK-NEXT: [[TMP19]] = phi i32 [ [[INC2]], [[I]] ], [ undef, [[FLOW9]] ] +; CHECK-NEXT: [[TMP20]] = phi i1 [ false, [[I]] ], [ [[TMP16]], [[FLOW9]] ] +; CHECK-NEXT: [[TMP21]] = phi i1 [ true, [[I]] ], [ false, [[FLOW9]] ] +; CHECK-NEXT: [[TMP22]] = phi i1 [ [[CMP]], [[I]] ], [ true, [[FLOW9]] ] +; CHECK-NEXT: br label [[FLOW8]] +; CHECK: Flow11: +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: [[GUARD_L_INV:%.*]] = xor i1 [[TMP3]], true +; CHECK-NEXT: [[GUARD_J_INV:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: br i1 [[GUARD_L_INV]], label [[LOOP_EXIT_GUARD1]], label [[FLOW2]] +; CHECK: loop.exit.guard1: +; CHECK-NEXT: br i1 [[GUARD_J_INV]], label [[K]], label [[FLOW]] +; +entry: + br i1 %PredEntry, label %A, label %L + +; A is the loop header block. +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %I ] + br i1 %PredA, label %B, label %C + +; B is a loop exit block. After reorderNodes in StructurizeCFG, this block +; remains after A rather than moving to after the loop due to a dependence +; on the Flow block that contains the loop back edge. +B: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard + +C: + br i1 %PredC, label %D, label %E + +; D is a loop exit block. After reorderNodes, this block remains in the loop +; rather than moving to after the loop. +D: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard + +E: + br i1 %PredE, label %F, label %G + +; F is a loop exit block. After reorderNodes, this block remains in the loop +; rather than moving to after the loop. +F: + tail call fastcc void @check(i32 3) #0 + br label %loop.exit.guard + +G: + br i1 %PredG, label %H, label %I + +; H is a loop exit block. After reorderNodes, this block remains in the loop +; rather than moving to after the loop. +H: + tail call fastcc void @check(i32 4) #0 + br label %loop.exit.guard + +; I is a latch block. +I: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %loop.exit.guard + +J: + br label %L + +K: + br label %L + +L: + ret void + +loop.exit.guard: + %Guard.L = phi i1 [ true, %I ], [ false, %B ], [ false, %D ], [ false, %F ], [ false, %H ] + %Guard.J = phi i1 [ false, %I ], [ true, %B ], [ true, %D ], [ false, %F ], [ false, %H ] + br i1 %Guard.L, label %L, label %loop.exit.guard1 + +loop.exit.guard1: + br i1 %Guard.J, label %J, label %K +} + +declare void @check(i32 noundef) #0 +declare void @check1(i32 noundef) #1 + +attributes #0 = { noreturn nounwind } +attributes #1 = { nounwind } + + +; When there are two blocks that can be moved closer to the predecessor, +; maintain the original, relative order since swapping the order should not +; provide any benefit. In this case both B and C have a single predecessor +; and a single successor. + +define void @same_predecessor(i1 %PredA) { +; CHECK-LABEL: @same_predecessor( +; CHECK-NEXT: A: +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[B:%.*]], label [[FLOW:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A:%.*]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[D:%.*]] +; CHECK: C: +; CHECK-NEXT: br label [[D]] +; CHECK: D: +; CHECK-NEXT: ret void +; +A: + br i1 %PredA, label %C, label %B + +B: + br label %D + +C: + br label %D + +D: + ret void +} + +