Index: llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -264,6 +264,8 @@
 
   void analyzeLoops(RegionNode *N);
 
+  void improveNodeOrder();
+
   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
 
   void gatherPredicates(RegionNode *N);
@@ -420,6 +422,53 @@
   }
 }
 
+/// Change the node ordering to decrease the range of live values, especially
+/// the values that capture the control flow path for branches. We do this by
+/// moving blocks with a single predecessor and a single successor so that
+/// they appear directly after their predecessor. The motivation is to move
+/// loop exit blocks to inside the loop: in cases where a loop has a large
+/// number of exit blocks, this reduces the number of values needed across the
+/// loop boundary.
+void StructurizeCFG::improveNodeOrder() {
+  SmallVector<RegionNode *> NewOrder;
+  DenseMap<BasicBlock *, size_t> MoveTo;
+  BitVector Moved(Order.size());
+
+  // For small regions, there is no benefit to changing the node order.
+  if (Order.size() <= 3)
+    return;
+
+  // The algorithm works with two passes over Order. The first pass identifies
+  // the blocks to move and the positions to move them to. The second pass
+  // creates the new order based upon this information. We only move blocks
+  // with a single predecessor and a single successor. If there are multiple
+  // candidates for the same predecessor, maintain the original order.
+  for (size_t I = 0, E = Order.size(); I < E; ++I) {
+    auto *BB = Order[I]->getEntry();
+    auto *Pred = BB->getSinglePredecessor();
+    auto *Succ = BB->getSingleSuccessor();
+    if (Pred && Visited.count(Pred) && Succ && !Visited.count(Succ)) {
+      if (MoveTo.count(Pred))
+        Moved.reset(MoveTo[Pred]);
+      MoveTo[Pred] = I;
+      Moved.set(I);
+    }
+  }
+
+  // If no blocks have been moved then the original order is good.
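+  // Illustration with hypothetical blocks: if exit block X has its single
+  // predecessor P inside the region and its single successor outside of it,
+  // the pass above records MoveTo[P] = index-of-X and marks X in Moved; the
+  // pass below then re-emits X right next to P instead of at X's original
+  // position.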
+ if (!Moved.count()) + return; + + for (size_t I = 0, E = Order.size(); I < E; ++I) { + auto *BB = Order[I]->getEntry(); + if (MoveTo.count(BB)) + NewOrder.push_back(Order[MoveTo[BB]]); + if (!Moved[I]) + NewOrder.push_back(Order[I]); + } + + Order.assign(NewOrder); +} + /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -1082,6 +1131,7 @@ orderNodes(); collectInfos(); + improveNodeOrder(); createFlow(); insertConditions(false); insertConditions(true); Index: llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll +++ llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll @@ -12,29 +12,29 @@ define amdgpu_ps void @return_void(float %0) #0 { ; CHECK-LABEL: return_void: ; CHECK: ; %bb.0: ; %main_body -; CHECK-NEXT: s_mov_b64 s[0:1], exec -; CHECK-NEXT: s_mov_b32 s2, 0x41200000 -; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0 -; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc -; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3] -; CHECK-NEXT: s_cbranch_execz .LBB0_3 -; CHECK-NEXT: .LBB0_1: ; %loop +; CHECK-NEXT: s_mov_b64 s[2:3], exec +; CHECK-NEXT: s_mov_b32 s0, 0x41200000 +; CHECK-NEXT: s_mov_b64 vcc, exec +; CHECK-NEXT: v_cmp_ngt_f32_e64 s[0:1], s0, v0 +; CHECK-NEXT: s_cbranch_execz .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %end +; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 done vm +; CHECK-NEXT: .LBB0_2: ; %Flow1 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; CHECK-NEXT: s_cbranch_execz .LBB0_5 +; CHECK-NEXT: .LBB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec ; CHECK-NEXT: s_cbranch_scc0 .LBB0_6 -; CHECK-NEXT: ; %bb.2: ; %loop -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; %bb.4: ; %loop +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 exec, 0 ; CHECK-NEXT: s_mov_b64 vcc, 0 -; CHECK-NEXT: s_branch .LBB0_1 -; CHECK-NEXT: .LBB0_3: ; %Flow1 -; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] -; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1] -; CHECK-NEXT: s_cbranch_execz .LBB0_5 -; CHECK-NEXT: ; %bb.4: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 -; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 done vm +; CHECK-NEXT: s_branch .LBB0_3 ; CHECK-NEXT: .LBB0_5: ; %UnifiedReturnBlock ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: .LBB0_6: @@ -57,28 +57,28 @@ define amdgpu_ps void @return_void_compr(float %0) #0 { ; CHECK-LABEL: return_void_compr: ; CHECK: ; %bb.0: ; %main_body -; CHECK-NEXT: s_mov_b64 s[0:1], exec -; CHECK-NEXT: s_mov_b32 s2, 0x41200000 -; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0 -; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc -; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3] -; CHECK-NEXT: s_cbranch_execz .LBB1_3 -; CHECK-NEXT: .LBB1_1: ; %loop +; CHECK-NEXT: s_mov_b64 s[2:3], exec +; CHECK-NEXT: s_mov_b32 s0, 0x41200000 +; CHECK-NEXT: s_mov_b64 vcc, exec +; CHECK-NEXT: v_cmp_ngt_f32_e64 s[0:1], s0, v0 +; CHECK-NEXT: s_cbranch_execz .LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %end +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: exp mrt0 v0, off, v0, off done compr vm +; CHECK-NEXT: .LBB1_2: ; %Flow1 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; CHECK-NEXT: 
s_cbranch_execz .LBB1_5 +; CHECK-NEXT: .LBB1_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec ; CHECK-NEXT: s_cbranch_scc0 .LBB1_6 -; CHECK-NEXT: ; %bb.2: ; %loop -; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: ; %bb.4: ; %loop +; CHECK-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; CHECK-NEXT: s_mov_b64 exec, 0 ; CHECK-NEXT: s_mov_b64 vcc, 0 -; CHECK-NEXT: s_branch .LBB1_1 -; CHECK-NEXT: .LBB1_3: ; %Flow1 -; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] -; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1] -; CHECK-NEXT: s_cbranch_execz .LBB1_5 -; CHECK-NEXT: ; %bb.4: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: exp mrt0 v0, off, v0, off done compr vm +; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_5: ; %UnifiedReturnBlock ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: .LBB1_6: Index: llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -48,9 +48,9 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @reduced_nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]] ; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]] -; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]] +; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4 ; IR-NEXT: br label [[BB5:%.*]] ; IR: bb3: ; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]] @@ -84,7 +84,7 @@ ; IR: bb16: ; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1 ; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]] -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]] +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4 ; IR-NEXT: br label [[BB20]] ; IR: bb20: ; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ] @@ -93,6 +93,7 @@ ; IR: bb23: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]]) ; IR-NEXT: ret void +; bb: %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp @@ -190,19 +191,19 @@ ; GCN-NEXT: s_endpgm ; IR-LABEL: @nested_loop_conditions( ; IR-NEXT: bb: -; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]] ; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64 ; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]] ; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16 ; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 ; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0 -; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9 ; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]] ; IR: bb14.lr.ph: ; IR-NEXT: br label [[BB14:%.*]] ; IR: Flow3: -; IR-NEXT: 
call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]]) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP15:%.*]]) ; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]]) ; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 @@ -222,29 +223,30 @@ ; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP5]], 1 ; IR-NEXT: br i1 [[TMP6]], label [[BB13:%.*]], label [[BB31]] ; IR: bb14: -; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], [[FLOW1:%.*]] ], [ 0, [[BB14_LR_PH]] ] -; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], [[BB14_LR_PH]] ], [ [[TMP12:%.*]], [[FLOW1]] ] -; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], [[BB14_LR_PH]] ], [ [[TMP11:%.*]], [[FLOW1]] ] +; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP15]], [[FLOW2:%.*]] ], [ 0, [[BB14_LR_PH]] ] +; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], [[BB14_LR_PH]] ], [ [[TMP12:%.*]], [[FLOW2]] ] +; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], [[BB14_LR_PH]] ], [ [[TMP11:%.*]], [[FLOW2]] ] ; IR-NEXT: [[MY_TMP15:%.*]] = icmp eq i32 [[MY_TMP1037]], 1 +; IR-NEXT: br i1 true, label [[BB31_LOOPEXIT:%.*]], label [[FLOW1:%.*]] +; IR: Flow1: ; IR-NEXT: [[TMP8:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP15]]) ; IR-NEXT: [[TMP9:%.*]] = extractvalue { i1, i64 } [[TMP8]], 0 ; IR-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP8]], 1 -; IR-NEXT: br i1 [[TMP9]], label [[BB16:%.*]], label [[FLOW1]] +; IR-NEXT: br i1 [[TMP9]], label [[BB16:%.*]], label [[FLOW2]] ; IR: bb16: ; IR-NEXT: [[MY_TMP17:%.*]] = bitcast i64 [[MY_TMP3]] to <2 x i32> ; IR-NEXT: br label [[BB18:%.*]] -; IR: Flow1: -; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ] -; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ] -; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ] +; IR: Flow2: +; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[FLOW1]] ] +; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[FLOW1]] ] +; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[FLOW1]] ] +; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[FLOW1]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]]) -; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) -; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]]) -; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]] +; IR-NEXT: [[TMP15]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) +; IR-NEXT: [[TMP16:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP15]]) +; IR-NEXT: br i1 [[TMP16]], label [[FLOW3]], label [[BB14]] ; IR: bb18: -; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9 ; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]] ; IR: bb21: @@ -261,21 +263,16 @@ ; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]] ; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16 ; IR-NEXT: [[MY_TMP10]] 
= extractelement <4 x i32> [[MY_TMP9]], i64 0 -; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9 -; IR-NEXT: br label [[FLOW1]] -; IR: Flow2: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]]) -; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) -; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0 -; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1 -; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] +; IR-NEXT: br label [[FLOW2]] ; IR: bb31.loopexit: -; IR-NEXT: br label [[FLOW3]] +; IR-NEXT: br label [[FLOW1]] ; IR: bb31: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]]) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef +; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4 ; IR-NEXT: ret void +; bb: %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %my.tmp1 = zext i32 %my.tmp to i64 Index: llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -83,55 +83,48 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 { ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill ; GCN: bb.0.entry: - ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN: liveins: $vgpr0 ; GCN: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec - ; GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec - ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc - ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec - ; GCN: bb.1.flow.preheader: - ; GCN: successors: %bb.2(0x80000000) - ; GCN: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3 - ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN: renamable $sgpr4_sgpr5 = S_MOV_B64 0 - ; GCN: bb.2.flow: - ; GCN: successors: %bb.3(0x04000000), %bb.2(0x7c000000) - ; GCN: liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 - ; GCN: renamable $sgpr6_sgpr7 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc - ; GCN: renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GCN: $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc - ; GCN: S_CBRANCH_EXECNZ %bb.2, implicit $exec - ; GCN: bb.3.Flow: - ; GCN: successors: %bb.4(0x80000000) - ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 - ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GCN: bb.4.Flow1: - ; GCN: successors: %bb.5(0x40000000), %bb.7(0x40000000) - ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; GCN: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc - ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec - ; GCN: bb.5.kill0: - ; GCN: 
successors: %bb.6(0x40000000), %bb.8(0x40000000) - ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; GCN: dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc - ; GCN: S_CBRANCH_SCC0 %bb.8, implicit $scc - ; GCN: bb.6.kill0: - ; GCN: successors: %bb.7(0x80000000) - ; GCN: liveins: $sgpr2_sgpr3, $scc + ; GCN: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, killed $vgpr1, 0, implicit $mode, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: bb.1.kill0: + ; GCN: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; GCN: liveins: $exec:0x000000000000000F, $vgpr0, $sgpr0_sgpr1 + ; GCN: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN: dead renamable $sgpr2_sgpr3 = S_ANDN2_B64 killed renamable $sgpr2_sgpr3, $exec, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.7, implicit $scc + ; GCN: bb.2.kill0: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr0_sgpr1, $scc ; GCN: $exec = S_MOV_B64 0 - ; GCN: bb.7.end: - ; GCN: successors: %bb.9(0x80000000) + ; GCN: bb.3.Flow1: + ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000) + ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec + ; GCN: bb.4.flow.preheader: + ; GCN: successors: %bb.5(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr2_sgpr3 + ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 0 + ; GCN: bb.5.flow: + ; GCN: successors: %bb.6(0x04000000), %bb.5(0x7c000000) + ; GCN: liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3 + ; GCN: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc + ; GCN: renamable $sgpr0_sgpr1 = S_OR_B64 killed renamable $sgpr4_sgpr5, killed renamable $sgpr0_sgpr1, implicit-def $scc + ; GCN: $exec = S_ANDN2_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_EXECNZ %bb.5, implicit $exec + ; GCN: bb.6.Flow2: + ; GCN: successors: %bb.8(0x80000000) ; GCN: liveins: $sgpr2_sgpr3 ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc - ; GCN: S_BRANCH %bb.9 - ; GCN: bb.8: + ; GCN: S_BRANCH %bb.8 + ; GCN: bb.7: ; GCN: $exec = S_MOV_B64 0 ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec ; GCN: S_ENDPGM 0 - ; GCN: bb.9: + ; GCN: bb.8: entry: %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val %cmp0 = fcmp olt float %.i0, 0.000000e+00 Index: llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -462,61 +462,66 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %src1) { ; SI-LABEL: name: livevariables_update_missed_block ; SI: bb.0.entry: - ; SI-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; SI-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]](s32), implicit $exec - ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; 
SI-NEXT: S_BRANCH %bb.2 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; SI-NEXT: $vcc_lo = COPY killed [[S_MOV_B32_]] + ; SI-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc_lo + ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.if.then: - ; SI-NEXT: successors: %bb.7(0x80000000) + ; SI-NEXT: successors: %bb.2(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4) - ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec - ; SI-NEXT: %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1 + ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed [[COPY1]](s32), 0, implicit $exec + ; SI-NEXT: %45:vgpr_32, dead %47:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %45, %subreg.sub1 ; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1) ; SI-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; SI-NEXT: S_BRANCH %bb.7 ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.2.if.then9: - ; SI-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; SI-NEXT: bb.2.Flow: + ; SI-NEXT: successors: %bb.3(0x40000000), %bb.6(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: S_CBRANCH_SCC0 %bb.4, implicit undef $scc + ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.3: - ; SI-NEXT: successors: %bb.6(0x80000000) + ; SI-NEXT: bb.3.if.then9: + ; SI-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: S_BRANCH %bb.6 + ; SI-NEXT: S_CBRANCH_SCC0 %bb.5, implicit undef $scc ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.4.sw.bb: - ; SI-NEXT: successors: %bb.6(0x80000000) + ; SI-NEXT: bb.4: + ; SI-NEXT: successors: %bb.7(0x80000000) + ; SI-NEXT: {{ $}} + ; SI-NEXT: S_BRANCH %bb.7 + ; SI-NEXT: {{ $}} + ; SI-NEXT: bb.5.sw.bb: + ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec - ; SI-NEXT: [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s8) from `i8 addrspace(1)* null`, addrspace 1) - ; SI-NEXT: S_BRANCH %bb.6 + ; SI-NEXT: [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8) from `i8 addrspace(1)* null`, addrspace 1) + ; SI-NEXT: S_BRANCH %bb.7 ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.5.Flow: - ; SI-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) + ; 
SI-NEXT: bb.6.Flow1: + ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %51:vgpr_32, %bb.6 - ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: S_BRANCH %bb.1 + ; SI-NEXT: SI_END_CF killed [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: S_BRANCH %bb.8 ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.6.sw.bb18: - ; SI-NEXT: successors: %bb.5(0x80000000) + ; SI-NEXT: bb.7.sw.bb18: + ; SI-NEXT: successors: %bb.6(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %39:vgpr_32, %bb.3, [[GLOBAL_LOAD_UBYTE1]], %bb.4 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %41:vgpr_32, %bb.4, [[GLOBAL_LOAD_UBYTE1]], %bb.5 ; SI-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec - ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B2]], killed [[PHI1]], 0, 0, implicit $exec :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; SI-NEXT: S_BRANCH %bb.5 + ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B2]], killed [[PHI]], 0, 0, implicit $exec :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} - ; SI-NEXT: bb.7.UnifiedReturnBlock: - ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: bb.8.UnifiedReturnBlock: ; SI-NEXT: S_ENDPGM 0 entry: %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/test/CodeGen/AMDGPU/wave32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wave32.ll +++ llvm/test/CodeGen/AMDGPU/wave32.ll @@ -177,10 +177,10 @@ ; GCN: s_cbranch_execz ; GCN: ; %bb.{{[0-9]+}}: ; GCN: .LBB{{.*}}: -; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}} -; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] -; GCN: ; %bb.{{[0-9]+}}: +; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}} +; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} ; GCN: ; %bb.{{[0-9]+}}: +; GCN: .LBB{{.*}}: ; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}} ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}] Index: llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll =================================================================== --- llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll +++ llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll @@ -28,7 +28,7 @@ ; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: LOOP.HEADER: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[FLOW4:%.*]] ] ; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]] @@ -37,10 +37,9 @@ ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535 ; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 1 ; CHECK-NEXT: br i1 [[TMP17]], label [[BB62:%.*]], label [[FLOW:%.*]] -; CHECK: Flow1: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[INCREMENT_I]] 
], [ true, [[BB62]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] +; CHECK: Flow2: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[FLOW1:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[FLOW1]] ] ; CHECK-NEXT: br label [[FLOW]] ; CHECK: bb18: ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0 @@ -48,10 +47,10 @@ ; CHECK-NEXT: [[TMP24:%.*]] = urem i32 [[TMP22]], 52 ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] -; CHECK: Flow2: -; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP2]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP4:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP5:%.*]], [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW4]] ; CHECK: INNER_LOOP: ; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ] ; CHECK-NEXT: call void asm sideeffect " @@ -61,32 +60,31 @@ ; CHECK: INNER_LOOP_BREAK: ; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2 ; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #[[ATTR0:[0-9]+]] -; CHECK-NEXT: br label [[FLOW2:%.*]] +; CHECK-NEXT: br label [[FLOW3:%.*]] ; CHECK: bb62: ; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271 -; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]] -; CHECK: Flow3: -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] -; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] -; CHECK: Flow4: -; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 true, label [[BB64:%.*]], label [[FLOW1]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP0]], [[FLOW2:%.*]] ], [ undef, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP5]] = phi i1 [ [[TMP1]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP6]], label [[BB18]], label [[FLOW3]] ; CHECK: bb64: ; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]] -; CHECK-NEXT: br label [[RETURN]] -; CHECK: Flow: -; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: Flow1: +; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW2]] ; CHECK: INCREMENT_I: ; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336 -; CHECK-NEXT: br label [[FLOW1]] +; CHECK-NEXT: br label [[FLOW2]] ; CHECK: END_ELSE_BLOCK: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337 -; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP3]], -1 -; CHECK-NEXT: br label [[FLOW3]] +; CHECK-NEXT: 
[[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[TMP2]], -1 +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[RETURN:%.*]], label [[LOOP_HEADER]] ; CHECK: RETURN: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x99 ; CHECK-NEXT: store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8 Index: llvm/test/Transforms/StructurizeCFG/improve-order.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/StructurizeCFG/improve-order.ll @@ -0,0 +1,454 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s +; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s + +; Test that exit blocks for a loop are reordered so that they +; appear after their predecessors rathers than after the loop. +; This reduces the number of values needed after the loop +; to record if the exit blocks are taken/not taken. + +define void @reorder_loop(i1 %PredEntry, i1 %PredA, i1 %PredB) { +; CHECK-LABEL: @reorder_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW1:%.*]] ] +; CHECK-NEXT: br i1 true, label [[B:%.*]], label [[FLOW:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[C:%.*]], label [[FLOW1]] +; CHECK: C: +; CHECK-NEXT: br i1 true, label [[D:%.*]], label [[FLOW2:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP3:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP4:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow2: +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[E:%.*]], label [[FLOW3]] +; CHECK: E: +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: F: +; CHECK-NEXT: unreachable +; CHECK: G: +; CHECK-NEXT: ret void +; CHECK: Flow3: +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[INC2]], [[E]] ], [ undef, [[FLOW2]] ] +; CHECK-NEXT: [[TMP4]] = phi i1 [ true, [[E]] ], [ false, [[FLOW2]] ] +; CHECK-NEXT: [[TMP5]] = phi i1 [ [[CMP]], [[E]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: br i1 [[TMP1]], label [[G]], label [[F:%.*]] +; +entry: + br i1 %PredEntry, label %A, label %G + +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ] + br i1 %PredA, label %B, label %C + +B: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard + +C: + br i1 %PredB, label %D, label %E + +D: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard + +E: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %loop.exit.guard + +F: + unreachable + +G: + ret void + +loop.exit.guard: + %Guard.G = phi i1 [ true, %E ], [ false, %B ], [ false, %D ] + br 
i1 %Guard.G, label %G, label %F +} + + +define void @reorder_inner_loop(i1 %PredEntry, i1 %PredA, i1 %PredB) { +; CHECK-LABEL: @reorder_inner_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]] +; CHECK: A: +; CHECK-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW4:%.*]] ] +; CHECK-NEXT: br label [[B:%.*]] +; CHECK: B: +; CHECK-NEXT: [[INNER1:%.*]] = phi i32 [ [[TMP0:%.*]], [[FLOW1:%.*]] ], [ 0, [[A]] ] +; CHECK-NEXT: br i1 true, label [[C:%.*]], label [[FLOW:%.*]] +; CHECK: C: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[D:%.*]], label [[FLOW1]] +; CHECK: D: +; CHECK-NEXT: br i1 true, label [[E:%.*]], label [[FLOW2:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP6:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_EXIT_GUARD1:%.*]], label [[B]] +; CHECK: E: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow2: +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[F:%.*]], label [[FLOW3]] +; CHECK: F: +; CHECK-NEXT: [[INNER2:%.*]] = add i32 [[INNER1]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i32 [[INNER2]], 20 +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: G: +; CHECK-NEXT: [[OUTER2:%.*]] = add i32 [[OUTER1]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[OUTER2]], 10 +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: H: +; CHECK-NEXT: unreachable +; CHECK: I: +; CHECK-NEXT: ret void +; CHECK: Flow4: +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[OUTER2]], [[G:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[G]] ], [ false, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP2]], [[G]] ], [ true, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: br i1 [[TMP4]], label [[I]], label [[H:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP6]] = phi i32 [ [[INNER2]], [[F]] ], [ undef, [[FLOW2]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ true, [[F]] ], [ false, [[FLOW2]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[CMP1]], [[F]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: loop.exit.guard1: +; CHECK-NEXT: br i1 [[TMP1]], label [[G]], label [[FLOW4]] +; +entry: + br i1 %PredEntry, label %A, label %I + +A: + %outer1 = phi i32 [ 0, %entry ], [ %outer2, %G ] + br label %B + +B: + %inner1 = phi i32 [ 0, %A ], [ %inner2, %F ] + br i1 %PredA, label %D, label %C + +C: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard1 + +D: + br i1 %PredB, label %E, label %F + +E: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard1 + +F: + %inner2 = add i32 %inner1, 1 + %cmp1 = icmp ult i32 %inner2, 20 + br i1 %cmp1, label %B, label %loop.exit.guard1 + +G: + %outer2 = add i32 %outer1, 1 + %cmp2 = icmp ult i32 %outer2, 10 + br i1 %cmp2, label %A, label %loop.exit.guard + +H: + unreachable + +I: + ret void + +loop.exit.guard: + %Guard.I = phi i1 [ true, %G ], [ %Guard.I.moved, %loop.exit.guard1 ] + br i1 %Guard.I, label %I, label %H + +loop.exit.guard1: + %Guard.G = phi i1 [ true, %F ], [ false, %C ], [ false, %E ] + %Guard.I.moved = phi i1 [ undef, 
%F ], [ false, %C ], [ false, %E ] + br i1 %Guard.G, label %G, label %loop.exit.guard +} + +; Test when the common successor is not unreachable. + +define void @common_exit(i1 %PredEntry, i1 %PredA, i1 %PredB) { +; CHECK-LABEL: @common_exit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP4:%.*]], [[FLOW:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[C:%.*]], label [[FLOW]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[D:%.*]] ], [ false, [[FLOW2:%.*]] ] +; CHECK-NEXT: br i1 [[TMP6:%.*]], label [[B:%.*]], label [[FLOW4:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check1(i32 1) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: C: +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[E:%.*]], label [[FLOW1:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC2:%.*]], [[E]] ], [ undef, [[C]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[CMP:%.*]], [[E]] ], [ true, [[C]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[E]] ], [ true, [[C]] ] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow2: +; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[D]], label [[FLOW3:%.*]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check1(i32 2) #[[ATTR1]] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[A]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ true, [[A]] ] +; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[FLOW1]] ], [ true, [[A]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[A]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW2]], label [[A]] +; CHECK: E: +; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ true, [[B]] ], [ [[TMP0]], [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP8]], label [[F:%.*]], label [[FLOW5:%.*]] +; CHECK: F: +; CHECK-NEXT: br label [[FLOW5]] +; CHECK: Flow5: +; CHECK-NEXT: br label [[G]] +; CHECK: G: +; CHECK-NEXT: ret void +; +entry: + br i1 %PredEntry, label %A, label %G + +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ] + br i1 %PredA, label %B, label %C + +B: + tail call fastcc void @check1(i32 1) #1 + br label %F + +C: + br i1 %PredB, label %D, label %E + +D: + tail call fastcc void @check1(i32 2) #1 + br label %F + +E: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %G + +F: + br label %G + +G: + ret void +} + +; Test when there are multiple common successor blocks. 
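+; In the function below, every exiting block of the loop (%B, %D, %F, %H and
+; %I) funnels into %loop.exit.guard, and control then continues either
+; straight to %L or on to %J or %K, which both branch to %L; the two guard
+; booleans %Guard.L and %Guard.J select among these successors.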
+ +define void @multiple_common_successors(i1 %PredEntry, i1 %PredA, i1 %PredB, i1 %PredC, i1 %PredD) { +; CHECK-LABEL: @multiple_common_successors( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PREDD_INV:%.*]] = xor i1 [[PREDD:%.*]], true +; CHECK-NEXT: [[PREDC_INV:%.*]] = xor i1 [[PREDC:%.*]], true +; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[FLOW11:%.*]] +; CHECK: A: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP0:%.*]], [[FLOW4:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br i1 true, label [[B:%.*]], label [[FLOW3:%.*]] +; CHECK: B: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW3]] +; CHECK: Flow3: +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[C:%.*]], label [[FLOW4]] +; CHECK: C: +; CHECK-NEXT: br i1 true, label [[D:%.*]], label [[FLOW5:%.*]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP4:%.*]], [[FLOW6:%.*]] ], [ undef, [[FLOW3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW6]] ], [ false, [[FLOW3]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]] +; CHECK: D: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW5]] +; CHECK: Flow5: +; CHECK-NEXT: br i1 [[PREDB_INV]], label [[E:%.*]], label [[FLOW6]] +; CHECK: E: +; CHECK-NEXT: br i1 true, label [[F:%.*]], label [[FLOW7:%.*]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP9:%.*]], [[FLOW8:%.*]] ], [ undef, [[FLOW5]] ] +; CHECK-NEXT: [[TMP5]] = phi i1 [ [[TMP10:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ] +; CHECK-NEXT: [[TMP6]] = phi i1 [ [[TMP11:%.*]], [[FLOW8]] ], [ false, [[FLOW5]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP12:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ] +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: F: +; CHECK-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW7]] +; CHECK: Flow7: +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[F]] ], [ true, [[E]] ] +; CHECK-NEXT: br i1 [[PREDC_INV]], label [[G:%.*]], label [[FLOW8]] +; CHECK: G: +; CHECK-NEXT: br i1 true, label [[H:%.*]], label [[FLOW9:%.*]] +; CHECK: Flow8: +; CHECK-NEXT: [[TMP9]] = phi i32 [ [[TMP15:%.*]], [[FLOW10:%.*]] ], [ undef, [[FLOW7]] ] +; CHECK-NEXT: [[TMP10]] = phi i1 [ [[TMP16:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW7]] ] +; CHECK-NEXT: [[TMP11]] = phi i1 [ [[TMP17:%.*]], [[FLOW10]] ], [ false, [[FLOW7]] ] +; CHECK-NEXT: [[TMP12]] = phi i1 [ [[TMP18:%.*]], [[FLOW10]] ], [ true, [[FLOW7]] ] +; CHECK-NEXT: br label [[FLOW6]] +; CHECK: H: +; CHECK-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]] +; CHECK-NEXT: br label [[FLOW9]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[H]] ], [ [[TMP8]], [[G]] ] +; CHECK-NEXT: br i1 [[PREDD_INV]], label [[I:%.*]], label [[FLOW10]] +; CHECK: I: +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10 +; CHECK-NEXT: br label [[FLOW10]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ false, [[K:%.*]] ], [ true, [[LOOP_EXIT_GUARD1:%.*]] ] +; CHECK-NEXT: br i1 [[TMP14]], label [[J:%.*]], label [[FLOW1:%.*]] +; CHECK: J: +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: K: +; CHECK-NEXT: br label [[FLOW:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: br label [[FLOW2:%.*]] +; CHECK: Flow2: 
+; CHECK-NEXT: br label [[FLOW11]] +; CHECK: L: +; CHECK-NEXT: ret void +; CHECK: Flow10: +; CHECK-NEXT: [[TMP15]] = phi i32 [ [[INC2]], [[I]] ], [ undef, [[FLOW9]] ] +; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[I]] ], [ [[TMP13]], [[FLOW9]] ] +; CHECK-NEXT: [[TMP17]] = phi i1 [ true, [[I]] ], [ false, [[FLOW9]] ] +; CHECK-NEXT: [[TMP18]] = phi i1 [ [[CMP]], [[I]] ], [ true, [[FLOW9]] ] +; CHECK-NEXT: br label [[FLOW8]] +; CHECK: Flow11: +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: loop.exit.guard: +; CHECK-NEXT: [[GUARD_L_INV:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: [[GUARD_J_INV:%.*]] = xor i1 [[TMP1]], true +; CHECK-NEXT: br i1 [[GUARD_L_INV]], label [[LOOP_EXIT_GUARD1]], label [[FLOW2]] +; CHECK: loop.exit.guard1: +; CHECK-NEXT: br i1 [[GUARD_J_INV]], label [[K]], label [[FLOW]] +; +entry: + br i1 %PredEntry, label %A, label %L + +A: + %inc1 = phi i32 [ 0, %entry ], [ %inc2, %I ] + br i1 %PredA, label %B, label %C + +B: + tail call fastcc void @check(i32 1) #0 + br label %loop.exit.guard + +C: + br i1 %PredB, label %D, label %E + +D: + tail call fastcc void @check(i32 2) #0 + br label %loop.exit.guard + +E: + br i1 %PredC, label %F, label %G + +F: + tail call fastcc void @check(i32 3) #0 + br label %loop.exit.guard + +G: + br i1 %PredD, label %H, label %I + +H: + tail call fastcc void @check(i32 4) #0 + br label %loop.exit.guard + +I: + %inc2 = add i32 %inc1, 1 + %cmp = icmp ult i32 %inc2, 10 + br i1 %cmp, label %A, label %loop.exit.guard + +J: + br label %L + +K: + br label %L + +L: + ret void + +loop.exit.guard: + %Guard.L = phi i1 [ true, %I ], [ false, %B ], [ false, %D ], [ false, %F ], [ false, %H ] + %Guard.J = phi i1 [ false, %I ], [ true, %B ], [ true, %D ], [ false, %F ], [ false, %H ] + br i1 %Guard.L, label %L, label %loop.exit.guard1 + +loop.exit.guard1: + br i1 %Guard.J, label %J, label %K +} + +declare void @check(i32 noundef) #0 +declare void @check1(i32 noundef) #1 + +attributes #0 = { noreturn nounwind } +attributes #1 = { nounwind } + + +; When there are two blocks that can be moved closer to the predecessor, +; maintain the original, relative order since swapping the order shouldn't +; provide any benefit. 
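+; improveNodeOrder implements this by letting a later candidate for the same
+; predecessor overwrite the earlier candidate's MoveTo slot and clear its
+; Moved bit, so only one of the blocks is ever relocated next to the shared
+; predecessor.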
+ +define void @same_predecessor(i1 %PredA) { +; CHECK-LABEL: @same_predecessor( +; CHECK-NEXT: A: +; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true +; CHECK-NEXT: br i1 [[PREDA_INV]], label [[B:%.*]], label [[FLOW:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A:%.*]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[D:%.*]] +; CHECK: C: +; CHECK-NEXT: br label [[D]] +; CHECK: D: +; CHECK-NEXT: ret void +; +A: + br i1 %PredA, label %C, label %B + +B: + br label %D + +C: + br label %D + +D: + ret void +} + + Index: llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll =================================================================== --- llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll +++ llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll @@ -13,91 +13,90 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true ; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]] -; CHECK: Flow12: -; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]] ; CHECK: exit.true: -; CHECK-NEXT: br label [[FLOW13]] +; CHECK-NEXT: br label [[FLOW3:%.*]] ; CHECK: Flow13: ; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]] ; CHECK: NewDefault: ; CHECK-NEXT: br label [[EXIT_FALSE:%.*]] ; CHECK: Flow14: -; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13:%.*]] ] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit.false: ; CHECK-NEXT: br label [[FLOW14]] ; CHECK: outer.loop.header: -; CHECK-NEXT: br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]] +; CHECK-NEXT: br i1 true, label [[EXIT_TRUE:%.*]], label [[FLOW3]] +; CHECK: Flow3: +; CHECK-NEXT: br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW4:%.*]] ; CHECK: outer.loop.body: ; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]] -; CHECK: Flow3: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP12:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW12:%.*]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP11:%.*]], [[FLOW12]] ], [ false, [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW13]], label [[OUTER_LOOP_HEADER]] ; CHECK: inner.loop.header: -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ] -; CHECK-NEXT: br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]] -; CHECK: Flow6: -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ] -; CHECK-NEXT: br label [[FLOW5:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW5:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW5]] ; CHECK: Flow7: -; CHECK-NEXT: br i1 [[TMP10:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ] +; CHECK-NEXT: br label [[FLOW6:%.*]] +; CHECK: Flow8: +; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[INNER_LOOP_END:%.*]], 
label [[FLOW9:%.*]] ; CHECK: inner.loop.end: -; CHECK-NEXT: br label [[FLOW8]] +; CHECK-NEXT: br label [[FLOW9]] ; CHECK: inner.loop.body: ; CHECK-NEXT: br i1 [[B3_INV]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]] ; CHECK: inner.loop.body.else: ; CHECK-NEXT: br label [[FLOW]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ] -; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]] ; CHECK: inner.loop.body.then: ; CHECK-NEXT: br label [[INNER_LOOP_COND]] -; CHECK: Flow4: -; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP10]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]] +; CHECK: Flow5: +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW6]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP17:%.*]], [[FLOW6]] ], [ [[TMP3]], [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP18:%.*]], [[FLOW6]] ], [ false, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9]] = phi i1 [ false, [[FLOW6]] ], [ true, [[INNER_LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP6]], label [[FLOW8:%.*]], label [[INNER_LOOP_HEADER]] ; CHECK: inner.loop.cond: ; CHECK-NEXT: br label [[NODEBLOCK:%.*]] ; CHECK: NodeBlock: ; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1 -; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]] -; CHECK: Flow8: -; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ] -; CHECK-NEXT: br i1 [[TMP9]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]] +; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW6]] +; CHECK: Flow9: +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW8]] ] +; CHECK-NEXT: br i1 [[TMP8]], label [[LEAFBLOCK1:%.*]], label [[FLOW10:%.*]] ; CHECK: LeafBlock1: ; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1 -; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]] +; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW11:%.*]] ; CHECK: LeafBlock: ; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]] -; CHECK: Flow9: -; CHECK-NEXT: [[TMP12]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP11]], [[FLOW8]] ] -; CHECK-NEXT: br i1 [[TMP13]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]] +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW7:%.*]] +; CHECK: Flow10: +; CHECK-NEXT: [[TMP11]] = phi i1 [ [[TMP13:%.*]], [[FLOW11]] ], [ [[TMP7]], [[FLOW9]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ [[TMP14:%.*]], [[FLOW11]] ], [ [[TMP10]], [[FLOW9]] ] +; CHECK-NEXT: br i1 [[TMP12]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW12]] ; CHECK: inner.loop.break: +; CHECK-NEXT: br label [[FLOW11]] +; CHECK: Flow11: +; 
CHECK-NEXT: [[TMP13]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ] +; CHECK-NEXT: [[TMP14]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10]], [[LEAFBLOCK1]] ] ; CHECK-NEXT: br label [[FLOW10]] -; CHECK: Flow10: -; CHECK-NEXT: [[TMP14]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ] -; CHECK-NEXT: [[TMP15]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11]], [[LEAFBLOCK1]] ] -; CHECK-NEXT: br label [[FLOW9]] ; CHECK: outer.loop.cleanup: ; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]] -; CHECK: Flow11: -; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ] -; CHECK-NEXT: br label [[FLOW3]] -; CHECK: outer.loop.latch: -; CHECK-NEXT: br label [[FLOW11]] -; CHECK: Flow5: -; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ] -; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ] -; CHECK-NEXT: [[TMP19]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ] +; CHECK: Flow12: +; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW10]] ] ; CHECK-NEXT: br label [[FLOW4]] +; CHECK: outer.loop.latch: +; CHECK-NEXT: br label [[FLOW12]] +; CHECK: Flow6: +; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP4]], [[FLOW7]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP4]], [[FLOW7]] ], [ [[TMP3]], [[NODEBLOCK]] ] +; CHECK-NEXT: [[TMP18]] = phi i1 [ false, [[FLOW7]] ], [ true, [[NODEBLOCK]] ] +; CHECK-NEXT: br label [[FLOW5]] ; CHECK: inner.loop.latch: -; CHECK-NEXT: br label [[FLOW6]] +; CHECK-NEXT: br label [[FLOW7]] ; CHECK: exit: ; CHECK-NEXT: ret i1 [[TMP0]] ; Index: llvm/test/Transforms/UnifyLoopExits/reduce_guards.ll =================================================================== --- llvm/test/Transforms/UnifyLoopExits/reduce_guards.ll +++ llvm/test/Transforms/UnifyLoopExits/reduce_guards.ll @@ -142,6 +142,111 @@ ret void } +; Test with 3 nested loops to show the case when we need to change the loop for +; the predecessors of the loop guard. Since UnifyLoopExits works from the outer +; loop to the inner loop, the predecessors are added to the outermost loop. +; When a guard block is added to the exit of the inner loop, the predecessor +; block loops need to change to the middle loop. 
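+; In the CHECK lines below this shows up as the chain of guard phis
+; %Guard.H.moved.moved -> %Guard.H.moved -> %Guard.H, which carries the exit
+; condition from the innermost guard block (%loop.exit.guard2) out through
+; %loop.exit.guard1 to %loop.exit.guard.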
+ +define void @three_loops(i1 %PredEntry, i1 %PredA, i1 %PredB) { +; CHECK-LABEL: @three_loops( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[L1:%.*]], label [[H:%.*]] +; CHECK: L1: +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I2:%.*]], [[F:%.*]] ] +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: [[J1:%.*]] = phi i32 [ 0, [[L1]] ], [ [[J2:%.*]], [[E:%.*]] ] +; CHECK-NEXT: br label [[L3:%.*]] +; CHECK: L3: +; CHECK-NEXT: [[K1:%.*]] = phi i32 [ 0, [[L2]] ], [ [[K2:%.*]], [[D:%.*]] ] +; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]] +; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2:%.*]] +; CHECK: B: +; CHECK-NEXT: br i1 [[PREDB:%.*]], label [[C:%.*]], label [[D]] +; CHECK: C: +; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]] +; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2]] +; CHECK: D: +; CHECK-NEXT: [[K2]] = add i32 [[K1]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[K2]], 20 +; CHECK-NEXT: br i1 [[CMP1]], label [[L3]], label [[LOOP_EXIT_GUARD2]] +; CHECK: E: +; CHECK-NEXT: [[J2]] = add i32 [[J1]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[J2]], 10 +; CHECK-NEXT: br i1 [[CMP2]], label [[L2]], label [[LOOP_EXIT_GUARD1:%.*]] +; CHECK: F: +; CHECK-NEXT: [[I2]] = add i32 [[I1]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[I2]], 5 +; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[LOOP_EXIT_GUARD:%.*]] +; CHECK: G: +; CHECK-NEXT: unreachable +; CHECK: H: +; CHECK-NEXT: ret void +; CHECK: loop.exit.guard: +; CHECK-NEXT: [[GUARD_H:%.*]] = phi i1 [ true, [[F]] ], [ [[GUARD_H_MOVED:%.*]], [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: br i1 [[GUARD_H]], label [[H]], label [[G:%.*]] +; CHECK: loop.exit.guard1: +; CHECK-NEXT: [[GUARD_F:%.*]] = phi i1 [ true, [[E]] ], [ [[GUARD_F_MOVED:%.*]], [[LOOP_EXIT_GUARD2]] ] +; CHECK-NEXT: [[GUARD_H_MOVED]] = phi i1 [ undef, [[E]] ], [ [[GUARD_H_MOVED_MOVED:%.*]], [[LOOP_EXIT_GUARD2]] ] +; CHECK-NEXT: br i1 [[GUARD_F]], label [[F]], label [[LOOP_EXIT_GUARD]] +; CHECK: loop.exit.guard2: +; CHECK-NEXT: [[GUARD_E:%.*]] = phi i1 [ true, [[D]] ], [ false, [[A]] ], [ false, [[C]] ] +; CHECK-NEXT: [[GUARD_F_MOVED]] = phi i1 [ undef, [[D]] ], [ false, [[A]] ], [ false, [[C]] ] +; CHECK-NEXT: [[GUARD_H_MOVED_MOVED]] = phi i1 [ undef, [[D]] ], [ false, [[A]] ], [ false, [[C]] ] +; CHECK-NEXT: br i1 [[GUARD_E]], label [[E]], label [[LOOP_EXIT_GUARD1]] +; +entry: + br i1 %PredEntry, label %L1, label %H + +L1: + %i1 = phi i32 [ 0, %entry ], [ %i2, %F ] + br label %L2 + +L2: + %j1 = phi i32 [ 0, %L1 ], [ %j2, %E ] + br label %L3 + +L3: + %k1 = phi i32 [ 0, %L2 ], [ %k2, %D ] + br i1 %PredA, label %A, label %B + +A: + tail call fastcc void @check(i32 1) #0 + br label %G + +B: + br i1 %PredB, label %C, label %D + +C: + tail call fastcc void @check(i32 2) #0 + br label %G + +D: + %k2 = add i32 %k1, 1 + %cmp1 = icmp ult i32 %k2, 20 + br i1 %cmp1, label %L3, label %E + +E: + %j2 = add i32 %j1, 1 + %cmp2 = icmp ult i32 %j2, 10 + br i1 %cmp2, label %L2, label %F + +F: + %i2 = add i32 %i1, 1 + %cmp3 = icmp ult i32 %i2, 5 + br i1 %cmp3, label %L1, label %H + +G: + unreachable + +H: + ret void +} + + ; The common successor does not have to be an unreachable block. define void @common_exit(i1 %PredEntry, i1 %PredA, i1 %PredB) {