Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -250,22 +250,67 @@ BasicBlock *Header = L->getHeader(); assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + SmallVector OuterLoopPreds; PHINode *PN = findPHIToPartitionLoops(L, DT, AC); - if (!PN) return nullptr; // No known way to partition. - - // Pull out all predecessors that have varying values in the loop. This - // handles the case when a PHI node has multiple instances of itself as - // arguments. - SmallVector OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { + if (PN) { + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirect control flow edges. + if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + } else { + // Find predecessors of loop headers for outer loop using dominator tree. If + // a latch dominates other latches, it can be the latch for inner loop. The + // rest of predecessors are for outer loop. + BasicBlock *InnerLoopPred = nullptr; + for (auto *Pred : predecessors(Header)) { // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + if (Pred->getTerminator()->isIndirectTerminator()) return nullptr; - OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + + // Check the loop header's predecessor which is outside loop. 
+ if (!L->contains(Pred)) { + OuterLoopPreds.push_back(Pred); + continue; + } + + // Initialize InnerLoopPred + if (!InnerLoopPred) { + InnerLoopPred = Pred; + continue; + } + + // There could be same predecessors from switch instruction. Ignore the + // case. + if (Pred == InnerLoopPred) + continue; + + // Check dominance relation between latches and update InnerLoopPred. + if (DT->dominates(InnerLoopPred, Pred)) { + OuterLoopPreds.push_back(Pred); + } else if (DT->dominates(Pred, InnerLoopPred)) { + OuterLoopPreds.push_back(InnerLoopPred); + InnerLoopPred = Pred; + } else { + // If a latch has no dominance relation with the other latches, do not + // convert the loop. + return nullptr; + } } } + + // The outer loop header needs at least two predecessors: one is the + // preheader and the other is a latch. + if (OuterLoopPreds.size() < 2) + return nullptr; + LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in Index: llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* +; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -221,6 +221,13 @@ ; GCN: s_andn2_b64 ;
GCN-NEXT: s_cbranch_execz +; bb1 has multiple latches and it is converted to a nested loop with +; LoopSimplify. Below block is outcome from it. +; GCN: [[BB1_MULTI_LATCHES_OUTER_LOOP:BB[0-9]+_[0-9]+]]: +; GCN: s_and_b64 +; GCN: s_or_b64 +; GCN: s_mov_b64 + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] Index: llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll =================================================================== --- llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -82,23 +82,23 @@ ; ENABLE-NEXT: bhi .LBB0_7 ; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader ; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 -; ENABLE-NEXT: sub r3, r3, #2 +; ENABLE-NEXT: sub lr, r3, #2 ; ENABLE-NEXT: .LBB0_15: @ %while.body24 ; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 -; ENABLE-NEXT: mov r0, r3 -; ENABLE-NEXT: cmp r3, r2 +; ENABLE-NEXT: mov r0, lr +; ENABLE-NEXT: cmp lr, r2 ; ENABLE-NEXT: bls .LBB0_7 ; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 -; ENABLE-NEXT: mov r3, r0 -; ENABLE-NEXT: ldrsb lr, [r3], #-1 -; ENABLE-NEXT: cmn lr, #1 -; ENABLE-NEXT: uxtb r12, lr +; ENABLE-NEXT: mov lr, r0 +; ENABLE-NEXT: ldrsb r12, [lr], #-1 +; ENABLE-NEXT: cmn r12, #1 ; ENABLE-NEXT: bgt .LBB0_7 ; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 -; ENABLE-NEXT: cmp r12, #192 +; ENABLE-NEXT: uxtb r3, r12 +; ENABLE-NEXT: cmp r3, #192 ; ENABLE-NEXT: blo .LBB0_15 ; ENABLE-NEXT: b .LBB0_7 ; ENABLE-NEXT: .LBB0_18: @@ -172,23 +172,23 @@ ; DISABLE-NEXT: bhi .LBB0_7 ; DISABLE-NEXT: @ %bb.14: @ %while.body24.preheader ; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 -; DISABLE-NEXT: sub r3, r3, #2 +; DISABLE-NEXT: sub lr, r3, #2 ; DISABLE-NEXT: .LBB0_15: @ %while.body24 ; DISABLE-NEXT: @ Parent Loop BB0_7 
Depth=1 ; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 -; DISABLE-NEXT: mov r0, r3 -; DISABLE-NEXT: cmp r3, r2 +; DISABLE-NEXT: mov r0, lr +; DISABLE-NEXT: cmp lr, r2 ; DISABLE-NEXT: bls .LBB0_7 ; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 -; DISABLE-NEXT: mov r3, r0 -; DISABLE-NEXT: ldrsb lr, [r3], #-1 -; DISABLE-NEXT: cmn lr, #1 -; DISABLE-NEXT: uxtb r12, lr +; DISABLE-NEXT: mov lr, r0 +; DISABLE-NEXT: ldrsb r12, [lr], #-1 +; DISABLE-NEXT: cmn r12, #1 ; DISABLE-NEXT: bgt .LBB0_7 ; DISABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 -; DISABLE-NEXT: cmp r12, #192 +; DISABLE-NEXT: uxtb r3, r12 +; DISABLE-NEXT: cmp r3, #192 ; DISABLE-NEXT: blo .LBB0_15 ; DISABLE-NEXT: b .LBB0_7 ; DISABLE-NEXT: .LBB0_18: Index: llvm/test/CodeGen/SystemZ/pr32372.ll =================================================================== --- llvm/test/CodeGen/SystemZ/pr32372.ll +++ llvm/test/CodeGen/SystemZ/pr32372.ll @@ -4,9 +4,11 @@ define void @pr32372(i8*) { ; CHECK-LABEL: pr32372: ; CHECK: # %bb.0: # %BB +; CHECK-NEXT: llc %r0, 0(%r2) ; CHECK-NEXT: mvhhi 0(%r1), -3825 ; CHECK-NEXT: .LBB0_1: # %CF251 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cijhe %r0, 0, .LBB0_1 ; CHECK-NEXT: j .LBB0_1 BB: %L = load i8, i8* %0 Index: llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll =================================================================== --- llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll +++ llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll @@ -22,42 +22,43 @@ ; CHECK-NEXT: jmp LBB0_2 ; CHECK-NEXT: LBB0_3: ## %bb13086.preheader ; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: jmp LBB0_4 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_6: ## %bb13101 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; 
CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: shll $16, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: incl %esi +; CHECK-NEXT: shrl $16, %esi +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: testw %dx, %dx +; CHECK-NEXT: jne LBB0_8 ; CHECK-NEXT: LBB0_4: ## %bb13088 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: ## %bb.6: ## %bb13101 -; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: je LBB0_6 +; CHECK-NEXT: ## %bb.5: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movl $65535, %edx ## imm = 0xFFFF ; CHECK-NEXT: jmp LBB0_7 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_5: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movl $65535, %ecx ## imm = 0xFFFF -; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: LBB0_8: ## %bb13236 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shll $16, %edx -; CHECK-NEXT: subl %ecx, %edx -; CHECK-NEXT: incl %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: subl %edx, %ecx -; CHECK-NEXT: testw %cx, %cx +; CHECK-NEXT: cmpw $-1, %dx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.8: ## %bb13236 -; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: cmpw $-1, %cx -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_4 ; CHECK-NEXT: ## %bb.9: ## %bb13572 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movzwl %cx, %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shll $16, %edx -; CHECK-NEXT: subl %ecx, %edx -; CHECK-NEXT: incl %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: movw %dx, 0 +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: shll $16, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: incl %esi +; CHECK-NEXT: shrl $16, %esi +; CHECK-NEXT: movw %si, 0 ; CHECK-NEXT: jmp LBB0_4 ; CHECK-NEXT: LBB0_10: ## %return ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll 
=================================================================== --- llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "5 machinelicm" +; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "10 machinelicm" ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -18,7 +18,7 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rsi, %r14 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000 +; CHECK-NEXT: orl $2097152, %r14d ## imm = 0x200000 ; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_1: ## %bb4 Index: llvm/test/CodeGen/X86/bb_rotate.ll =================================================================== --- llvm/test/CodeGen/X86/bb_rotate.ll +++ llvm/test/CodeGen/X86/bb_rotate.ll @@ -1,16 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-linux < %s | FileCheck %s define i1 @no_viable_top_fallthrough() { -; CHECK-LABEL: no_viable_top_fallthrough -; CHECK: %.entry -; CHECK: %.bb1 -; CHECK: %.bb2 -; CHECK: %.middle -; CHECK: %.backedge -; CHECK: %.bb3 -; CHECK: %.header -; CHECK: %.exit -; CHECK: %.stop +; CHECK-LABEL: no_viable_top_fallthrough: +; CHECK: # %bb.0: # %.entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.1: # %.bb1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %.stop +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .p2align 4, 0x90 +; 
CHECK-NEXT: .LBB0_6: # %.middle +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_4: # %.header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.5: # %.bb2 +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB0_6 +; CHECK-NEXT: # %bb.8: # %.bb3 +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .LBB0_7: # %.backedge +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %.exit +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_3 .entry: %val1 = call i1 @foo() br i1 %val1, label %.bb1, label %.header, !prof !10 Index: llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll +++ llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll @@ -8,17 +8,19 @@ define void @test() nounwind { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK-NEXT: br label [[WHILE_COND_OUTER_OUTER:%.*]] ; CHECK: while.cond.loopexit: -; CHECK-NEXT: br label [[WHILE_COND_BACKEDGE:%.*]] +; CHECK-NEXT: br label [[WHILE_COND_OUTER_OUTER]] +; CHECK: while.cond.outer.outer: +; CHECK-NEXT: br label [[WHILE_COND_OUTER:%.*]] +; CHECK: while.cond.outer: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: ; CHECK-NEXT: br i1 true, label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: br i1 undef, label [[IF_THEN165:%.*]], label [[WHILE_COND_BACKEDGE]] -; CHECK: while.cond.backedge: -; CHECK-NEXT: br label [[WHILE_COND]] +; 
CHECK-NEXT: br i1 true, label [[IF_THEN165:%.*]], label [[WHILE_COND]] ; CHECK: if.then165: -; CHECK-NEXT: br i1 undef, label [[WHILE_COND_BACKEDGE]], label [[FOR_BODY_LR_PH_I81:%.*]] +; CHECK-NEXT: br i1 false, label [[WHILE_COND_OUTER]], label [[FOR_BODY_LR_PH_I81:%.*]] ; CHECK: for.body.lr.ph.i81: ; CHECK-NEXT: br label [[FOR_BODY_I86:%.*]] ; CHECK: for.body.i86: Index: llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll +++ llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll @@ -64,16 +64,17 @@ ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32* [[VAR:%.*]], null +; CHECK-NEXT: br label [[HEADER_OUTER:%.*]] +; CHECK: header.outer: +; CHECK-NEXT: [[PHI_INDVAR_PH:%.*]] = phi i32 [ [[INDVAR:%.*]], [[BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[PHI_INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR:%.*]], [[HEADER_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[PHI_INDVAR:%.*]] = phi i32 [ [[INDVAR]], [[HEADER]] ], [ [[PHI_INDVAR_PH]], [[HEADER_OUTER]] ] ; CHECK-NEXT: [[INDVAR]] = add i32 [[PHI_INDVAR]], 1 ; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[INDVAR]], 10 -; CHECK-NEXT: br i1 [[COND2]], label [[HEADER_BACKEDGE]], label [[BODY:%.*]] -; CHECK: header.backedge: -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: br i1 [[COND2]], label [[HEADER]], label [[BODY]] ; CHECK: body: -; CHECK-NEXT: br i1 [[COND1]], label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND1]], label [[HEADER_OUTER]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i32 [[PHI_INDVAR]] ; Index: llvm/test/Transforms/LICM/hoist-mustexec.ll =================================================================== --- llvm/test/Transforms/LICM/hoist-mustexec.ll +++ llvm/test/Transforms/LICM/hoist-mustexec.ll @@ -274,12 +274,12 @@ ret i32 -1 
} -; This works because loop-simplify is run implicitly, but test for it anyways +; After the loop-simplify pass transforms a loop with multiple latches into a +; nested loop, CanProveNotTakenFirstIteration fails because the iv phi's +; incoming value from the preheader is related to the outer loop's iv phi. define i32 @test-multiple-latch(i32* noalias nocapture readonly %a) nounwind uwtable { ; CHECK-LABEL: @test-multiple-latch( entry: -; CHECK: %i1 = load i32, i32* %a, align 4 -; CHECK-NEXT: br label %for.body br label %for.body for.body: Index: llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll =================================================================== --- llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -10,18 +10,19 @@ define i32 @dead_backedge_test_branch_loop(i32 %end) { ; CHECK-LABEL: @dead_backedge_test_branch_loop( ; CHECK-NEXT: preheader: +; CHECK-NEXT: br label [[HEADER_OUTER:%.*]] +; CHECK: header.outer: +; CHECK-NEXT: [[I_PH:%.*]] = phi i32 [ [[I_2:%.*]], [[DEAD_BACKEDGE:%.*]] ], [ 0, [[PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_1:%.*]], [[HEADER]] ], [ [[I_PH]], [[HEADER_OUTER]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 -; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] -; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER]], label [[DEAD_BACKEDGE]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: br i1 false, label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT:
[[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br i1 false, label [[HEADER_OUTER]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -47,19 +48,20 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) { ; CHECK-LABEL: @dead_backedge_test_switch_loop( ; CHECK-NEXT: preheader: +; CHECK-NEXT: br label [[HEADER_OUTER:%.*]] +; CHECK: header.outer: +; CHECK-NEXT: [[I_PH:%.*]] = phi i32 [ [[I_2:%.*]], [[DEAD_BACKEDGE:%.*]] ], [ 0, [[PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_1:%.*]], [[HEADER]] ], [ [[I_PH]], [[HEADER_OUTER]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 -; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] -; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER]], label [[DEAD_BACKEDGE]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2]] = add i32 [[I_1_LCSSA]], 10 ; CHECK-NEXT: switch i32 1, label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]] +; CHECK-NEXT: i32 0, label [[HEADER_OUTER]] ; CHECK-NEXT: ] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] @@ -2175,13 +2177,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; 
CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: @@ -2258,13 +2260,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ ; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] @@ -2344,13 +2346,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], 
[[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] ; CHECK: loop_3_backedge: @@ -2431,13 +2433,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: switch i32 1, label 
[[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ ; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] Index: llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll =================================================================== --- llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll +++ llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll @@ -16,14 +16,25 @@ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: switch i32 0, label [[BB2_SPLIT:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1_LOOPEXIT:%.*]] -; CHECK-NEXT: i32 2, label [[BB2_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[BB3_OUTER:%.*]] +; CHECK: bb3.loopexit: +; CHECK-NEXT: br label [[BB3_OUTER]] +; CHECK: bb3.outer: +; CHECK-NEXT: switch i32 0, label [[BB3_OUTER_SPLIT:%.*]] [ +; CHECK-NEXT: i32 1, label [[BB4_PREHEADER:%.*]] ; CHECK-NEXT: ] -; CHECK: bb2.split: +; CHECK: bb3.outer.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: br label [[BB3]] +; CHECK: bb4.preheader: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br i1 true, label [[BB1_LOOPEXIT:%.*]], label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: br i1 true, label [[BB2_LOOPEXIT:%.*]], label [[BB8:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br i1 true, label [[BB4]], label [[BB3_LOOPEXIT:%.*]] ; br label %bb1 @@ -57,14 +68,61 @@ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: switch i32 0, label [[BB2_SPLIT:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1_LOOPEXIT:%.*]] -; CHECK-NEXT: i32 2, label [[BB2_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[BB3_OUTER:%.*]] +; CHECK: bb3.loopexit: +; CHECK-NEXT: br label [[BB3_OUTER]] +; CHECK: bb3.outer: +; CHECK-NEXT: switch i32 0, label [[BB3_OUTER_SPLIT:%.*]] [ +; CHECK-NEXT: i32 1, label [[BB4_PREHEADER:%.*]] ; CHECK-NEXT: ] -; CHECK: bb2.split: +; CHECK: bb3.outer.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: br label [[BB3]] +; CHECK: bb4.preheader: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: 
br i1 true, label [[BB1_LOOPEXIT:%.*]], label [[SUBLOOP1_PREHEADER:%.*]] +; CHECK: subloop1.preheader: +; CHECK-NEXT: br label [[SUBLOOP1:%.*]] +; CHECK: subloop1: +; CHECK-NEXT: br i1 [[C:%.*]], label [[SUBLOOP2_PREHEADER:%.*]], label [[SUBLOOP11_PREHEADER:%.*]] +; CHECK: subloop2.preheader: +; CHECK-NEXT: br label [[SUBLOOP2:%.*]] +; CHECK: subloop11.preheader: +; CHECK-NEXT: br label [[SUBLOOP11:%.*]] +; CHECK: subloop11: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP11]], label [[SUBLOOP12_PREHEADER:%.*]] +; CHECK: subloop12.preheader: +; CHECK-NEXT: br label [[SUBLOOP12:%.*]] +; CHECK: subloop12: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP12]], label [[SUBLOOP13_PREHEADER:%.*]] +; CHECK: subloop13.preheader: +; CHECK-NEXT: br label [[SUBLOOP13:%.*]] +; CHECK: subloop13: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP13]], label [[SUBLOOP1_LATCH:%.*]] +; CHECK: subloop1_latch: +; CHECK-NEXT: br label [[SUBLOOP1]] +; CHECK: subloop2: +; CHECK-NEXT: br i1 [[C]], label [[BB6:%.*]], label [[SUBLOOP21_PREHEADER:%.*]] +; CHECK: subloop21.preheader: +; CHECK-NEXT: br label [[SUBLOOP21:%.*]] +; CHECK: subloop21: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP21]], label [[SUBLOOP22_PREHEADER:%.*]] +; CHECK: subloop22.preheader: +; CHECK-NEXT: br label [[SUBLOOP22:%.*]] +; CHECK: subloop22: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP22]], label [[SUBLOOP23_PREHEADER:%.*]] +; CHECK: subloop23.preheader: +; CHECK-NEXT: br label [[SUBLOOP23:%.*]] +; CHECK: subloop23: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP23]], label [[SUBLOOP2_LATCH:%.*]] +; CHECK: subloop2_latch: +; CHECK-NEXT: br label [[SUBLOOP2]] +; CHECK: bb6: +; CHECK-NEXT: br i1 true, label [[BB2_LOOPEXIT:%.*]], label [[BB8:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br i1 true, label [[BB4]], label [[BB3_LOOPEXIT:%.*]] ; br label %bb1 Index: llvm/test/Transforms/LoopVectorize/loop-form.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/loop-form.ll +++ 
llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -885,44 +885,75 @@ } -; two back branches - loop simplify with convert this to the same form -; as previous before vectorizer sees it, but show that. +; loop-simplify pass transforms loop with multiple latches to a nested loop and +; the nested loop can be vectorized. define i32 @multiple_latch2(i16* %p) { ; CHECK-LABEL: @multiple_latch2( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_OUTER:%.*]] +; CHECK: for.body.outer: +; CHECK-NEXT: [[I_02_PH:%.*]] = phi i32 [ [[INC_LCSSA:%.*]], [[FOR_SECOND:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I_02_PH]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[I_02_PH]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[I_02_PH]], [[N_VEC]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I_02_PH]], [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i32 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i32 [[I_02_PH]], [[CAST_CMO]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_SECOND]], label 
[[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_02_PH]], [[FOR_BODY_OUTER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] -; CHECK: for.body.backedge: -; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_SECOND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.second: -; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 +; CHECK-NEXT: [[I_02_LCSSA:%.*]] = phi i32 [ [[I_02]], [[FOR_BODY]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA]] = phi i32 [ [[INC]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02_LCSSA]] to i64 ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 -; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 -; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC_LCSSA]], 16 +; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_OUTER]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; ; TAILFOLD-LABEL: @multiple_latch2( ; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[FOR_BODY_OUTER:%.*]] +; TAILFOLD: for.body.outer: +; TAILFOLD-NEXT: [[I_02_PH:%.*]] = phi i32 [ [[INC_LCSSA:%.*]], [[FOR_SECOND:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] ; TAILFOLD: for.body: -; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] +; 
TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[I_02_PH]], [[FOR_BODY_OUTER]] ] ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 ; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 -; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] -; TAILFOLD: for.body.backedge: -; TAILFOLD-NEXT: br label [[FOR_BODY]] +; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_SECOND]] ; TAILFOLD: for.second: -; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 +; TAILFOLD-NEXT: [[I_02_LCSSA:%.*]] = phi i32 [ [[I_02]], [[FOR_BODY]] ] +; TAILFOLD-NEXT: [[INC_LCSSA]] = phi i32 [ [[INC]], [[FOR_BODY]] ] +; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02_LCSSA]] to i64 ; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] ; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 -; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 -; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] +; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC_LCSSA]], 16 +; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_OUTER]], label [[FOR_END:%.*]] ; TAILFOLD: for.end: ; TAILFOLD-NEXT: ret i32 0 ; @@ -983,7 +1014,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1003,7 +1034,7 @@ ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] ; 
CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1074,7 +1105,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) ; CHECK-NEXT: br label [[SCALAR_PH]] @@ -1093,7 +1124,7 @@ ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 -; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] ; CHECK-NEXT: ret i32 [[LCSSA]] Index: llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll +++ llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -S -disable-output 2>&1 | FileCheck %s ; REQUIRES: asserts -; Make sure LV legal bails out when there is no exiting block +; loop-simplify pass transforms loop with multiple latches to a nested loop and +; the inner loop can be vectorized. ; CHECK-LABEL: "no_exiting_block" -; CHECK: LV: Not vectorizing: The loop must have a unique exit block. 
define i32 @no_exiting_block() { entry: br label %for.body Index: llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -6,15 +6,17 @@ define i16 @test(i16** %arg, i64 %N) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_OUTER:%.*]] +; CHECK: outer.outer: ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[L_1:%.*]] = load i16*, i16** [[ARG:%.*]], align 8 ; CHECK-NEXT: [[L_2:%.*]] = load i16*, i16** [[ARG]], align 8 ; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[C_1]], label [[OUTER_BACKEDGE:%.*]], label [[INNER_PREHEADER:%.*]] -; CHECK: outer.backedge: -; CHECK-NEXT: br label [[OUTER]] +; CHECK-NEXT: br i1 [[C_1]], label [[OUTER]], label [[INNER_PREHEADER:%.*]] ; CHECK: inner.preheader: +; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[OUTER]] ] +; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[OUTER]] ] ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() @@ -23,21 +25,20 @@ ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]] ; CHECK: loop3.preheader: -; CHECK-NEXT: [[L_1_LCSSA8:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_2_LCSSA3:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8* +; CHECK-NEXT: [[L_1_LCSSA_LCSSA:%.*]] = phi i16* [ [[L_1_LCSSA]], [[INNER_BB]] ] +; CHECK-NEXT: [[L_2_LCSSA_LCSSA:%.*]] = phi i16* [ [[L_2_LCSSA]], [[INNER_BB]] ] +; CHECK-NEXT: [[L_2_LCSSA_LCSSA5:%.*]] = bitcast i16* [[L_2_LCSSA_LCSSA]] to i8* ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult 
i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA3]], i64 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA_LCSSA5]], i64 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_1_LCSSA_LCSSA]], i64 1 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = bitcast i16* [[SCEVGEP]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], 2 -; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i16, i16* [[L_1_LCSSA8]], i64 [[TMP1]] -; CHECK-NEXT: [[SCEVGEP710:%.*]] = bitcast i16* [[SCEVGEP7]] to i8* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP710]] +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i16* [[SCEVGEP7]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA_LCSSA5]], [[SCEVGEP78]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP6]], [[UGLYGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true @@ -50,18 +51,18 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[L_1]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 2, !alias.scope !0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2]], i64 0 +; CHECK-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA_LCSSA]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i16 [[TMP8]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -72,24 +73,24 @@ ; CHECK-NEXT: [[C_4:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_4]], label [[EXIT_LOOPEXIT1:%.*]], label [[INNER]] ; CHECK: outer.latch: -; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK-NEXT: br label [[OUTER_OUTER]] ; CHECK: loop3: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[C_5:%.*]] = icmp ult i64 [[IV]], [[N]] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[IV_NEXT]] ; CHECK-NEXT: [[LOOP_L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA]], i64 0 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA_LCSSA]], i64 0 ; CHECK-NEXT: store i16 [[LOOP_L_1]], i16* [[GEP_2]], align 2 -; CHECK-NEXT: br i1 [[C_5]], label [[LOOP3]], label [[EXIT_LOOPEXIT]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 
[[C_5]], label [[LOOP3]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit.loopexit1: -; CHECK-NEXT: [[L_1_LCSSA4:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[L_1_LCSSA_LCSSA3:%.*]] = phi i16* [ [[L_1_LCSSA]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[L_15:%.*]] = phi i16* [ [[L_1_LCSSA4]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ] -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_15]], align 2 +; CHECK-NEXT: [[L_1_LCSSA4:%.*]] = phi i16* [ [[L_1_LCSSA_LCSSA3]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_1_LCSSA4]], align 2 ; CHECK-NEXT: ret i16 [[L_3]] ; entry: