Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -250,22 +250,111 @@ BasicBlock *Header = L->getHeader(); assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + SmallVector OuterLoopPreds; PHINode *PN = findPHIToPartitionLoops(L, DT, AC); - if (!PN) return nullptr; // No known way to partition. - - // Pull out all predecessors that have varying values in the loop. This - // handles the case when a PHI node has multiple instances of itself as - // arguments. - SmallVector OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { + if (PN) { + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirect control flow edges. + if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + } else { + // Find predecessors of loop headers for outer loop using dominator tree. If + // a latch dominates other latches, it can be the latch for inner loop. The + // rest of predecessors are for outer loop. + BasicBlock *InnerLoopPred = nullptr; + for (auto *Pred : predecessors(Header)) { // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + if (Pred->getTerminator()->isIndirectTerminator()) + return nullptr; + + // Check the loop header's predecessor which is outside loop. 
+ if (!L->contains(Pred)) { + OuterLoopPreds.push_back(Pred); + continue; + } + + // Initialize InnerLoopPred + if (!InnerLoopPred) { + InnerLoopPred = Pred; + continue; + } + + // There could be same predecessors from switch instruction. Ignore the + // case. + if (Pred == InnerLoopPred) + continue; + + // Check dominance relation between latches and update InnerLoopPred. + if (DT->dominates(InnerLoopPred, Pred)) { + OuterLoopPreds.push_back(Pred); + } else if (DT->dominates(Pred, InnerLoopPred)) { + OuterLoopPreds.push_back(InnerLoopPred); + InnerLoopPred = Pred; + } else { + // If there is latch which has no dominance relation with other latches, + // do not convert the loop. return nullptr; - OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } } } + + // The outer loop header needs at least 2 predecessors: one is the preheader + // and the other is a latch. + if (OuterLoopPreds.size() < 2) + return nullptr; + + // If loop header has phi nodes which has same incoming value from different + // incoming blocks inside loop, it causes cascaded phi nodes from outer loop + // header to inner loop header after transformation as below and it blocks + // optimizations later. + // + // loop.header: + // %iv = phi i32 [ 0, %entry ], [ %inc, %latch1 ], [ %inc, %latch2 ] + // + // After this transformation, + // + // loop.header.outer: + // %iv.ph = phi i32 [ %inc.lcssa, %latch2 ], [ 0, %entry ] + // + // loop.header.inner: + // %iv = phi i32 [ %inc, %latch1 ], [ %iv.ph, %loop.header.outer ] + auto ShouldIgnoreValue = [&L](PHINode &PN, unsigned idx) { + // Ignore incoming block which is located outside the loop. + if (!L->contains(PN.getIncomingBlock(idx))) + return true; + + // Ignore constant value. 
+ if (isa(PN.getIncomingValue(idx))) + return true; + + return false; + }; + + for (PHINode &PN : Header->phis()) { + for (unsigned i = 0, e = PN.getNumIncomingValues()-1; i != e; ++i) { + if (ShouldIgnoreValue(PN, i)) + continue; + + // If phi node has same value from different incoming blocks, do not + // transform this loop. + for (unsigned j = i + 1, e = PN.getNumIncomingValues(); j != e; ++j) { + if (ShouldIgnoreValue(PN, j)) + continue; + + if (PN.getIncomingValue(i) == PN.getIncomingValue(j)) + return nullptr; + } + } + } + LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in Index: llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* +; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -113,8 +113,7 @@ ; verify the preheader is simplified by simplifycfg. 
; CHECK: [[PH]]: -; CHECK: mov w22, #2 -; CHECK-NOT: mov w22, #4 +; CHECK: mov w22, #4 ; CHECK-NOT: cmn w22, #4 ; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond ; CHECK-NOT: b.ne [[LOOP2]] Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -221,6 +221,13 @@ ; GCN: s_andn2_b64 ; GCN-NEXT: s_cbranch_execz +; bb1 has multiple latches and it is converted to a nested loop with +; LoopSimplify. Below block is outcome from it. +; GCN: [[BB1_MULTI_LATCHES_OUTER_LOOP:BB[0-9]+_[0-9]+]]: +; GCN: s_and_b64 +; GCN: s_or_b64 +; GCN: s_mov_b64 + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] Index: llvm/test/CodeGen/SystemZ/pr32372.ll =================================================================== --- llvm/test/CodeGen/SystemZ/pr32372.ll +++ llvm/test/CodeGen/SystemZ/pr32372.ll @@ -4,9 +4,11 @@ define void @pr32372(i8*) { ; CHECK-LABEL: pr32372: ; CHECK: # %bb.0: # %BB +; CHECK-NEXT: llc %r0, 0(%r2) ; CHECK-NEXT: mvhhi 0(%r1), -3825 ; CHECK-NEXT: .LBB0_1: # %CF251 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cijhe %r0, 0, .LBB0_1 ; CHECK-NEXT: j .LBB0_1 BB: %L = load i8, i8* %0 Index: llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll =================================================================== --- llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll +++ llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll @@ -22,42 +22,43 @@ ; CHECK-NEXT: jmp LBB0_2 ; CHECK-NEXT: LBB0_3: ## %bb13086.preheader ; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: jmp LBB0_4 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_6: ## %bb13101 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movl %edx, %esi +; 
CHECK-NEXT: shll $16, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: incl %esi +; CHECK-NEXT: shrl $16, %esi +; CHECK-NEXT: subl %esi, %edx +; CHECK-NEXT: testw %dx, %dx +; CHECK-NEXT: jne LBB0_8 ; CHECK-NEXT: LBB0_4: ## %bb13088 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: ## %bb.6: ## %bb13101 -; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: je LBB0_6 +; CHECK-NEXT: ## %bb.5: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movl $65535, %edx ## imm = 0xFFFF ; CHECK-NEXT: jmp LBB0_7 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_5: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movl $65535, %ecx ## imm = 0xFFFF -; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: LBB0_8: ## %bb13236 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shll $16, %edx -; CHECK-NEXT: subl %ecx, %edx -; CHECK-NEXT: incl %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: subl %edx, %ecx -; CHECK-NEXT: testw %cx, %cx +; CHECK-NEXT: cmpw $-1, %dx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.8: ## %bb13236 -; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: cmpw $-1, %cx -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_4 ; CHECK-NEXT: ## %bb.9: ## %bb13572 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movzwl %cx, %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shll $16, %edx -; CHECK-NEXT: subl %ecx, %edx -; CHECK-NEXT: incl %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: movw %dx, 0 +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: shll $16, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: incl %esi +; CHECK-NEXT: shrl $16, %esi +; CHECK-NEXT: movw %si, 0 ; CHECK-NEXT: jmp LBB0_4 ; CHECK-NEXT: LBB0_10: ## %return ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll 
=================================================================== --- llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "5 machinelicm" +; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "10 machinelicm" ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -18,7 +18,7 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rsi, %r14 ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000 +; CHECK-NEXT: orl $2097152, %r14d ## imm = 0x200000 ; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_1: ## %bb4 Index: llvm/test/CodeGen/X86/bb_rotate.ll =================================================================== --- llvm/test/CodeGen/X86/bb_rotate.ll +++ llvm/test/CodeGen/X86/bb_rotate.ll @@ -1,16 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-linux < %s | FileCheck %s define i1 @no_viable_top_fallthrough() { -; CHECK-LABEL: no_viable_top_fallthrough -; CHECK: %.entry -; CHECK: %.bb1 -; CHECK: %.bb2 -; CHECK: %.middle -; CHECK: %.backedge -; CHECK: %.bb3 -; CHECK: %.header -; CHECK: %.exit -; CHECK: %.stop +; CHECK-LABEL: no_viable_top_fallthrough: +; CHECK: # %bb.0: # %.entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.1: # %.bb1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %.stop +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .p2align 4, 0x90 +; 
CHECK-NEXT: .LBB0_6: # %.middle +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_4: # %.header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.5: # %.bb2 +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB0_6 +; CHECK-NEXT: # %bb.8: # %.bb3 +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .LBB0_7: # %.backedge +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %.exit +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: jmp .LBB0_3 .entry: %val1 = call i1 @foo() br i1 %val1, label %.bb1, label %.header, !prof !10 Index: llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll +++ llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll @@ -8,17 +8,19 @@ define void @test() nounwind { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK-NEXT: br label [[WHILE_COND_OUTER_OUTER:%.*]] ; CHECK: while.cond.loopexit: -; CHECK-NEXT: br label [[WHILE_COND_BACKEDGE:%.*]] +; CHECK-NEXT: br label [[WHILE_COND_OUTER_OUTER]] +; CHECK: while.cond.outer.outer: +; CHECK-NEXT: br label [[WHILE_COND_OUTER:%.*]] +; CHECK: while.cond.outer: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: ; CHECK-NEXT: br i1 true, label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: br i1 undef, label [[IF_THEN165:%.*]], label [[WHILE_COND_BACKEDGE]] -; CHECK: while.cond.backedge: -; CHECK-NEXT: br label [[WHILE_COND]] +; 
CHECK-NEXT: br i1 true, label [[IF_THEN165:%.*]], label [[WHILE_COND]] ; CHECK: if.then165: -; CHECK-NEXT: br i1 undef, label [[WHILE_COND_BACKEDGE]], label [[FOR_BODY_LR_PH_I81:%.*]] +; CHECK-NEXT: br i1 false, label [[WHILE_COND_OUTER]], label [[FOR_BODY_LR_PH_I81:%.*]] ; CHECK: for.body.lr.ph.i81: ; CHECK-NEXT: br label [[FOR_BODY_I86:%.*]] ; CHECK: for.body.i86: Index: llvm/test/Transforms/LoopSimplify/multiple-latches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopSimplify/multiple-latches.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=loop-simplify | FileCheck %s + +; The loop with multiple latches can be transformed to a nested loop using +; dominator tree rather than checking phi node's condition. +define void @test_loop_simplify_multiple_latches_transfor_success(i1 %a, i32 %b, i8* noalias %src, i8* noalias %dst) { +; CHECK-LABEL: @test_loop_simplify_multiple_latches_transfor_success( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND_OUTER:%.*]] +; CHECK: while.cond.outer: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: br i1 [[A:%.*]], label [[WHILE_END6895:%.*]], label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: switch i32 [[B:%.*]], label [[SW_DEFAULT6833:%.*]] [ +; CHECK-NEXT: i32 82, label [[NO_RET_LOOPEXIT:%.*]] +; CHECK-NEXT: i32 30, label [[SW_BB4001:%.*]] +; CHECK-NEXT: i32 40, label [[WHILE_COND_BACKEDGE:%.*]] +; CHECK-NEXT: i32 41, label [[WHILE_COND_BACKEDGE]] +; CHECK-NEXT: ] +; CHECK: while.cond.backedge: +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: sw.bb4001: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 31 +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 +; CHECK-NEXT: store i8 [[RES]], i8* [[DST:%.*]], align 1 +; CHECK-NEXT: br label [[WHILE_COND_OUTER]] +; CHECK: sw.default6833: +; CHECK-NEXT: br label 
[[NO_RET:%.*]] +; CHECK: while.end6895: +; CHECK-NEXT: br label [[NO_RET]] +; CHECK: no_ret.loopexit: +; CHECK-NEXT: br label [[NO_RET]] +; CHECK: no_ret: +; CHECK-NEXT: ret void +; +entry: + br label %while.cond + +while.cond: + br i1 %a, label %while.end6895, label %while.body + +while.body: + switch i32 %b, label %sw.default6833 [ + i32 82, label %no_ret + i32 30, label %sw.bb4001 + i32 40, label %while.cond + i32 41, label %while.cond + ] + +sw.bb4001: + %addr = getelementptr i8, i8* %src, i32 31 + %res = load i8, i8* %addr + store i8 %res, i8* %dst + br label %while.cond + +sw.default6833: + br label %no_ret + +while.end6895: + br label %no_ret + +no_ret: + ret void +} + +; if the loop header has phi nodes which has same incoming value from incoming +; blocks inside loop, the transformation should be failed. +define i32 @test_loop_simplify_multiple_latches_transform_fail(i32* noalias nocapture readonly %a) { +; CHECK-LABEL: @test_loop_simplify_multiple_latches_transform_fail( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY_BACKEDGE]] ] +; CHECK-NEXT: [[R_CHK:%.*]] = icmp ult i32 [[IV]], 2000 +; CHECK-NEXT: br i1 [[R_CHK]], label [[LATCH1:%.*]], label [[FAIL:%.*]] +; CHECK: latch1: +; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[A:%.*]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[I1]], [[ACC]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LATCH2:%.*]], label [[FOR_BODY_BACKEDGE]] +; CHECK: for.body.backedge: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: latch2: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_BACKEDGE]] +; CHECK: for.cond.cleanup: +; 
CHECK-NEXT: ret i32 [[ADD]] +; CHECK: fail: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 -1 +; +entry: + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %inc, %latch1 ], [ %inc, %latch2 ] + %acc = phi i32 [ 0, %entry ], [ %add, %latch1 ], [ %add, %latch2 ] + %r.chk = icmp ult i32 %iv, 2000 + br i1 %r.chk, label %latch1, label %fail + +latch1: + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %latch2, label %for.body + +latch2: + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +declare void @f() nounwind Index: llvm/test/Transforms/LoopSimplify/preserve-scev.ll =================================================================== --- llvm/test/Transforms/LoopSimplify/preserve-scev.ll +++ llvm/test/Transforms/LoopSimplify/preserve-scev.ll @@ -23,17 +23,14 @@ ; Now simplify the loop, which should cause SCEV to re-compute more precise ; info here in addition to having preheader PHIs. Second SCEV print: ; CHECK-LABEL: Classifying expressions for: @test -; CHECK: phi i32 [ %{{.*}}, %if.then5 ], [ 0, %entry ] +; CHECK: phi i32 [ 0, %entry ], [ %{{.*}}, %for.cond.backedge ] ; CHECK-LABEL: Determining loop execution counts for: @test ; CHECK: Loop %for.body18: Unpredictable backedge-taken count. ; CHECK: Loop %for.body18: max backedge-taken count is 2147483646 ; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count. ; CHECK: Loop %for.cond: Unpredictable backedge-taken count. -; CHECK: Loop %for.cond: max backedge-taken count is -2147483647 +; CHECK: Loop %for.cond: max backedge-taken count is 1 ; CHECK: Loop %for.cond: Unpredictable predicated backedge-taken count. -; CHECK: Loop %for.cond.outer: Unpredictable backedge-taken count. -; CHECK: Loop %for.cond.outer: Unpredictable max backedge-taken count. 
-; CHECK: Loop %for.cond.outer: Unpredictable predicated backedge-taken count. define i32 @test() nounwind { entry: br label %for.cond Index: llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll =================================================================== --- llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -10,18 +10,19 @@ define i32 @dead_backedge_test_branch_loop(i32 %end) { ; CHECK-LABEL: @dead_backedge_test_branch_loop( ; CHECK-NEXT: preheader: +; CHECK-NEXT: br label [[HEADER_OUTER:%.*]] +; CHECK: header.outer: +; CHECK-NEXT: [[I_PH:%.*]] = phi i32 [ [[I_2:%.*]], [[DEAD_BACKEDGE:%.*]] ], [ 0, [[PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_1:%.*]], [[HEADER]] ], [ [[I_PH]], [[HEADER_OUTER]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 -; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] -; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER]], label [[DEAD_BACKEDGE]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 -; CHECK-NEXT: br i1 false, label [[HEADER_BACKEDGE]], label [[EXIT:%.*]] +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2]] = add i32 [[I_1_LCSSA]], 10 +; CHECK-NEXT: br i1 false, label [[HEADER_OUTER]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] ; CHECK-NEXT: ret i32 [[I_2_LCSSA]] @@ -47,19 +48,20 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) { ; CHECK-LABEL: 
@dead_backedge_test_switch_loop( ; CHECK-NEXT: preheader: +; CHECK-NEXT: br label [[HEADER_OUTER:%.*]] +; CHECK: header.outer: +; CHECK-NEXT: [[I_PH:%.*]] = phi i32 [ [[I_2:%.*]], [[DEAD_BACKEDGE:%.*]] ], [ 0, [[PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ [[I_BE:%.*]], [[HEADER_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[I_1:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_1:%.*]], [[HEADER]] ], [ [[I_PH]], [[HEADER_OUTER]] ] +; CHECK-NEXT: [[I_1]] = add i32 [[I]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_1]], 100 -; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER_BACKEDGE]], label [[DEAD_BACKEDGE:%.*]] -; CHECK: header.backedge: -; CHECK-NEXT: [[I_BE]] = phi i32 [ [[I_1]], [[HEADER]] ], [ [[I_2:%.*]], [[DEAD_BACKEDGE]] ] -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: br i1 [[CMP1]], label [[HEADER]], label [[DEAD_BACKEDGE]] ; CHECK: dead_backedge: -; CHECK-NEXT: [[I_2]] = add i32 [[I_1]], 10 +; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i32 [ [[I_1]], [[HEADER]] ] +; CHECK-NEXT: [[I_2]] = add i32 [[I_1_LCSSA]], 10 ; CHECK-NEXT: switch i32 1, label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]] +; CHECK-NEXT: i32 0, label [[HEADER_OUTER]] ; CHECK-NEXT: ] ; CHECK: exit: ; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ] @@ -2175,13 +2177,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 
[[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_2_BACKEDGE]] ; CHECK: loop_3_backedge: @@ -2258,13 +2260,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: switch i32 1, label [[LOOP_2_BACKEDGE]] [ ; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] @@ -2344,13 +2346,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label 
[[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: br i1 false, label [[LOOP_3_BACKEDGE]], label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] ; CHECK: loop_3_backedge: @@ -2431,13 +2433,13 @@ ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[LOOP_2]] ], [ [[K_NEXT:%.*]], [[LOOP_3_BACKEDGE:%.*]] ] ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[LOOP_3_BACKEDGE]], label [[INTERMEDIATE:%.*]] ; CHECK: intermediate: +; CHECK-NEXT: br label [[INTERMEDIATE_LOOP_OUTER:%.*]] +; CHECK: intermediate_loop.outer: ; CHECK-NEXT: br label [[INTERMEDIATE_LOOP:%.*]] ; CHECK: intermediate_loop: -; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE:%.*]], label [[INTERMEDIATE_BLOCK:%.*]] -; CHECK: intermediate_loop.backedge: -; CHECK-NEXT: br label [[INTERMEDIATE_LOOP]] +; CHECK-NEXT: br i1 [[COND3:%.*]], label [[INTERMEDIATE_LOOP]], label [[INTERMEDIATE_BLOCK:%.*]] ; CHECK: intermediate_block: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_BACKEDGE]], label [[INTERMEDIATE_EXIT:%.*]] +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[INTERMEDIATE_LOOP_OUTER]], label [[INTERMEDIATE_EXIT:%.*]] ; CHECK: intermediate_exit: ; CHECK-NEXT: switch i32 1, label [[LOOP_1_BACKEDGE_LOOPEXIT:%.*]] [ ; CHECK-NEXT: i32 0, label [[LOOP_3_BACKEDGE]] Index: llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll =================================================================== --- llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll +++ llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll @@ -16,14 +16,25 @@ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2]] ; 
CHECK: bb2: -; CHECK-NEXT: switch i32 0, label [[BB2_SPLIT:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1_LOOPEXIT:%.*]] -; CHECK-NEXT: i32 2, label [[BB2_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[BB3_OUTER:%.*]] +; CHECK: bb3.loopexit: +; CHECK-NEXT: br label [[BB3_OUTER]] +; CHECK: bb3.outer: +; CHECK-NEXT: switch i32 0, label [[BB3_OUTER_SPLIT:%.*]] [ +; CHECK-NEXT: i32 1, label [[BB4_PREHEADER:%.*]] ; CHECK-NEXT: ] -; CHECK: bb2.split: +; CHECK: bb3.outer.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: br label [[BB3]] +; CHECK: bb4.preheader: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br i1 true, label [[BB1_LOOPEXIT:%.*]], label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: br i1 true, label [[BB2_LOOPEXIT:%.*]], label [[BB8:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br i1 true, label [[BB4]], label [[BB3_LOOPEXIT:%.*]] ; br label %bb1 @@ -57,14 +68,61 @@ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: switch i32 0, label [[BB2_SPLIT:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1_LOOPEXIT:%.*]] -; CHECK-NEXT: i32 2, label [[BB2_LOOPEXIT:%.*]] +; CHECK-NEXT: br label [[BB3_OUTER:%.*]] +; CHECK: bb3.loopexit: +; CHECK-NEXT: br label [[BB3_OUTER]] +; CHECK: bb3.outer: +; CHECK-NEXT: switch i32 0, label [[BB3_OUTER_SPLIT:%.*]] [ +; CHECK-NEXT: i32 1, label [[BB4_PREHEADER:%.*]] ; CHECK-NEXT: ] -; CHECK: bb2.split: +; CHECK: bb3.outer.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: br label [[BB3]] +; CHECK: bb4.preheader: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br i1 true, label [[BB1_LOOPEXIT:%.*]], label [[SUBLOOP1_PREHEADER:%.*]] +; CHECK: subloop1.preheader: +; CHECK-NEXT: br label [[SUBLOOP1:%.*]] +; CHECK: subloop1: +; CHECK-NEXT: br i1 [[C:%.*]], label [[SUBLOOP2_PREHEADER:%.*]], label [[SUBLOOP11_PREHEADER:%.*]] +; CHECK: subloop2.preheader: +; CHECK-NEXT: br label [[SUBLOOP2:%.*]] +; CHECK: subloop11.preheader: +; CHECK-NEXT: br label 
[[SUBLOOP11:%.*]] +; CHECK: subloop11: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP11]], label [[SUBLOOP12_PREHEADER:%.*]] +; CHECK: subloop12.preheader: +; CHECK-NEXT: br label [[SUBLOOP12:%.*]] +; CHECK: subloop12: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP12]], label [[SUBLOOP13_PREHEADER:%.*]] +; CHECK: subloop13.preheader: +; CHECK-NEXT: br label [[SUBLOOP13:%.*]] +; CHECK: subloop13: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP13]], label [[SUBLOOP1_LATCH:%.*]] +; CHECK: subloop1_latch: +; CHECK-NEXT: br label [[SUBLOOP1]] +; CHECK: subloop2: +; CHECK-NEXT: br i1 [[C]], label [[BB6:%.*]], label [[SUBLOOP21_PREHEADER:%.*]] +; CHECK: subloop21.preheader: +; CHECK-NEXT: br label [[SUBLOOP21:%.*]] +; CHECK: subloop21: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP21]], label [[SUBLOOP22_PREHEADER:%.*]] +; CHECK: subloop22.preheader: +; CHECK-NEXT: br label [[SUBLOOP22:%.*]] +; CHECK: subloop22: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP22]], label [[SUBLOOP23_PREHEADER:%.*]] +; CHECK: subloop23.preheader: +; CHECK-NEXT: br label [[SUBLOOP23:%.*]] +; CHECK: subloop23: +; CHECK-NEXT: br i1 [[C]], label [[SUBLOOP23]], label [[SUBLOOP2_LATCH:%.*]] +; CHECK: subloop2_latch: +; CHECK-NEXT: br label [[SUBLOOP2]] +; CHECK: bb6: +; CHECK-NEXT: br i1 true, label [[BB2_LOOPEXIT:%.*]], label [[BB8:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br i1 true, label [[BB4]], label [[BB3_LOOPEXIT:%.*]] ; br label %bb1 Index: llvm/test/Transforms/LoopVectorize/loop-form.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/loop-form.ll +++ llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -973,8 +973,8 @@ } -; two back branches - loop simplify with convert this to the same form -; as previous before vectorizer sees it, but show that. +; loop-simplify pass transforms loop with multiple latches to a nested loop and +; the nested loop can be vectorized. 
define i32 @multiple_latch2(i16* %p) { ; CHECK-LABEL: @multiple_latch2( ; CHECK-NEXT: entry: Index: llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -6,15 +6,17 @@ define i16 @test(i16** %arg, i64 %N) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_OUTER:%.*]] +; CHECK: outer.outer: ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[L_1:%.*]] = load i16*, i16** [[ARG:%.*]], align 8 ; CHECK-NEXT: [[L_2:%.*]] = load i16*, i16** [[ARG]], align 8 ; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[C_1]], label [[OUTER_BACKEDGE:%.*]], label [[INNER_PREHEADER:%.*]] -; CHECK: outer.backedge: -; CHECK-NEXT: br label [[OUTER]] +; CHECK-NEXT: br i1 [[C_1]], label [[OUTER]], label [[INNER_PREHEADER:%.*]] ; CHECK: inner.preheader: +; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[OUTER]] ] +; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[OUTER]] ] ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() @@ -23,23 +25,22 @@ ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]] ; CHECK: loop.3.preheader: -; CHECK-NEXT: [[L_1_LCSSA10:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ] -; CHECK-NEXT: [[L_2_LCSSA3:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8* +; CHECK-NEXT: [[L_1_LCSSA_LCSSA:%.*]] = phi i16* [ [[L_1_LCSSA]], [[INNER_BB]] ] +; CHECK-NEXT: [[L_2_LCSSA_LCSSA:%.*]] = phi i16* [ [[L_2_LCSSA]], [[INNER_BB]] ] +; CHECK-NEXT: [[L_2_LCSSA_LCSSA5:%.*]] = bitcast i16* [[L_2_LCSSA_LCSSA]] to i8* ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 ; CHECK-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_2_LCSSA]], i64 1 -; CHECK-NEXT: [[SCEVGEP4:%.*]] = bitcast i16* [[SCEVGEP]] to i8* -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1 -; CHECK-NEXT: [[SCEVGEP58:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_2_LCSSA_LCSSA]], i64 1 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = bitcast i16* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i16, i16* [[L_1_LCSSA_LCSSA]], i64 1 +; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i16* [[SCEVGEP7]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], 2 -; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i16, i16* [[L_1_LCSSA10]], i64 [[TMP1]] -; CHECK-NEXT: [[SCEVGEP912:%.*]] = bitcast i16* [[SCEVGEP9]] to i8* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP912]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP58]], [[SCEVGEP4]] +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast i16* [[SCEVGEP9]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA_LCSSA5]], [[SCEVGEP910]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP78]], [[SCEVGEP6]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true ; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] @@ -51,11 +52,11 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[L_1]], i64 [[TMP3]] +; CHECK-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 2, !alias.scope !0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA_LCSSA]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i16 [[TMP8]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 @@ -73,24 +74,24 @@ ; CHECK-NEXT: [[C_4:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_4]], label [[EXIT_LOOPEXIT1:%.*]], label [[INNER]] ; CHECK: outer.latch: -; CHECK-NEXT: br label [[OUTER_BACKEDGE]] +; CHECK-NEXT: br label [[OUTER_OUTER]] ; CHECK: loop.3: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[C_5:%.*]] = icmp ult i64 [[IV]], [[N]] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA_LCSSA]], i64 [[IV_NEXT]] ; CHECK-NEXT: [[LOOP_L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA]], i64 0 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA_LCSSA]], i64 0 ; CHECK-NEXT: store i16 [[LOOP_L_1]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: br i1 [[C_5]], label [[LOOP_3]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit.loopexit1: -; CHECK-NEXT: [[L_1_LCSSA6:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ] +; CHECK-NEXT: 
[[L_1_LCSSA_LCSSA3:%.*]] = phi i16* [ [[L_1_LCSSA]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA6]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ] -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_17]], align 2 +; CHECK-NEXT: [[L_1_LCSSA4:%.*]] = phi i16* [ [[L_1_LCSSA_LCSSA3]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_1_LCSSA4]], align 2 ; CHECK-NEXT: ret i16 [[L_3]] ; entry: