Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -231,13 +231,15 @@ // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. - // We should not eliminate the loop header either, because eliminating - // a loop header might later prevent LoopSimplify from transforming nested - // loops into simplified form. + // We should not eliminate the loop header or latch either, because + // eliminating a loop header or latch might later prevent LoopSimplify + // from transforming nested loops into simplified form. We will rely on + // later passes in backend to clean up empty blocks. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) { + BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) && + !LoopHeaders.count(BI->getSuccessor(0))) { // FIXME: It is always conservatively correct to drop the info // for a block even if it doesn't get erased. This isn't totally // awesome, but it allows us to use AssertingVH to prevent nasty Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5656,20 +5656,22 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header - // (This is for early invocations before loop simplify and vectorization - // to keep canonical loop forms for nested loops. - // These blocks can be eliminated when the pass is invoked later - // in the back-end.) + // if LoopHeader is provided, check if the block or its successor is a loop + // header (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) + bool NeedCanonicalLoop = + !LateSimplifyCFG && + (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - (!LoopHeaders || !LoopHeaders->count(BB)) && - TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; // If the only instruction in the block is a seteq/setne comparison Index: test/CodeGen/AArch64/aarch64-loop-gep-opt.ll =================================================================== --- test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* +; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] Index: test/Transforms/JumpThreading/pr33605.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in simplifyCFG, +; when it can destroy canonical loop structure. + +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() Index: test/Transforms/JumpThreading/static-profile.ll =================================================================== --- test/Transforms/JumpThreading/static-profile.ll +++ test/Transforms/JumpThreading/static-profile.ll @@ -86,7 +86,7 @@ ; Verify the new backedge: ; CHECK: check_2.thread: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_2: %cond2 = icmp eq i32 %v, 2 @@ -100,7 +100,7 @@ ; Verify the new backedge: ; CHECK: eq_2: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_3: %condE = icmp eq i32 %v, 3 Index: test/Transforms/LoopUnroll/peel-loop.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop.ll +++ test/Transforms/LoopUnroll/peel-loop.ll @@ -18,9 +18,11 @@ ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ {{.*}}, %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ {{.*}}, %[[LOOP]] ] define void @basic(i32* %p, i32 %k) #0 { entry: @@ -65,9 +67,11 @@ ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ %[[IV:.*]], %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ %[[IV:.*]], %[[LOOP]] ] ; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %[[IV]], %[[LOOP]] ] ; CHECK: ret i32 %ret define i32 @output(i32* %p, i32 %k) #0 { Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,7 +16,7 @@ %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.preheader.split.us, label %for.body.preheader.split, !prof !0 if.then: ; preds = %for.body ; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 @@ -53,7 +53,7 @@ br label %for.body ;CHECK: entry: ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 -;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK-NEXT: br i1 %cmp1, label %entry.split, label %for.cond.cleanup.split, !prof !1 ;CHECK: for.body: for.body: ; preds = %for.inc, %entry %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] Index: test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- test/Transforms/LoopUnswitch/infinite-loop.ll +++ test/Transforms/LoopUnswitch/infinite-loop.ll @@ -6,7 +6,7 @@ ; Loop unswitching shouldn't trivially unswitch the true case of condition %a ; in the code here because it leads to an infinite loop. While this doesn't ; contain any instructions with side effects, it's still a kind of side effect. -; It can trivially unswitch on the false cas of condition %a though. +; It can trivially unswitch on the false case of condition %a though. ; STATS: 2 loop-unswitch - Number of branches unswitched ; STATS: 2 loop-unswitch - Number of unswitches that are trivial @@ -16,7 +16,7 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split ; CHECK: for.body: ; CHECK-NEXT: br label %for.body Index: test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s +; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s ; This test checks auto-vectorization with FP induction variable. ; The FP operation is not "fast" and requires "fast-math" function attribute. Index: test/Transforms/LoopVectorize/float-induction.ll =================================================================== --- test/Transforms/LoopVectorize/float-induction.ll +++ test/Transforms/LoopVectorize/float-induction.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s @fp_inc = common global float 0.000000e+00, align 4 Index: test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1322,8 +1322,8 @@ ; Speculation depth must be limited to avoid a zero-cost instruction cycle. ; CHECK-LABEL: @PR26308( -; CHECK: while.body: -; CHECK-NEXT: br label %while.body +; CHECK: cleanup4: +; CHECK-NEXT: br label %cleanup4 define i32 @PR26308(i1 %B, i64 %load) { entry: Index: test/Transforms/SimplifyCFG/multiple-phis.ll =================================================================== --- test/Transforms/SimplifyCFG/multiple-phis.ll +++ test/Transforms/SimplifyCFG/multiple-phis.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; RUN: opt -latesimplifycfg -S < %s | FileCheck %s ; It's not worthwhile to if-convert one of the phi nodes and leave ; the other behind, because that still requires a branch. If Index: test/Transforms/SimplifyCFG/pr33605.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in simplifyCFG, +; when it can destroy canonical loop structure. + +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() Index: test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll =================================================================== --- test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll +++ test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; RUN: opt -latesimplifycfg -S < %s | FileCheck %s define void @test1(i32 %n) #0 { entry: