Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -234,7 +234,7 @@ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, - bool isPreheader); + bool isPreheader, bool isLatch); bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, @@ -645,12 +645,15 @@ /// blocks so we can split them the way we want them. bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { SmallPtrSet Preheaders; + SmallPtrSet Latches; SmallVector LoopList(LI->begin(), LI->end()); while (!LoopList.empty()) { Loop *L = LoopList.pop_back_val(); LoopList.insert(LoopList.end(), L->begin(), L->end()); if (BasicBlock *Preheader = L->getLoopPreheader()) Preheaders.insert(Preheader); + if (BasicBlock *Latch = L->getLoopLatch()) + Latches.insert(Latch); } bool MadeChange = false; @@ -658,10 +661,14 @@ for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = &*I++; BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); - if (!DestBB || - !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) + if (!DestBB || !isMergingEmptyBlockProfitable( + BB, DestBB, Preheaders.count(BB), Latches.count(DestBB))) continue; + if (Latches.count(BB)) + Latches.erase(BB); + if (Preheaders.count(BB)) + Preheaders.erase(BB); eliminateMostlyEmptyBlock(BB); MadeChange = true; } @@ -670,7 +677,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, - bool isPreheader) { + bool isPreheader, + bool isLatch) { // Do not delete loop preheaders if doing so would create a critical edge. // Loop preheaders can be good locations to spill registers. 
If the // preheader is deleted and we create a critical edge, registers may be @@ -694,6 +702,11 @@ isa(Pred->getTerminator()))) return true; + // If the destination block is an almost-empty latch block, then we can hoist + // the jump through the backedge, so it is profitable to merge. + if (isLatch && DestBB->getTerminator() == DestBB->getFirstNonPHI()) + return true; + if (BB->getTerminator() != BB->getFirstNonPHI()) return true; Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -231,13 +231,15 @@ // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. - // We should not eliminate the loop header either, because eliminating - // a loop header might later prevent LoopSimplify from transforming nested - // loops into simplified form. + // We should not eliminate the loop header or latch either, because + // eliminating a loop header or latch might later prevent LoopSimplify + // from transforming nested loops into simplified form. We rely on + // later passes in the backend to clean up empty blocks. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) { + BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) && + !LoopHeaders.count(BI->getSuccessor(0))) { // FIXME: It is always conservatively correct to drop the info // for a block even if it doesn't get erased. 
This isn't totally // awesome, but it allows us to use AssertingVH to prevent nasty Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5656,20 +5656,22 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header - // (This is for early invocations before loop simplify and vectorization - // to keep canonical loop forms for nested loops. - // These blocks can be eliminated when the pass is invoked later - // in the back-end.) + // If LoopHeaders is provided, check if the block or its successor is a loop + // header. (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) 
+ bool NeedCanonicalLoop = + !LateSimplifyCFG && + (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - (!LoopHeaders || !LoopHeaders->count(BB)) && - TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; // If the only instruction in the block is a seteq/setne comparison Index: test/CodeGen/AArch64/aarch64-loop-gep-opt.ll =================================================================== --- test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* +; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] Index: test/Transforms/CodeGenPrepare/merge-empty-latch-block.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/merge-empty-latch-block.ll @@ -0,0 +1,118 @@ +; RUN: opt -codegenprepare < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; Expect to merge empty latch block as it will hoist the jump through backedge. 
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@finput = external local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = external hidden unnamed_addr constant [23 x i8], align 1 +@lineno = external local_unnamed_addr global i32, align 4 +@.str.1 = external hidden unnamed_addr constant [21 x i8], align 1 + +; Function Attrs: nounwind +; CHECK-LABEL: @skip_white_space +define i32 @skip_white_space() local_unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %0 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call11 = tail call i32 @_IO_getc(%struct._IO_FILE* %0) + switch i32 %call11, label %sw.default [ + i32 47, label %sw.bb + i32 10, label %sw.bb25 + i32 32, label %for.cond.backedge + i32 9, label %for.cond.backedge + i32 12, label %for.cond.backedge + ] + +sw.bb: ; preds = %for.cond + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call1 = tail call i32 @_IO_getc(%struct._IO_FILE* %1) + %cmp = icmp eq i32 %call1, 42 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %sw.bb + tail call void @fatals(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i64 0, i64 0), i32 %call1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #3 + br label %if.end + +if.end: ; preds = %if.then, %sw.bb + %2 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call2 = tail call i32 @_IO_getc(%struct._IO_FILE* %2) + br label %while.body + +; CHECK-LABEL: while.body +; CHECK: %c.140 = phi i32 [ %call2, %if.end ], [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ %c.2, %while.cond5 ] +; CHECK-NOT: %c.140 = phi i32 [ %call2, %if.end ], [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ 
%c.2, %while.body.backedge.loopexit ] +while.body: ; preds = %while.body.backedge, %if.end + %c.140 = phi i32 [ %call2, %if.end ], [ %c.140.be, %while.body.backedge ] + switch i32 %c.140, label %if.else19 [ + i32 42, label %while.cond5.preheader + i32 10, label %if.then14 + i32 -1, label %if.then18 + ] + +while.cond5.preheader: ; preds = %while.body + br label %while.cond5 + +while.cond5: ; preds = %while.body7, %while.cond5.preheader + %c.2 = phi i32 [ %call8, %while.body7 ], [ 42, %while.cond5.preheader ] + switch i32 %c.2, label %while.body.backedge.loopexit [ + i32 42, label %while.body7 + i32 47, label %for.cond.backedge.loopexit + ] + +while.body7: ; preds = %while.cond5 + %3 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call8 = tail call i32 @_IO_getc(%struct._IO_FILE* %3) + br label %while.cond5 + +if.then14: ; preds = %while.body + %4 = load i32, i32* @lineno, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* @lineno, align 4 + %5 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call15 = tail call i32 @_IO_getc(%struct._IO_FILE* %5) + br label %while.body.backedge + +if.then18: ; preds = %while.body + tail call void @fatal(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0)) #3 + br label %while.body.backedge + +if.else19: ; preds = %while.body + %6 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call20 = tail call i32 @_IO_getc(%struct._IO_FILE* %6) + br label %while.body.backedge + +; CHECK-NOT: while.body.backedge.loopexit: +while.body.backedge.loopexit: ; preds = %while.cond5 + br label %while.body.backedge + +while.body.backedge: ; preds = %while.body.backedge.loopexit, %if.else19, %if.then18, %if.then14 + %c.140.be = phi i32 [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ %c.2, %while.body.backedge.loopexit ] + br label %while.body + +sw.bb25: ; preds = %for.cond + %7 = load i32, i32* @lineno, align 4 + %inc26 = add nsw i32 %7, 1 + store i32 
%inc26, i32* @lineno, align 4 + br label %for.cond.backedge + +for.cond.backedge.loopexit: ; preds = %while.cond5 + br label %for.cond.backedge + +for.cond.backedge: ; preds = %for.cond.backedge.loopexit, %sw.bb25, %for.cond, %for.cond, %for.cond + br label %for.cond + +sw.default: ; preds = %for.cond + ret i32 %call11 +} +; Function Attrs: nounwind +declare i32 @_IO_getc(%struct._IO_FILE* nocapture) local_unnamed_addr #1 + +declare void @fatals(i8*, i32, i32, i32, i32, i32, i32, i32, i32) local_unnamed_addr #2 + +declare void @fatal(i8*) local_unnamed_addr #2 Index: test/Transforms/JumpThreading/pr33605.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in jump threading, +; when it can destroy canonical loop structure. + +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK: br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret 
i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() Index: test/Transforms/JumpThreading/static-profile.ll =================================================================== --- test/Transforms/JumpThreading/static-profile.ll +++ test/Transforms/JumpThreading/static-profile.ll @@ -86,7 +86,7 @@ ; Verify the new backedge: ; CHECK: check_2.thread: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_2: %cond2 = icmp eq i32 %v, 2 @@ -100,7 +100,7 @@ ; Verify the new backedge: ; CHECK: eq_2: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_3: %condE = icmp eq i32 %v, 3 Index: test/Transforms/LoopUnroll/peel-loop.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop.ll +++ test/Transforms/LoopUnroll/peel-loop.ll @@ -18,9 +18,11 @@ ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ {{.*}}, %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ {{.*}}, %[[LOOP]] ] define void @basic(i32* %p, i32 %k) #0 { entry: @@ -65,9 +67,11 @@ 
; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ %[[IV:.*]], %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ %[[IV:.*]], %[[LOOP]] ] ; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %[[IV]], %[[LOOP]] ] ; CHECK: ret i32 %ret define i32 @output(i32* %p, i32 %k) #0 { Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,7 +16,7 @@ %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.preheader.split.us, label %for.body.preheader.split, !prof !0 if.then: ; preds = %for.body ; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 @@ -53,7 +53,7 @@ br label %for.body ;CHECK: entry: ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 -;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK-NEXT: br i1 %cmp1, label %entry.split, label %for.cond.cleanup.split, !prof !1 ;CHECK: for.body: for.body: ; preds = %for.inc, %entry %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] Index: test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- test/Transforms/LoopUnswitch/infinite-loop.ll +++ test/Transforms/LoopUnswitch/infinite-loop.ll @@ -6,7 +6,7 @@ ; Loop unswitching shouldn't trivially unswitch the true 
case of condition %a ; in the code here because it leads to an infinite loop. While this doesn't ; contain any instructions with side effects, it's still a kind of side effect. -; It can trivially unswitch on the false cas of condition %a though. +; It can trivially unswitch on the false case of condition %a though. ; STATS: 2 loop-unswitch - Number of branches unswitched ; STATS: 2 loop-unswitch - Number of unswitches that are trivial @@ -16,7 +16,7 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split ; CHECK: for.body: ; CHECK-NEXT: br label %for.body Index: test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s +; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s ; This test checks auto-vectorization with FP induction variable. ; The FP operation is not "fast" and requires "fast-math" function attribute. 
Index: test/Transforms/LoopVectorize/float-induction.ll =================================================================== --- test/Transforms/LoopVectorize/float-induction.ll +++ test/Transforms/LoopVectorize/float-induction.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s @fp_inc = common global float 0.000000e+00, align 4 Index: test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1322,8 +1322,8 @@ ; Speculation depth must be limited to avoid a zero-cost instruction cycle. 
; CHECK-LABEL: @PR26308( -; CHECK: while.body: -; CHECK-NEXT: br label %while.body +; CHECK: cleanup4: +; CHECK-NEXT: br label %cleanup4 define i32 @PR26308(i1 %B, i64 %load) { entry: Index: test/Transforms/SimplifyCFG/multiple-phis.ll =================================================================== --- test/Transforms/SimplifyCFG/multiple-phis.ll +++ test/Transforms/SimplifyCFG/multiple-phis.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; RUN: opt -latesimplifycfg -S < %s | FileCheck %s ; It's not worthwhile to if-convert one of the phi nodes and leave ; the other behind, because that still requires a branch. If Index: test/Transforms/SimplifyCFG/pr33605.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in simplifyCFG, +; when it can destroy canonical loop structure. 
+ +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK: br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() Index: test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll =================================================================== --- test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll +++ test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; RUN: opt -latesimplifycfg -S < %s | 
FileCheck %s define void @test1(i32 %n) #0 { entry: