Index: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp @@ -402,7 +402,8 @@ // counts. If we later unroll the loop, we want these to be updated. if (Peeled) { // According to our guards and profitability checks the only - // meaningful exit should be latch block. + // meaningful exit should be latch block. Other exits go to deopt, + // so we do not worry about them. BasicBlock *ExitingBlock = L->getLoopLatch(); assert(ExitingBlock && "Loop without exiting block?"); assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -61,6 +61,10 @@ "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); +static cl::opt UnrollPeelMultiDeoptExit( + "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, + cl::desc("Allow peeling of loops with multiple deopt exits.")); + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). @@ -73,6 +77,22 @@ if (!L->isLoopSimplifyForm()) return false; + if (UnrollPeelMultiDeoptExit) { + SmallVector Exits; + L->getUniqueNonLatchExitBlocks(Exits); + + if (!Exits.empty()) { + // Latch's terminator is a conditional branch, Latch is exiting and + // all non Latch exits ends up with deoptimize. + const BasicBlock *Latch = L->getLoopLatch(); + const BranchInst *T = dyn_cast(Latch->getTerminator()); + return T && T->isConditional() && L->isLoopExiting(Latch) && + all_of(Exits, [](const BasicBlock *BB) { + return BB->getTerminatingDeoptimizeCall(); + }); + } + } + // Only peel loops that contain a single exit if (!L->getExitingBlock() || !L->getUniqueExitBlock()) return false; Index: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,unroll)' 2>&1 | FileCheck %s + +; Make sure we use the profile information correctly to peel-off 3 iterations +; from the loop, and update the branch weights for the peeled loop properly. + +; CHECK: Loop Unroll: F[basic] +; CHECK: PEELING loop %for.body with iteration count 3! + +; CHECK-LABEL: @basic +; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16 +; CHECK: [[NEXT0]]: +; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17 +; CHECK: [[NEXT1]]: +; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18 +; CHECK: [[NEXT2]]: +; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19 + +define i32 @basic(i32* %p, i32 %k, i1 %c) #0 !prof !15 { +entry: + %cmp3 = icmp slt i32 0, %k + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %continue ] + %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %continue ] + %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 + store i32 %i.05, i32* %p.addr.04, align 4 + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %c, label %continue, label %side_exit, !prof !17 + +continue: + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16 + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %res = phi i32 [ 0, %entry ], [ %inc, %for.cond.for.end_crit_edge ] + ret i32 %res + +side_exit: + %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ] + ret i32 %rval +} + +declare i32 @llvm.experimental.deoptimize.i32(...) + +attributes #0 = { nounwind } +attributes #1 = { nounwind optsize } + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10} +!5 = !{!"MaxCount", i64 3} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 3} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 3, i32 2} +!13 = !{i32 999000, i64 1, i32 10} +!14 = !{i32 999999, i64 1, i32 10} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 3001, i32 1001} +!17 = !{!"branch_weights", i32 1, i32 0} + +;CHECK: !16 = !{!"branch_weights", i32 900, i32 101} +;CHECK: !17 = !{!"branch_weights", i32 540, i32 360} +;CHECK: !18 = !{!"branch_weights", i32 162, i32 378} +;CHECK: !19 = !{!"branch_weights", i32 1399, i32 162} +