Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -784,7 +784,15 @@ } } - // 4rd priority is partial unrolling. + // 4th priority is loop peeling + computePeelCount(L, LoopSize, UP); + if (UP.PeelCount) { + UP.Runtime = false; + UP.Count = 1; + return ExplicitUnroll; + } + + // 5th priority is partial unrolling. // Try partial unroll only when TripCount could be staticaly calculated. if (TripCount) { UP.Partial |= ExplicitUnroll; @@ -847,14 +855,6 @@ << "Unable to fully unroll loop as directed by unroll(full) pragma " "because loop has a runtime trip count."); - // 5th priority is loop peeling - computePeelCount(L, LoopSize, UP); - if (UP.PeelCount) { - UP.Runtime = false; - UP.Count = 1; - return ExplicitUnroll; - } - // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. if (HasRuntimeUnrollDisablePragma(L)) { Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -106,6 +106,44 @@ } } + // Try to find a Phi node that has the same loop invariant as an input from + // its only back edge. If there is such Phi, peeling 1 iteration from the + // loop is profitable, because starting from 2nd iteration we will have an + // invariant instead of this Phi. + BasicBlock *Header = L->getHeader(); + // Find the only back edge. + BasicBlock *BackEdge = nullptr; + for (auto PI = pred_begin(Header), PE = pred_end(Header); PI != PE; ++PI) { + BasicBlock *BB = *PI; + if (!L->contains(BB)) + continue; + if (BackEdge != nullptr) { + BackEdge = nullptr; + break; + } + BackEdge = BB; + } + + if (BackEdge) { + // Iterate over Phis to find one with invariant input on back edge. + bool FoundCandidate = false; + PHINode *Phi; + for (auto BI = Header->begin(); (Phi = dyn_cast(&*BI)); ++BI) { + Value *Input = Phi->getIncomingValueForBlock(BackEdge); + if (L->isLoopInvariant(Input)) { + FoundCandidate = true; + break; + } + } + if (FoundCandidate) { + DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi + << " because starting from 2nd iteration it is always" + << " an invariant\n"); + UP.PeelCount = 1; + return; + } + } + return; } Index: test/Transforms/LoopUnroll/peel-loop-not-forced.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop-not-forced.ll +++ test/Transforms/LoopUnroll/peel-loop-not-forced.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -S -loop-unroll -indvars -loop-deletion -simplifycfg -instcombine | FileCheck %s + +define i32 @invariant_backedge_1(i32 %a, i32 %b) { +; CHECK-LABEL: @invariant_backedge_1 +; CHECK: entry: +; CHECK-NEXT: %0 = mul i32 %b, 999 +; CHECK-NEXT: %1 = add i32 %0, %a +; CHECK-NEXT: ret i32 %1 +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] + %plus = phi i32 [ %a, %entry ], [ %b, %loop ] + + %incsum = add i32 %sum, %plus + %inc = add i32 %i, 1 + %cmp = icmp slt i32 %i, 1000 + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %sum +}