Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -71,6 +71,32 @@
   if (!L->empty())
     return;
 
+  // Try to find a Phi node that has the same loop invariant as an input from
+  // its only back edge. If there is such Phi, peeling 1 iteration from the
+  // loop is profitable, because starting from 2nd iteration we will have an
+  // invariant instead of this Phi.
+  if (auto *BackEdge = L->getLoopLatch()) {
+    BasicBlock *Header = L->getHeader();
+    // Iterate over Phis to find one with invariant input on back edge. The
+    // scan ends at the first non-Phi instruction, where dyn_cast yields null.
+    bool FoundCandidate = false;
+    PHINode *Phi = nullptr;
+    for (auto BI = Header->begin(); (Phi = dyn_cast<PHINode>(&*BI)); ++BI) {
+      Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+      if (L->isLoopInvariant(Input)) {
+        FoundCandidate = true;
+        break;
+      }
+    }
+    if (FoundCandidate) {
+      DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
+                   << " because starting from 2nd iteration it is always"
+                   << " an invariant\n");
+      UP.PeelCount = 1;
+      return;
+    }
+  }
+
   // Bail if we know the statically calculated trip count.
   // In this case we rather prefer partial unrolling.
   if (TripCount)
Index: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -S -loop-unroll | FileCheck %s
+
+define i32 @invariant_backedge_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_1
+; CHECK-NOT: %plus = phi
+; CHECK: loop.peel:
+; CHECK: loop:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %b, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}