Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -106,6 +106,40 @@ } } + // Try to find a Phi node that has the same loop invariant as an input from + // its only back edge. If there is such Phi, peeling 1 iteration from the + // loop is profitable, because starting from 2nd iteration we will have an + // invariant instead of this Phi. + if (L->getNumBackEdges() == 1) { + BasicBlock *Header = L->getHeader(); + // Find the only back edge. + BasicBlock *BackEdge = nullptr; + for (BasicBlock *Pred : predecessors(Header)) { + if (L->contains(Pred)) { + BackEdge = Pred; + break; + } + } + assert(BackEdge && "No back edge in loop?"); + // Iterate over Phis to find one with invariant input on back edge. + bool FoundCandidate = false; + PHINode *Phi; + for (auto BI = Header->begin(); (Phi = dyn_cast(&*BI)); ++BI) { + Value *Input = Phi->getIncomingValueForBlock(BackEdge); + if (L->isLoopInvariant(Input)) { + FoundCandidate = true; + break; + } + } + if (FoundCandidate) { + DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi + << " because starting from 2nd iteration it is always" + << " an invariant\n"); + UP.PeelCount = 1; + return; + } + } + return; } Index: test/Transforms/LoopUnroll/peel-loop-not-forced.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/peel-loop-not-forced.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -S -loop-unroll -indvars -loop-deletion -simplifycfg -instcombine | FileCheck %s + +define i32 @invariant_backedge_1(i32 %a, i32 %b) { +; CHECK-LABEL: @invariant_backedge_1 +; CHECK: entry: +; CHECK-NEXT: %0 = mul i32 %b, 999 +; CHECK-NEXT: %1 = add i32 %0, %a +; CHECK-NEXT: ret i32 %1 +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] + %plus = phi i32 [ %a, %entry ], [ %b, %loop ] + + %incsum = add i32 %sum, %plus + %inc = add i32 %i, 1 + %cmp = icmp slt i32 %i, 1000 + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %sum +}