Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -64,6 +64,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/IVUsers.h" @@ -130,6 +131,7 @@ #define DEBUG_TYPE "loop-reduce" +STATISTIC(NumIVs, "Number of IVs changed to SCEV expression"); /// MaxIVUsers is an arbitrary threshold that provides an early opportunity for /// bail out. This threshold is far beyond the number of users that LSR can /// conceivably solve, so it should not affect generated code, but catches the @@ -5601,6 +5603,92 @@ DeadInsts.emplace_back(OperandIsInstr); } +// LSR may at times remove all uses of an induction variable from a loop. +// The only remaining use is the PHI in the exit block. +// When this is the case, if the exit value of the IV can be calculated using +// SCEV, we can replace the exit block PHI with the final value of the IV and +// skip the updates in each loop iteration. +bool ReplaceExitPHIsWithFinalVal(Loop *L, ScalarEvolution &SE) { + + SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", + false); + + bool Changed = false; + + BasicBlock *LCSSAExitBB = L->getExitBlock(); + if (!LCSSAExitBB) + return false; + + SmallVector PHIsToDelete; + for (PHINode &LCSSAPhi : LCSSAExitBB->phis()) { + if (LCSSAPhi.getNumIncomingValues() != 1) + break; + + // Check if this LCSSAPhi has an incoming value from the prolog which is + // used in an induction variable in the prolog. + BasicBlock *Pred = LCSSAPhi.getIncomingBlock(0); + Value *IVNext = LCSSAPhi.getIncomingValueForBlock(Pred); + if (!SE.isSCEVable(LCSSAPhi.getType())) + continue; + + PHINode *IV = nullptr; + bool ExitUsesIV = false; + for (PHINode &HeaderPhi : L->getHeader()->phis()) { + // Check if the LCSSAPhi is using IV or IVNext + if (llvm::any_of(HeaderPhi.incoming_values(), + [&](const Value *V) { return IVNext == V; })) { + IV = &HeaderPhi; + ExitUsesIV = true; + break; + } + Instruction *HeaderPhiInst = dyn_cast(&HeaderPhi); + if (HeaderPhiInst == IVNext) + break; + } + + // Check that this IV isn't used anywhere in the prolog except for + // when it is updated in each iteration of the loop. + if (ExitUsesIV) { + if (!IV || IV->getNumUses() != 1) + continue; + bool SingleUseIsUpdate = true; + for (auto U : IV->users()) { + if (U != IVNext) { + SingleUseIsUpdate = false; + break; + } + } + if (!SingleUseIsUpdate) + continue; + } + // Check that the updated value of the IV isn't used anywhere except for + // the prolog PHI and the exit block PHI. + if (IVNext->getNumUses() != 2) + continue; + + // Check if we can use SCEV to calculate the final value and replace + // the PHI with the SCEV computed value. + const SCEVAddRecExpr *IndVar = dyn_cast(SE.getSCEV(IVNext)); + if (!IndVar) + continue; + + const SCEV *Final = SE.getSCEVAtScope(IVNext, L->getParentLoop()); + if (Final == IndVar) + continue; + BasicBlock *LCSSAExitBB = L->getExitBlock(); + Value *ExitVal = Rewriter.expandCodeFor(Final, IVNext->getType(), + LCSSAExitBB->getFirstNonPHI()); + LCSSAPhi.replaceAllUsesWith(ExitVal); + PHIsToDelete.push_back(&LCSSAPhi); + ++NumIVs; + Changed = true; + } + for (PHINode *DeletedPHI : PHIsToDelete) + DeletedPHI->eraseFromParent(); + + return Changed; +} + /// Rewrite all the fixup locations with new values, following the chosen /// solution. void LSRInstance::ImplementSolution( @@ -6376,6 +6464,8 @@ } } + Changed |= ReplaceExitPHIsWithFinalVal(L, SE); + if (SalvageableDVI.empty()) return Changed; Index: llvm/test/Transforms/LoopStrengthReduce/remove_scev_indvars.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopStrengthReduce/remove_scev_indvars.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -S -loop-reduce | FileCheck %s + +define void @testIVNext(i64* nocapture %a, i64 signext %m, i64 signext %n) { +entry: + br label %for.body + +for.body: + %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body ], [ %m, %entry ] + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] + %uglygep138 = getelementptr i64, i64* %a, i64 %i + store i64 55, i64* %uglygep138, align 4 + %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1 + %i.next = add i64 %i, 1 + %i.cmp.not = icmp eq i64 %i.next, %n + br i1 %i.cmp.not, label %for.exit, label %for.body + +; CHECK: entry: +; CHECK: %0 = add i64 %n, %m +; CHECK-NOT: %indvars.iv.next.prol +; CHECK-NOT: %indvars.iv.prol +for.exit: + %iv.prol.lcssa = phi i64 [ %indvars.iv.next.prol, %for.body ] + ret void +} + +define void @testIV(i64* nocapture %a, i64 signext %m, i64 signext %n) { +entry: + br label %for.body + +for.body: + %iv.prol = phi i64 [ %iv.next.prol, %for.body ], [ %m, %entry ] + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] + %uglygep138 = getelementptr i64, i64* %a, i64 %i + store i64 55, i64* %uglygep138, align 4 + %iv.next.prol = add nuw nsw i64 %iv.prol, 1 + %i.next = add i64 %i, 1 + %i.cmp.not = icmp eq i64 %i.next, %n + br i1 %i.cmp.not, label %for.exit, label %for.body + +; CHECK: entry: +; CHECK: %0 = add i64 %n, %m +; CHECK: %1 = add i64 %0, -1 +; CHECK-NOT: %iv.next.prol +; CHECK-NOT: %iv.prol +for.exit: + %iv.prol.lcssa = phi i64 [ %iv.prol, %for.body ] + ret void +}