Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -382,6 +382,7 @@ bool UpperBound; /// Allow peeling off loop iterations for loops with low dynamic tripcount. bool AllowPeeling; + bool PeelRemainder; }; /// \brief Get target-customized preferences for the generic loop unrolling Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -42,14 +42,17 @@ bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, - unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, + unsigned TripMultiple, unsigned PeelCount, bool PeelRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA); bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, - bool UseEpilogRemainder, LoopInfo *LI, + bool UseEpilogRemainder, bool PeelRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, bool PreserveLCSSA); void computePeelCount(Loop *L, unsigned LoopSize, Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -152,6 +152,7 @@ UP.Partial = false; UP.Runtime = false; UP.AllowRemainder = true; + UP.PeelRemainder = false; UP.AllowExpensiveTripCount = false; UP.Force = false; UP.UpperBound = false; @@ -938,7 +939,7 @@ Optional<bool> ProvidedUpperBound) { DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << 
"\n"); - if (HasUnrollDisablePragma(L)) + if (HasUnrollDisablePragma(L)) return false; if (!L->isLoopSimplifyForm()) { DEBUG( @@ -1032,7 +1033,8 @@ // Unroll the loop. if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, - TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE, + TripMultiple, UP.PeelCount, UP.PeelRemainder, + LI, &SE, &DT, &AC, &ORE, PreserveLCSSA)) return false; Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -295,7 +295,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, - unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, + unsigned TripMultiple, unsigned PeelCount, + bool PeelRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { @@ -418,7 +419,8 @@ if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, - EpilogProfitability, LI, SE, DT, + EpilogProfitability, PeelRemainder, + LI, SE, DT, AC, PreserveLCSSA)) { if (Force) RuntimeTripCount = false; Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -294,7 +294,8 @@ /// Return the new cloned loop that is created when CreateRemainderLoop is true. 
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, - const bool UseEpilogRemainder, BasicBlock *InsertTop, + const bool UseEpilogRemainder, const bool PeelRemainder, + BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { @@ -413,10 +414,13 @@ } LLVMContext &Context = NewLoop->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); + if (!PeelRemainder) { + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back(MDString::get(Context, + "llvm.loop.unroll.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + } MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. @@ -525,8 +529,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, + bool PeelRemainder, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, bool PreserveLCSSA) { + DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); DEBUG(L->dump()); @@ -739,7 +745,8 @@ BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, PeelRemainder, + InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. 
@@ -883,6 +890,9 @@ formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } + if (remainderLoop && PeelRemainder) + peelLoop(remainderLoop, Count - 1, LI, SE, DT, AC, PreserveLCSSA); + NumRuntimeUnrolled++; return true; }