Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h @@ -261,6 +261,9 @@ /// loop body even when the number of loop iterations is not known at /// compile time). bool Runtime; + /// Allow emitting expensive instructions (such as divisions) when computing + /// the trip count of a loop for runtime unrolling. + bool AllowExpensiveTripCount; }; /// \brief Get target-customized preferences for the generic loop unrolling Index: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h @@ -28,11 +28,13 @@ class Pass; bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, - unsigned TripMultiple, LoopInfo *LI, Pass *PP, - LPPassManager *LPM, AssumptionCache *AC); - -bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, - LPPassManager* LPM); + bool AllowExpensiveTripCount, unsigned TripMultiple, + LoopInfo *LI, Pass *PP, LPPassManager *LPM, + AssumptionCache *AC); + +bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, LoopInfo *LI, + LPPassManager *LPM); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } Index: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -165,6 +165,7 @@ UP.MaxCount = UINT_MAX; UP.Partial = CurrentAllowPartial; UP.Runtime = CurrentRuntime; + UP.AllowExpensiveTripCount = false; TTI.getUnrollingPreferences(L, UP); } @@ -886,8 +887,8 @@ } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, - &LPM, &AC)) + if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, + TripMultiple, LI, this, &LPM, &AC)) return false; return true; Index: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp @@ -146,6 +146,13 @@ /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// +/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that +/// have a runtime (i.e. not compile time constant) trip count. Unrolling these +/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" +/// iterations before branching into the unrolled loop. UnrollLoop will not +/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and +/// AllowExpensiveTripCount is false. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be @@ -154,8 +161,9 @@ /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, - Pass *PP, LPPassManager *LPM, AssumptionCache *AC) { + bool AllowRuntime, bool AllowExpensiveTripCount, + unsigned TripMultiple, LoopInfo *LI, Pass *PP, + LPPassManager *LPM, AssumptionCache *AC) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -218,7 +226,8 @@ // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); - if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + if (RuntimeTripCount && + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -278,7 +278,8 @@ /// ... /// End: /// -bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, LoopInfo *LI, LPPassManager *LPM) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) @@ -312,6 +313,12 @@ if (isa(TripCountSC)) return false; + BasicBlock *Header = L->getHeader(); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-unroll"); + if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + return false; + // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. if (!isPowerOf2_32(Count)) @@ -332,18 +339,15 @@ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; BasicBlock *PH = L->getLoopPreheader(); - BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the // prolog code and one for a new loop preheader BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast(PH->getTerminator()); - const DataLayout &DL = Header->getModule()->getDataLayout(); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) - SCEVExpander Expander(*SE, DL, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), Index: llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -unroll-runtime -loop-unroll < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +;; Check that we don't emit expensive instructions to compute trip +;; counts when unrolling loops. + +define i32 @test(i64 %v12, i8* %array, i64* %loc) { +; CHECK-LABEL: @test( +; CHECK-NOT: udiv +entry: + %step = load i64, i64* %loc, !range !0 + br label %loop + +loop: ; preds = %entry, %loop + %k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ] + %v14 = getelementptr inbounds i8, i8* %array, i64 %k.015 + store i8 0, i8* %v14 + %v15 = add nuw nsw i64 %k.015, %step + %v16 = icmp slt i64 %v15, 8193 + br i1 %v16, label %loop, label %loopexit + +loopexit: ; preds = %loop + ret i32 0 +} + +!0 = !{i64 1, i64 100} Index: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll +++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,5 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + ; Tests for unrolling loops with run-time trip counts ; CHECK: %xtraiter = and i32 %n Index: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll +++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll @@ -20,7 +20,8 @@ br label %loop2.header loop2.header: - br label %loop2 + %e = icmp uge i32 %iter, 1 + br i1 %e, label %loop2, label %exit2 loop2: %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]